From 6d16a55d66783ed7db5182398c031944f089ec3c Mon Sep 17 00:00:00 2001
From: Rok Roskar <rokroskar@gmail.com>
Date: Tue, 7 Apr 2020 12:17:59 +0200
Subject: [PATCH] renku run --input notebooks/process/wikidata-pop-data.ipynb
 papermill -p out_folder ./data/atlas/wikidata --inject-paths
 notebooks/process/wikidata-pop-data.ipynb runs/wikidata-pop-data.run.ipynb

---
 ...3644720e4b8abdfcefe86a6bdf03_papermill.cwl |  70 +++++++
 data/atlas/wikidata/che-population.csv        |   4 +-
 data/atlas/wikidata/ita-population.csv        |   4 +-
 data/atlas/wikidata/usa-population.csv        |   4 +-
 runs/wikidata-pop-data.run.ipynb              | 173 ++++++++++--------
 5 files changed, 175 insertions(+), 80 deletions(-)
 create mode 100644 .renku/workflow/a01a3644720e4b8abdfcefe86a6bdf03_papermill.cwl

diff --git a/.renku/workflow/a01a3644720e4b8abdfcefe86a6bdf03_papermill.cwl b/.renku/workflow/a01a3644720e4b8abdfcefe86a6bdf03_papermill.cwl
new file mode 100644
index 000000000..1494f7705
--- /dev/null
+++ b/.renku/workflow/a01a3644720e4b8abdfcefe86a6bdf03_papermill.cwl
@@ -0,0 +1,70 @@
+arguments: []
+baseCommand:
+- papermill
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default: out_folder
+    inputBinding:
+      position: 1
+      prefix: -p
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_2:
+    default: data/atlas/wikidata
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_3:
+    default:
+      class: File
+      path: ../../notebooks/process/wikidata-pop-data.ipynb
+    inputBinding:
+      position: 3
+      prefix: --inject-paths
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default: runs/wikidata-pop-data.run.ipynb
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_4)
+    streamable: false
+    type: File
+  output_1:
+    outputBinding:
+      glob: $(inputs.input_2)
+    streamable: false
+    type: Directory
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: runs
+    writable: true
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/atlas/wikidata
+    writable: true
+  - entry: $(inputs.input_3)
+    entryname: notebooks/process/wikidata-pop-data.ipynb
+    writable: false
+successCodes: []
+temporaryFailCodes: []
diff --git a/data/atlas/wikidata/che-population.csv b/data/atlas/wikidata/che-population.csv
index bf74a2ff0..977dad422 100644
--- a/data/atlas/wikidata/che-population.csv
+++ b/data/atlas/wikidata/che-population.csv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2ef111e3d106d90faa83ec77894ec886222dc565f535a6aeb625a8efb6152b7
-size 823
+oid sha256:f62ac1fb07d53c40dedf456a88ff24b3f9c2ad26a25af9d705acef62eed4d3cc
+size 1146
diff --git a/data/atlas/wikidata/ita-population.csv b/data/atlas/wikidata/ita-population.csv
index db6ea7c38..a8e726165 100644
--- a/data/atlas/wikidata/ita-population.csv
+++ b/data/atlas/wikidata/ita-population.csv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e40c8f362efc8a1619ad19b2b9761730d8daa85eaf8b38b75ad904257f68a79
-size 631
+oid sha256:213bf76c1e7b8e99b959b92028585c154464dcf7ebf64f002d40234c5ce1b7df
+size 766
diff --git a/data/atlas/wikidata/usa-population.csv b/data/atlas/wikidata/usa-population.csv
index b9633408d..1f9b0b885 100644
--- a/data/atlas/wikidata/usa-population.csv
+++ b/data/atlas/wikidata/usa-population.csv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d846fe3e0df640a2d38df9631532a59fbbad753dad58bfe5f806d4b372dfeb27
-size 1350
+oid sha256:510c9fe14784576666092ac3f45b433c8ebe56ad83dcd46dc98eb2f9d0e3c4a6
+size 2615
diff --git a/runs/wikidata-pop-data.run.ipynb b/runs/wikidata-pop-data.run.ipynb
index b4e317e4b..f7bbb999a 100644
--- a/runs/wikidata-pop-data.run.ipynb
+++ b/runs/wikidata-pop-data.run.ipynb
@@ -4,10 +4,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.012354,
-     "end_time": "2020-04-03T21:33:19.377218",
+     "duration": 0.012286,
+     "end_time": "2020-04-07T10:17:56.542399",
      "exception": false,
-     "start_time": "2020-04-03T21:33:19.364864",
+     "start_time": "2020-04-07T10:17:56.530113",
      "status": "completed"
     },
     "tags": []
@@ -21,10 +21,10 @@
    "execution_count": 1,
    "metadata": {
     "papermill": {
-     "duration": 3.985074,
-     "end_time": "2020-04-03T21:33:23.369368",
+     "duration": 0.679825,
+     "end_time": "2020-04-07T10:17:57.228665",
      "exception": false,
-     "start_time": "2020-04-03T21:33:19.384294",
+     "start_time": "2020-04-07T10:17:56.548840",
      "status": "completed"
     },
     "tags": []
@@ -42,10 +42,10 @@
    "execution_count": 2,
    "metadata": {
     "papermill": {
-     "duration": 0.015991,
-     "end_time": "2020-04-03T21:33:23.392938",
+     "duration": 0.012038,
+     "end_time": "2020-04-07T10:17:57.246729",
      "exception": false,
-     "start_time": "2020-04-03T21:33:23.376947",
+     "start_time": "2020-04-07T10:17:57.234691",
      "status": "completed"
     },
     "tags": [
@@ -63,10 +63,10 @@
    "execution_count": 3,
    "metadata": {
     "papermill": {
-     "duration": 0.015632,
-     "end_time": "2020-04-03T21:33:23.416629",
+     "duration": 0.012651,
+     "end_time": "2020-04-07T10:17:57.264967",
      "exception": false,
-     "start_time": "2020-04-03T21:33:23.400997",
+     "start_time": "2020-04-07T10:17:57.252316",
      "status": "completed"
     },
     "tags": [
@@ -86,10 +86,10 @@
    "execution_count": 4,
    "metadata": {
     "papermill": {
-     "duration": 0.016971,
-     "end_time": "2020-04-03T21:33:23.440680",
+     "duration": 0.012847,
+     "end_time": "2020-04-07T10:17:57.283166",
      "exception": false,
-     "start_time": "2020-04-03T21:33:23.423709",
+     "start_time": "2020-04-07T10:17:57.270319",
      "status": "completed"
     },
     "tags": []
@@ -97,10 +97,10 @@
    "outputs": [],
    "source": [
     "def write_population_data(iso_code, df):\n",
-    "    out_path = os.path.join(out_folder, f\"{iso_code.lower()}-population.csv\")\n",
-    "    print(f\"Writing {len(df)} rows to {out_path}\")\n",
     "    if PAPERMILL_OUTPUT_PATH is None:\n",
     "        return\n",
+    "    out_path = os.path.join(out_folder, f\"{iso_code.lower()}-population.csv\")\n",
+    "    print(f\"Writing {len(df)} rows to {out_path}\")\n",
     "    df.to_csv(out_path)"
    ]
   },
@@ -108,10 +108,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.006952,
-     "end_time": "2020-04-03T21:33:23.455153",
+     "duration": 0.005165,
+     "end_time": "2020-04-07T10:17:57.293664",
      "exception": false,
-     "start_time": "2020-04-03T21:33:23.448201",
+     "start_time": "2020-04-07T10:17:57.288499",
      "status": "completed"
     },
     "tags": []
@@ -125,10 +125,10 @@
    "execution_count": 5,
    "metadata": {
     "papermill": {
-     "duration": 1.075272,
-     "end_time": "2020-04-03T21:33:24.536917",
+     "duration": 0.34016,
+     "end_time": "2020-04-07T10:17:57.639183",
      "exception": false,
-     "start_time": "2020-04-03T21:33:23.461645",
+     "start_time": "2020-04-07T10:17:57.299023",
      "status": "completed"
     },
     "tags": []
@@ -163,7 +163,8 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>region_iso</th>\n",
-       "      <th>regionLabel</th>\n",
+       "      <th>region_label</th>\n",
+       "      <th>country_label</th>\n",
        "      <th>istatid</th>\n",
        "      <th>population</th>\n",
        "    </tr>\n",
@@ -171,26 +172,28 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>IT-52</td>\n",
-       "      <td>Tuscany</td>\n",
-       "      <td>09</td>\n",
-       "      <td>3729641</td>\n",
+       "      <td>IT-34</td>\n",
+       "      <td>Veneto</td>\n",
+       "      <td>Italy</td>\n",
+       "      <td>05</td>\n",
+       "      <td>4926818</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>IT-55</td>\n",
-       "      <td>Umbria</td>\n",
-       "      <td>10</td>\n",
-       "      <td>882015</td>\n",
+       "      <td>IT-25</td>\n",
+       "      <td>Lombardy</td>\n",
+       "      <td>Italy</td>\n",
+       "      <td>03</td>\n",
+       "      <td>10067494</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "  region_iso regionLabel istatid population\n",
-       "0      IT-52     Tuscany      09    3729641\n",
-       "1      IT-55      Umbria      10     882015"
+       "  region_iso region_label country_label istatid population\n",
+       "0      IT-34       Veneto         Italy      05    4926818\n",
+       "1      IT-25     Lombardy         Italy      03   10067494"
       ]
      },
      "execution_count": 5,
@@ -214,10 +217,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.008965,
-     "end_time": "2020-04-03T21:33:24.555296",
+     "duration": 0.005589,
+     "end_time": "2020-04-07T10:17:57.651022",
      "exception": false,
-     "start_time": "2020-04-03T21:33:24.546331",
+     "start_time": "2020-04-07T10:17:57.645433",
      "status": "completed"
     },
     "tags": []
@@ -231,10 +234,10 @@
    "execution_count": 6,
    "metadata": {
     "papermill": {
-     "duration": 0.566085,
-     "end_time": "2020-04-03T21:33:25.129545",
+     "duration": 0.615308,
+     "end_time": "2020-04-07T10:17:58.272517",
      "exception": false,
-     "start_time": "2020-04-03T21:33:24.563460",
+     "start_time": "2020-04-07T10:17:57.657209",
      "status": "completed"
     },
     "tags": []
@@ -269,31 +272,34 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>region_iso</th>\n",
-       "      <th>regionLabel</th>\n",
+       "      <th>region_label</th>\n",
+       "      <th>country_label</th>\n",
        "      <th>population</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>CH-SZ</td>\n",
-       "      <td>Canton of Schwyz</td>\n",
-       "      <td>159165</td>\n",
+       "      <td>CH-GE</td>\n",
+       "      <td>Canton of Geneva</td>\n",
+       "      <td>Switzerland</td>\n",
+       "      <td>499480</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>CH-TG</td>\n",
-       "      <td>Thurgau</td>\n",
-       "      <td>276472</td>\n",
+       "      <td>CH-JU</td>\n",
+       "      <td>Canton of Jura</td>\n",
+       "      <td>Switzerland</td>\n",
+       "      <td>73419</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "  region_iso       regionLabel population\n",
-       "0      CH-SZ  Canton of Schwyz     159165\n",
-       "1      CH-TG           Thurgau     276472"
+       "  region_iso      region_label country_label population\n",
+       "0      CH-GE  Canton of Geneva   Switzerland     499480\n",
+       "1      CH-JU    Canton of Jura   Switzerland      73419"
       ]
      },
      "execution_count": 6,
@@ -313,10 +319,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.009881,
-     "end_time": "2020-04-03T21:33:25.149605",
+     "duration": 0.006945,
+     "end_time": "2020-04-07T10:17:58.286838",
      "exception": false,
-     "start_time": "2020-04-03T21:33:25.139724",
+     "start_time": "2020-04-07T10:17:58.279893",
      "status": "completed"
     },
     "tags": []
@@ -330,10 +336,10 @@
    "execution_count": 7,
    "metadata": {
     "papermill": {
-     "duration": 0.952179,
-     "end_time": "2020-04-03T21:33:26.110326",
+     "duration": 0.498088,
+     "end_time": "2020-04-07T10:17:58.792249",
      "exception": false,
-     "start_time": "2020-04-03T21:33:25.158147",
+     "start_time": "2020-04-07T10:17:58.294161",
      "status": "completed"
     },
     "tags": []
@@ -368,31 +374,34 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>region_iso</th>\n",
-       "      <th>regionLabel</th>\n",
+       "      <th>region_label</th>\n",
+       "      <th>country_label</th>\n",
        "      <th>population</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>US-NH</td>\n",
-       "      <td>New Hampshire</td>\n",
-       "      <td>1330608</td>\n",
+       "      <td>US-DE</td>\n",
+       "      <td>Delaware</td>\n",
+       "      <td>United States of America</td>\n",
+       "      <td>945934</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>US-GA</td>\n",
-       "      <td>Georgia</td>\n",
-       "      <td>10214860</td>\n",
+       "      <td>US-OR</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>United States of America</td>\n",
+       "      <td>4028977</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "  region_iso    regionLabel population\n",
-       "0      US-NH  New Hampshire    1330608\n",
-       "1      US-GA        Georgia   10214860"
+       "  region_iso region_label             country_label population\n",
+       "0      US-DE     Delaware  United States of America     945934\n",
+       "1      US-OR       Oregon  United States of America    4028977"
       ]
      },
      "execution_count": 7,
@@ -407,13 +416,29 @@
     "write_population_data(iso_code, df)\n",
     "df.head(2)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "papermill": {
+     "duration": 0.0073,
+     "end_time": "2020-04-07T10:17:58.806896",
+     "exception": false,
+     "start_time": "2020-04-07T10:17:58.799596",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3.7.7 64-bit ('.venv': venv)",
    "language": "python",
-   "name": "python3"
+   "name": "python37764bitvenvvenv814492364d964019a25eb1cf3dc3e99c"
   },
   "language_info": {
    "codemirror_mode": {
@@ -425,11 +450,11 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.7.7"
   },
   "papermill": {
-   "duration": 8.557795,
-   "end_time": "2020-04-03T21:33:26.443798",
+   "duration": 3.228564,
+   "end_time": "2020-04-07T10:17:58.820057",
    "environment_variables": {},
    "exception": null,
    "input_path": "notebooks/process/wikidata-pop-data.ipynb",
@@ -439,8 +464,8 @@
     "PAPERMILL_OUTPUT_PATH": "runs/wikidata-pop-data.run.ipynb",
     "out_folder": "./data/atlas/wikidata"
    },
-   "start_time": "2020-04-03T21:33:17.886003",
-   "version": "1.1.0"
+   "start_time": "2020-04-07T10:17:55.591493",
+   "version": "2.0.0"
   }
  },
  "nbformat": 4,
-- 
GitLab