From 6d16a55d66783ed7db5182398c031944f089ec3c Mon Sep 17 00:00:00 2001 From: Rok Roskar <rokroskar@gmail.com> Date: Tue, 7 Apr 2020 12:17:59 +0200 Subject: [PATCH] renku run --input notebooks/process/wikidata-pop-data.ipynb papermill -p out_folder ./data/atlas/wikidata --inject-paths notebooks/process/wikidata-pop-data.ipynb runs/wikidata-pop-data.run.ipynb --- ...3644720e4b8abdfcefe86a6bdf03_papermill.cwl | 70 +++++++ data/atlas/wikidata/che-population.csv | 4 +- data/atlas/wikidata/ita-population.csv | 4 +- data/atlas/wikidata/usa-population.csv | 4 +- runs/wikidata-pop-data.run.ipynb | 173 ++++++++++-------- 5 files changed, 175 insertions(+), 80 deletions(-) create mode 100644 .renku/workflow/a01a3644720e4b8abdfcefe86a6bdf03_papermill.cwl diff --git a/.renku/workflow/a01a3644720e4b8abdfcefe86a6bdf03_papermill.cwl b/.renku/workflow/a01a3644720e4b8abdfcefe86a6bdf03_papermill.cwl new file mode 100644 index 000000000..1494f7705 --- /dev/null +++ b/.renku/workflow/a01a3644720e4b8abdfcefe86a6bdf03_papermill.cwl @@ -0,0 +1,70 @@ +arguments: [] +baseCommand: +- papermill +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: out_folder + inputBinding: + position: 1 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_2: + default: data/atlas/wikidata + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: string + input_3: + default: + class: File + path: ../../notebooks/process/wikidata-pop-data.ipynb + inputBinding: + position: 3 + prefix: --inject-paths + separate: true + shellQuote: true + streamable: false + type: File + input_4: + default: runs/wikidata-pop-data.run.ipynb + inputBinding: + position: 4 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_4) + streamable: false + type: File + output_1: + outputBinding: + glob: $(inputs.input_2) + streamable: false + type: Directory +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: runs + writable: true + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/atlas/wikidata + writable: true + - entry: $(inputs.input_3) + entryname: notebooks/process/wikidata-pop-data.ipynb + writable: false +successCodes: [] +temporaryFailCodes: [] diff --git a/data/atlas/wikidata/che-population.csv b/data/atlas/wikidata/che-population.csv index bf74a2ff0..977dad422 100644 --- a/data/atlas/wikidata/che-population.csv +++ b/data/atlas/wikidata/che-population.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2ef111e3d106d90faa83ec77894ec886222dc565f535a6aeb625a8efb6152b7 -size 823 +oid sha256:f62ac1fb07d53c40dedf456a88ff24b3f9c2ad26a25af9d705acef62eed4d3cc +size 1146 diff --git a/data/atlas/wikidata/ita-population.csv b/data/atlas/wikidata/ita-population.csv index db6ea7c38..a8e726165 100644 --- a/data/atlas/wikidata/ita-population.csv +++ b/data/atlas/wikidata/ita-population.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e40c8f362efc8a1619ad19b2b9761730d8daa85eaf8b38b75ad904257f68a79 -size 631 +oid sha256:213bf76c1e7b8e99b959b92028585c154464dcf7ebf64f002d40234c5ce1b7df +size 766 diff --git a/data/atlas/wikidata/usa-population.csv b/data/atlas/wikidata/usa-population.csv index b9633408d..1f9b0b885 100644 --- a/data/atlas/wikidata/usa-population.csv +++ b/data/atlas/wikidata/usa-population.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d846fe3e0df640a2d38df9631532a59fbbad753dad58bfe5f806d4b372dfeb27 -size 1350 +oid sha256:510c9fe14784576666092ac3f45b433c8ebe56ad83dcd46dc98eb2f9d0e3c4a6 +size 2615 diff --git a/runs/wikidata-pop-data.run.ipynb b/runs/wikidata-pop-data.run.ipynb index b4e317e4b..f7bbb999a 100644 --- a/runs/wikidata-pop-data.run.ipynb +++ b/runs/wikidata-pop-data.run.ipynb @@ -4,10 +4,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.012354, - "end_time": "2020-04-03T21:33:19.377218", + "duration": 0.012286, + "end_time": "2020-04-07T10:17:56.542399", "exception": false, - "start_time": "2020-04-03T21:33:19.364864", + "start_time": "2020-04-07T10:17:56.530113", "status": "completed" }, "tags": [] @@ -21,10 +21,10 @@ "execution_count": 1, "metadata": { "papermill": { - "duration": 3.985074, - "end_time": "2020-04-03T21:33:23.369368", + "duration": 0.679825, + "end_time": "2020-04-07T10:17:57.228665", "exception": false, - "start_time": "2020-04-03T21:33:19.384294", + "start_time": "2020-04-07T10:17:56.548840", "status": "completed" }, "tags": [] @@ -42,10 +42,10 @@ "execution_count": 2, "metadata": { "papermill": { - "duration": 0.015991, - "end_time": "2020-04-03T21:33:23.392938", + "duration": 0.012038, + "end_time": "2020-04-07T10:17:57.246729", "exception": false, - "start_time": "2020-04-03T21:33:23.376947", + "start_time": "2020-04-07T10:17:57.234691", "status": "completed" }, "tags": [ @@ -63,10 +63,10 @@ "execution_count": 3, "metadata": { "papermill": { - "duration": 0.015632, - "end_time": "2020-04-03T21:33:23.416629", + "duration": 0.012651, + "end_time": "2020-04-07T10:17:57.264967", "exception": false, - "start_time": "2020-04-03T21:33:23.400997", + "start_time": "2020-04-07T10:17:57.252316", "status": "completed" }, "tags": [ @@ -86,10 +86,10 @@ "execution_count": 4, "metadata": { "papermill": { - "duration": 0.016971, - "end_time": "2020-04-03T21:33:23.440680", + "duration": 0.012847, + "end_time": "2020-04-07T10:17:57.283166", "exception": false, - "start_time": "2020-04-03T21:33:23.423709", + "start_time": "2020-04-07T10:17:57.270319", "status": "completed" }, "tags": [] @@ -97,10 +97,10 @@ "outputs": [], "source": [ "def write_population_data(iso_code, df):\n", - " out_path = os.path.join(out_folder, f\"{iso_code.lower()}-population.csv\")\n", - " print(f\"Writing {len(df)} rows to {out_path}\")\n", " if PAPERMILL_OUTPUT_PATH is None:\n", " return\n", + " out_path = os.path.join(out_folder, f\"{iso_code.lower()}-population.csv\")\n", + " print(f\"Writing {len(df)} rows to {out_path}\")\n", " df.to_csv(out_path)" ] }, @@ -108,10 +108,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.006952, - "end_time": "2020-04-03T21:33:23.455153", + "duration": 0.005165, + "end_time": "2020-04-07T10:17:57.293664", "exception": false, - "start_time": "2020-04-03T21:33:23.448201", + "start_time": "2020-04-07T10:17:57.288499", "status": "completed" }, "tags": [] @@ -125,10 +125,10 @@ "execution_count": 5, "metadata": { "papermill": { - "duration": 1.075272, - "end_time": "2020-04-03T21:33:24.536917", + "duration": 0.34016, + "end_time": "2020-04-07T10:17:57.639183", "exception": false, - "start_time": "2020-04-03T21:33:23.461645", + "start_time": "2020-04-07T10:17:57.299023", "status": "completed" }, "tags": [] @@ -163,7 +163,8 @@ " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>region_iso</th>\n", - " <th>regionLabel</th>\n", + " <th>region_label</th>\n", + " <th>country_label</th>\n", " <th>istatid</th>\n", " <th>population</th>\n", " </tr>\n", @@ -171,26 +172,28 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>IT-52</td>\n", - " <td>Tuscany</td>\n", - " <td>09</td>\n", - " <td>3729641</td>\n", + " <td>IT-34</td>\n", + " <td>Veneto</td>\n", + " <td>Italy</td>\n", + " <td>05</td>\n", + " <td>4926818</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>IT-55</td>\n", - " <td>Umbria</td>\n", - " <td>10</td>\n", - " <td>882015</td>\n", + " <td>IT-25</td>\n", + " <td>Lombardy</td>\n", + " <td>Italy</td>\n", + " <td>03</td>\n", + " <td>10067494</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " region_iso regionLabel istatid population\n", - "0 IT-52 Tuscany 09 3729641\n", - "1 IT-55 Umbria 10 882015" + " region_iso region_label country_label istatid population\n", + "0 IT-34 Veneto Italy 05 4926818\n", + "1 IT-25 Lombardy Italy 03 10067494" ] }, "execution_count": 5, @@ -214,10 +217,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.008965, - "end_time": "2020-04-03T21:33:24.555296", + "duration": 0.005589, + "end_time": "2020-04-07T10:17:57.651022", "exception": false, - "start_time": "2020-04-03T21:33:24.546331", + "start_time": "2020-04-07T10:17:57.645433", "status": "completed" }, "tags": [] @@ -231,10 +234,10 @@ "execution_count": 6, "metadata": { "papermill": { - "duration": 0.566085, - "end_time": "2020-04-03T21:33:25.129545", + "duration": 0.615308, + "end_time": "2020-04-07T10:17:58.272517", "exception": false, - "start_time": "2020-04-03T21:33:24.563460", + "start_time": "2020-04-07T10:17:57.657209", "status": "completed" }, "tags": [] @@ -269,31 +272,34 @@ " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>region_iso</th>\n", - " <th>regionLabel</th>\n", + " <th>region_label</th>\n", + " <th>country_label</th>\n", " <th>population</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>CH-SZ</td>\n", - " <td>Canton of Schwyz</td>\n", - " <td>159165</td>\n", + " <td>CH-GE</td>\n", + " <td>Canton of Geneva</td>\n", + " <td>Switzerland</td>\n", + " <td>499480</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>CH-TG</td>\n", - " <td>Thurgau</td>\n", - " <td>276472</td>\n", + " <td>CH-JU</td>\n", + " <td>Canton of Jura</td>\n", + " <td>Switzerland</td>\n", + " <td>73419</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " region_iso regionLabel population\n", - "0 CH-SZ Canton of Schwyz 159165\n", - "1 CH-TG Thurgau 276472" + " region_iso region_label country_label population\n", + "0 CH-GE Canton of Geneva Switzerland 499480\n", + "1 CH-JU Canton of Jura Switzerland 73419" ] }, "execution_count": 6, @@ -313,10 +319,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.009881, - "end_time": "2020-04-03T21:33:25.149605", + "duration": 0.006945, + "end_time": "2020-04-07T10:17:58.286838", "exception": false, - "start_time": "2020-04-03T21:33:25.139724", + "start_time": "2020-04-07T10:17:58.279893", "status": "completed" }, "tags": [] @@ -330,10 +336,10 @@ "execution_count": 7, "metadata": { "papermill": { - "duration": 0.952179, - "end_time": "2020-04-03T21:33:26.110326", + "duration": 0.498088, + "end_time": "2020-04-07T10:17:58.792249", "exception": false, - "start_time": "2020-04-03T21:33:25.158147", + "start_time": "2020-04-07T10:17:58.294161", "status": "completed" }, "tags": [] @@ -368,31 +374,34 @@ " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>region_iso</th>\n", - " <th>regionLabel</th>\n", + " <th>region_label</th>\n", + " <th>country_label</th>\n", " <th>population</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>US-NH</td>\n", - " <td>New Hampshire</td>\n", - " <td>1330608</td>\n", + " <td>US-DE</td>\n", + " <td>Delaware</td>\n", + " <td>United States of America</td>\n", + " <td>945934</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>US-GA</td>\n", - " <td>Georgia</td>\n", - " <td>10214860</td>\n", + " <td>US-OR</td>\n", + " <td>Oregon</td>\n", + " <td>United States of America</td>\n", + " <td>4028977</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " region_iso regionLabel population\n", - "0 US-NH New Hampshire 1330608\n", - "1 US-GA Georgia 10214860" + " region_iso region_label country_label population\n", + "0 US-DE Delaware United States of America 945934\n", + "1 US-OR Oregon United States of America 4028977" ] }, "execution_count": 7, @@ -407,13 +416,29 @@ "write_population_data(iso_code, df)\n", "df.head(2)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "papermill": { + "duration": 0.0073, + "end_time": "2020-04-07T10:17:58.806896", + "exception": false, + "start_time": "2020-04-07T10:17:58.799596", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.7.7 64-bit ('.venv': venv)", "language": "python", - "name": "python3" + "name": "python37764bitvenvvenv814492364d964019a25eb1cf3dc3e99c" }, "language_info": { "codemirror_mode": { @@ -425,11 +450,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.7" }, "papermill": { - "duration": 8.557795, - "end_time": "2020-04-03T21:33:26.443798", + "duration": 3.228564, + "end_time": "2020-04-07T10:17:58.820057", "environment_variables": {}, "exception": null, "input_path": "notebooks/process/wikidata-pop-data.ipynb", @@ -439,8 +464,8 @@ "PAPERMILL_OUTPUT_PATH": "runs/wikidata-pop-data.run.ipynb", "out_folder": "./data/atlas/wikidata" }, - "start_time": "2020-04-03T21:33:17.886003", - "version": "1.1.0" + "start_time": "2020-04-07T10:17:55.591493", + "version": "2.0.0" } }, "nbformat": 4, -- GitLab