From ff93d6fe094da5e00e4abc74a129dbac0efba190 Mon Sep 17 00:00:00 2001 From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch> Date: Wed, 25 Mar 2020 23:06:30 +0000 Subject: [PATCH] renku run papermill -p ts_folder ./data/covid-19_jhu-csse/ -p worldmap_path ./data/worldmap/country_centroids.csv -p out_folder ./data/geodata/ --inject-paths notebooks/process/CompileGeoData.ipynb runs/CompileGeoData.run.ipynb --- ...a8bde4314c539e6fde022b289b63_papermill.cwl | 113 ++++++++++ runs/CompileGeoData.run.ipynb | 202 +++++++++--------- 2 files changed, 217 insertions(+), 98 deletions(-) create mode 100644 .renku/workflow/9f96a8bde4314c539e6fde022b289b63_papermill.cwl diff --git a/.renku/workflow/9f96a8bde4314c539e6fde022b289b63_papermill.cwl b/.renku/workflow/9f96a8bde4314c539e6fde022b289b63_papermill.cwl new file mode 100644 index 00000000..c44a8022 --- /dev/null +++ b/.renku/workflow/9f96a8bde4314c539e6fde022b289b63_papermill.cwl @@ -0,0 +1,113 @@ +arguments: [] +baseCommand: +- papermill +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: ts_folder + inputBinding: + position: 1 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_2: + default: + class: Directory + listing: [] + path: ../../data/covid-19_jhu-csse + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: Directory + input_3: + default: worldmap_path + inputBinding: + position: 3 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_4: + default: + class: File + path: ../../data/worldmap/country_centroids.csv + inputBinding: + position: 4 + separate: true + shellQuote: true + streamable: false + type: File + input_5: + default: out_folder + inputBinding: + position: 5 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_6: + default: + class: Directory + listing: [] + path: ../../data/geodata + inputBinding: + position: 6 + separate: true + shellQuote: true + streamable: false + type: Directory + input_7: + default: + class: File + path: ../../notebooks/process/CompileGeoData.ipynb + inputBinding: + position: 7 + prefix: --inject-paths + separate: true + shellQuote: true + streamable: false + type: File + input_8: + default: runs/CompileGeoData.run.ipynb + inputBinding: + position: 8 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_8) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: runs + writable: true + - entry: $(inputs.input_2) + entryname: data/covid-19_jhu-csse + writable: false + - entry: $(inputs.input_4) + entryname: data/worldmap/country_centroids.csv + writable: false + - entry: $(inputs.input_6) + entryname: data/geodata + writable: false + - entry: $(inputs.input_7) + entryname: notebooks/process/CompileGeoData.ipynb + writable: false +successCodes: [] +temporaryFailCodes: [] diff --git a/runs/CompileGeoData.run.ipynb b/runs/CompileGeoData.run.ipynb index df4d87d5..b634743e 100644 --- a/runs/CompileGeoData.run.ipynb +++ b/runs/CompileGeoData.run.ipynb @@ -4,10 +4,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.017678, - "end_time": "2020-03-20T21:46:14.724207", + "duration": 0.022012, + "end_time": "2020-03-25T23:06:29.412987", "exception": false, - "start_time": "2020-03-20T21:46:14.706529", + "start_time": "2020-03-25T23:06:29.390975", "status": "completed" }, "tags": [] @@ -23,10 +23,10 @@ "execution_count": 1, "metadata": { "papermill": { - "duration": 0.322985, - "end_time": "2020-03-20T21:46:15.058915", + "duration": 0.31006, + "end_time": "2020-03-25T23:06:29.733704", "exception": false, - "start_time": "2020-03-20T21:46:14.735930", + "start_time": "2020-03-25T23:06:29.423644", "status": "completed" }, "tags": [] @@ -42,18 +42,18 @@ "execution_count": 2, "metadata": { "papermill": { - "duration": 0.019294, - "end_time": "2020-03-20T21:46:15.093747", + "duration": 0.032622, + "end_time": "2020-03-25T23:06:29.791939", "exception": false, - "start_time": "2020-03-20T21:46:15.074453", + "start_time": "2020-03-25T23:06:29.759317", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ - "ts_folder = \"../data/covid-19_jhu-csse/\"\n", - "worldmap_path = \"../data/worldmap/country_centroids.csv\"\n", + "ts_folder = \"../../data/covid-19_jhu-csse/\"\n", + "worldmap_path = \"../../data/worldmap/country_centroids.csv\"\n", "out_folder = None\n", "PAPERMILL_OUTPUT_PATH = None" ] @@ -62,10 +62,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.009576, - "end_time": "2020-03-20T21:46:15.115538", + "duration": 0.010303, + "end_time": "2020-03-25T23:06:29.819422", "exception": false, - "start_time": "2020-03-20T21:46:15.105962", + "start_time": "2020-03-25T23:06:29.809119", "status": "completed" }, "tags": [ @@ -81,10 +81,10 @@ "execution_count": 3, "metadata": { "papermill": { - "duration": 0.020198, - "end_time": "2020-03-20T21:46:15.145269", + "duration": 0.02561, + "end_time": "2020-03-25T23:06:29.853366", "exception": false, - "start_time": "2020-03-20T21:46:15.125071", + "start_time": "2020-03-25T23:06:29.827756", "status": "completed" }, "tags": [ @@ -94,11 +94,11 @@ "outputs": [], "source": [ "# Parameters\n", - "PAPERMILL_INPUT_PATH = \"/tmp/n6nbb565/notebooks/process/CompileGeoData.ipynb\"\n", + "PAPERMILL_INPUT_PATH = \"notebooks/process/CompileGeoData.ipynb\"\n", "PAPERMILL_OUTPUT_PATH = \"runs/CompileGeoData.run.ipynb\"\n", - "ts_folder = \"/tmp/n6nbb565/data/covid-19_jhu-csse\"\n", - "worldmap_path = \"/tmp/n6nbb565/data/worldmap/country_centroids.csv\"\n", - "out_folder = \"/tmp/n6nbb565/data/geodata\"\n" + "ts_folder = \"./data/covid-19_jhu-csse/\"\n", + "worldmap_path = \"./data/worldmap/country_centroids.csv\"\n", + "out_folder = \"./data/geodata/\"\n" ] }, { @@ -106,10 +106,10 @@ "execution_count": 4, "metadata": { "papermill": { - "duration": 0.023663, - "end_time": "2020-03-20T21:46:15.180074", + "duration": 0.030257, + "end_time": "2020-03-25T23:06:29.896762", "exception": false, - "start_time": "2020-03-20T21:46:15.156411", + "start_time": "2020-03-25T23:06:29.866505", "status": "completed" }, "tags": [] @@ -117,7 +117,7 @@ "outputs": [], "source": [ "def read_jhu_covid_region_df(name):\n", - " filename = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n", + " filename = os.path.join(ts_folder, f\"time_series_covid19_{name}_global.csv\")\n", " df = pd.read_csv(filename)\n", " df = df.set_index(['Country/Region', 'Province/State', 'Lat', 'Long'])\n", " df.columns = pd.to_datetime(df.columns)\n", @@ -130,27 +130,27 @@ "execution_count": 5, "metadata": { "papermill": { - "duration": 0.054183, - "end_time": "2020-03-20T21:46:15.246608", + "duration": 0.043726, + "end_time": "2020-03-25T23:06:29.950112", "exception": false, - "start_time": "2020-03-20T21:46:15.192425", + "start_time": "2020-03-25T23:06:29.906386", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ - "confirmed_df = read_jhu_covid_region_df(\"Confirmed\")" + "confirmed_df = read_jhu_covid_region_df(\"confirmed\")" ] }, { "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.010179, - "end_time": "2020-03-20T21:46:15.270430", + "duration": 0.010675, + "end_time": "2020-03-25T23:06:29.969305", "exception": false, - "start_time": "2020-03-20T21:46:15.260251", + "start_time": "2020-03-25T23:06:29.958630", "status": "completed" }, "tags": [] @@ -164,10 +164,10 @@ "execution_count": 6, "metadata": { "papermill": { - "duration": 0.039445, - "end_time": "2020-03-20T21:46:15.320605", + "duration": 0.03387, + "end_time": "2020-03-25T23:06:30.011734", "exception": false, - "start_time": "2020-03-20T21:46:15.281160", + "start_time": "2020-03-25T23:06:29.977864", "status": "completed" }, "tags": [] @@ -184,10 +184,10 @@ "execution_count": 7, "metadata": { "papermill": { - "duration": 0.036049, - "end_time": "2020-03-20T21:46:15.370897", + "duration": 0.024666, + "end_time": "2020-03-25T23:06:30.044629", "exception": false, - "start_time": "2020-03-20T21:46:15.334848", + "start_time": "2020-03-25T23:06:30.019963", "status": "completed" }, "tags": [] @@ -214,10 +214,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.010081, - "end_time": "2020-03-20T21:46:15.394797", + "duration": 0.008554, + "end_time": "2020-03-25T23:06:30.061787", "exception": false, - "start_time": "2020-03-20T21:46:15.384716", + "start_time": "2020-03-25T23:06:30.053233", "status": "completed" }, "tags": [] @@ -231,10 +231,10 @@ "execution_count": 8, "metadata": { "papermill": { - "duration": 0.023787, - "end_time": "2020-03-20T21:46:15.428815", + "duration": 0.021791, + "end_time": "2020-03-25T23:06:30.092630", "exception": false, - "start_time": "2020-03-20T21:46:15.405028", + "start_time": "2020-03-25T23:06:30.070839", "status": "completed" }, "tags": [] @@ -260,10 +260,10 @@ "execution_count": 9, "metadata": { "papermill": { - "duration": 0.020553, - "end_time": "2020-03-20T21:46:15.463075", + "duration": 0.016177, + "end_time": "2020-03-25T23:06:30.118527", "exception": false, - "start_time": "2020-03-20T21:46:15.442522", + "start_time": "2020-03-25T23:06:30.102350", "status": "completed" }, "tags": [] @@ -278,10 +278,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.011159, - "end_time": "2020-03-20T21:46:15.486134", + "duration": 0.008023, + "end_time": "2020-03-25T23:06:30.135845", "exception": false, - "start_time": "2020-03-20T21:46:15.474975", + "start_time": "2020-03-25T23:06:30.127822", "status": "completed" }, "tags": [] @@ -295,10 +295,10 @@ "execution_count": 10, "metadata": { "papermill": { - "duration": 0.046021, - "end_time": "2020-03-20T21:46:15.542446", + "duration": 0.024846, + "end_time": "2020-03-25T23:06:30.169128", "exception": false, - "start_time": "2020-03-20T21:46:15.496425", + "start_time": "2020-03-25T23:06:30.144282", "status": "completed" }, "tags": [] @@ -325,8 +325,8 @@ " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", - " <th>2020-03-18</th>\n", - " <th>2020-03-19</th>\n", + " <th>2020-03-23</th>\n", + " <th>2020-03-24</th>\n", " </tr>\n", " <tr>\n", " <th>Country/Region</th>\n", @@ -336,66 +336,72 @@ " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>Bahamas, The</th>\n", - " <td>1</td>\n", + " <th>Cabo Verde</th>\n", + " <td>3</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>Congo (Brazzaville)</th>\n", - " <td>1</td>\n", - " <td>3</td>\n", + " <td>4</td>\n", + " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>Congo (Kinshasa)</th>\n", - " <td>4</td>\n", - " <td>14</td>\n", + " <td>36</td>\n", + " <td>45</td>\n", " </tr>\n", " <tr>\n", - " <th>Cruise Ship</th>\n", + " <th>Diamond Princess</th>\n", " <td>712</td>\n", " <td>712</td>\n", " </tr>\n", " <tr>\n", " <th>Eswatini</th>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>4</td>\n", + " <td>4</td>\n", " </tr>\n", " <tr>\n", - " <th>Gambia, The</th>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <th>Gambia</th>\n", + " <td>2</td>\n", + " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>Holy See</th>\n", " <td>1</td>\n", - " <td>1</td>\n", + " <td>4</td>\n", " </tr>\n", " <tr>\n", - " <th>Martinique</th>\n", - " <td>19</td>\n", - " <td>23</td>\n", + " <th>Laos</th>\n", + " <td>0</td>\n", + " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>North Macedonia</th>\n", - " <td>35</td>\n", - " <td>48</td>\n", + " <td>136</td>\n", + " <td>148</td>\n", + " </tr>\n", + " <tr>\n", + " <th>The West Bank and Gaza</th>\n", + " <td>59</td>\n", + " <td>59</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " 2020-03-18 2020-03-19\n", - "Country/Region \n", - "Bahamas, The 1 3\n", - "Congo (Brazzaville) 1 3\n", - "Congo (Kinshasa) 4 14\n", - "Cruise Ship 712 712\n", - "Eswatini 1 1\n", - "Gambia, The 1 1\n", - "Holy See 1 1\n", - "Martinique 19 23\n", - "North Macedonia 35 48" + " 2020-03-23 2020-03-24\n", + "Country/Region \n", + "Cabo Verde 3 3\n", + "Congo (Brazzaville) 4 4\n", + "Congo (Kinshasa) 36 45\n", + "Diamond Princess 712 712\n", + "Eswatini 4 4\n", + "Gambia 2 3\n", + "Holy See 1 4\n", + "Laos 0 2\n", + "North Macedonia 136 148\n", + "The West Bank and Gaza 59 59" ] }, "execution_count": 10, @@ -413,10 +419,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.019504, - "end_time": "2020-03-20T21:46:15.583707", + "duration": 0.008814, + "end_time": "2020-03-25T23:06:30.186744", "exception": false, - "start_time": "2020-03-20T21:46:15.564203", + "start_time": "2020-03-25T23:06:30.177930", "status": "completed" }, "tags": [] @@ -430,10 +436,10 @@ "execution_count": 11, "metadata": { "papermill": { - "duration": 0.030535, - "end_time": "2020-03-20T21:46:15.627081", + "duration": 0.02285, + "end_time": "2020-03-25T23:06:30.218131", "exception": false, - "start_time": "2020-03-20T21:46:15.596546", + "start_time": "2020-03-25T23:06:30.195281", "status": "completed" }, "tags": [] @@ -465,20 +471,20 @@ "version": "3.7.3" }, "papermill": { - "duration": 2.076664, - "end_time": "2020-03-20T21:46:15.953254", + "duration": 2.077387, + "end_time": "2020-03-25T23:06:30.537956", "environment_variables": {}, "exception": null, - "input_path": "/tmp/n6nbb565/notebooks/process/CompileGeoData.ipynb", + "input_path": "notebooks/process/CompileGeoData.ipynb", "output_path": "runs/CompileGeoData.run.ipynb", "parameters": { - "PAPERMILL_INPUT_PATH": "/tmp/n6nbb565/notebooks/process/CompileGeoData.ipynb", + "PAPERMILL_INPUT_PATH": "notebooks/process/CompileGeoData.ipynb", "PAPERMILL_OUTPUT_PATH": "runs/CompileGeoData.run.ipynb", - "out_folder": "/tmp/n6nbb565/data/geodata", - "ts_folder": "/tmp/n6nbb565/data/covid-19_jhu-csse", - "worldmap_path": "/tmp/n6nbb565/data/worldmap/country_centroids.csv" + "out_folder": "./data/geodata/", + "ts_folder": "./data/covid-19_jhu-csse/", + "worldmap_path": "./data/worldmap/country_centroids.csv" }, - "start_time": "2020-03-20T21:46:13.876590", + "start_time": "2020-03-25T23:06:28.460569", "version": "1.1.0" } }, -- GitLab