From 2448967fa12dcff17c565683bbd88d369e2dd0a4 Mon Sep 17 00:00:00 2001 From: "CR (covid cron)" <cramakri+covid-cron@ethz.ch> Date: Thu, 30 Apr 2020 20:39:38 +0000 Subject: [PATCH] renku rerun data/covidtracking/states-metadata.json data/covidtracking/states-daily.json --- .../bf45cb4d1740458e971c5c555d51e3ae.cwl | 148 ++++++++++++++++++ data/covidtracking/states-daily.json | 4 +- data/covidtracking/states-metadata.json | 4 +- runs/download-covidtracking-data.runs.ipynb | 144 ++++++++--------- 4 files changed, 224 insertions(+), 76 deletions(-) create mode 100644 .renku/workflow/bf45cb4d1740458e971c5c555d51e3ae.cwl diff --git a/.renku/workflow/bf45cb4d1740458e971c5c555d51e3ae.cwl b/.renku/workflow/bf45cb4d1740458e971c5c555d51e3ae.cwl new file mode 100644 index 000000000..6912aafa9 --- /dev/null +++ b/.renku/workflow/bf45cb4d1740458e971c5c555d51e3ae.cwl @@ -0,0 +1,148 @@ +class: Workflow +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: out_folder + streamable: false + type: string + input_10: + default: states-metadata.json + streamable: false + type: string + input_2: + default: data/covidtracking + streamable: false + type: string + input_3: + default: + class: File + path: ../../notebooks/process/download-covidtracking-data.ipynb + streamable: false + type: File + input_4: + default: runs/download-covidtracking-data.runs.ipynb + streamable: false + type: string + input_5: + default: states-daily.json + streamable: false + type: string + input_6: + default: out_folder + streamable: false + type: string + input_7: + default: data/covidtracking + streamable: false + type: string + input_8: + default: + class: File + path: ../../notebooks/process/download-covidtracking-data.ipynb + streamable: false + type: File + input_9: + default: runs/download-covidtracking-data.runs.ipynb + streamable: false + type: string +outputs: + output_1: + outputSource: step_1/output_0 + streamable: false + type: File + output_3: + outputSource: step_1/output_1 + streamable: false + type: Directory +requirements: [] +steps: + step_1: + in: + input_1: input_1 + input_2: input_2 + input_3: input_3 + input_4: input_4 + out: + - output_0 + - output_1 + run: a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl + step_2: + in: + filename: input_5 + input_directory: step_3/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-daily.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - &id001 + class: InlineJavascriptRequirement + - &id002 + class: InitialWorkDirRequirement + listing: $(inputs.input_directory.listing) + successCodes: [] + temporaryFailCodes: [] + step_3: + in: + input_1: input_6 + input_2: input_7 + input_3: input_8 + input_4: input_9 + out: + - output_0 + - output_1 + run: a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl + step_4: + in: + filename: input_10 + input_directory: step_1/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-metadata.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - *id001 + - *id002 + successCodes: [] + temporaryFailCodes: [] diff --git a/data/covidtracking/states-daily.json b/data/covidtracking/states-daily.json index 1b830ed4e..e4ede11ba 100644 --- a/data/covidtracking/states-daily.json +++ b/data/covidtracking/states-daily.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b1aee7944a94451ab257145cd55497c11f26841b6071f5854817ad8d41d46576 -size 1530498 +oid sha256:de4b330e16d1440786340b4751e413d340c791f2ff303ed6ef576cb036f10a2f +size 1562090 diff --git a/data/covidtracking/states-metadata.json b/data/covidtracking/states-metadata.json index db5f4e07a..bfddaa037 100644 --- a/data/covidtracking/states-metadata.json +++ b/data/covidtracking/states-metadata.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1e5e886f27600015ae7ecab4c43995eb043c8370a0c8d6c26e4f9bffad486693 -size 30080 +oid sha256:3d5c8aaa3c2c56321df18cd2f9e3d851a1948cef43863db7f9213010a6dbc72a +size 29992 diff --git a/runs/download-covidtracking-data.runs.ipynb b/runs/download-covidtracking-data.runs.ipynb index d943a81fe..3e46874d4 100644 --- a/runs/download-covidtracking-data.runs.ipynb +++ b/runs/download-covidtracking-data.runs.ipynb @@ -5,10 +5,10 @@ "execution_count": 1, "metadata": { "papermill": { - "duration": 3.865322, - "end_time": "2020-04-29T07:27:05.870354", + "duration": 0.485688, + "end_time": "2020-04-30T20:39:31.475755", "exception": false, - "start_time": "2020-04-29T07:27:02.005032", + "start_time": "2020-04-30T20:39:30.990067", "status": "completed" }, "tags": [] @@ -25,10 +25,10 @@ "execution_count": 2, "metadata": { "papermill": { - "duration": 0.021783, - "end_time": "2020-04-29T07:27:05.905512", + "duration": 0.022523, + "end_time": "2020-04-30T20:39:31.508540", "exception": false, - "start_time": "2020-04-29T07:27:05.883729", + "start_time": "2020-04-30T20:39:31.486017", "status": "completed" }, "tags": [ @@ -46,10 +46,10 @@ "execution_count": 3, "metadata": { "papermill": { - "duration": 0.027562, - "end_time": "2020-04-29T07:27:05.941731", + "duration": 0.023961, + "end_time": "2020-04-30T20:39:31.546006", "exception": false, - "start_time": "2020-04-29T07:27:05.914169", + "start_time": "2020-04-30T20:39:31.522045", "status": "completed" }, "tags": [ @@ -59,7 +59,7 @@ "outputs": [], "source": [ "# Parameters\n", - "PAPERMILL_INPUT_PATH = \"/tmp/1dxcr_ip/notebooks/process/download-covidtracking-data.ipynb\"\n", + "PAPERMILL_INPUT_PATH = \"/tmp/q32lk3hk/notebooks/process/download-covidtracking-data.ipynb\"\n", "PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n", "out_folder = \"data/covidtracking\"\n" ] @@ -68,10 +68,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.008265, - "end_time": "2020-04-29T07:27:05.964251", + "duration": 0.005595, + "end_time": "2020-04-30T20:39:31.559832", "exception": false, - "start_time": "2020-04-29T07:27:05.955986", + "start_time": "2020-04-30T20:39:31.554237", "status": "completed" }, "tags": [] @@ -87,10 +87,10 @@ "execution_count": 4, "metadata": { "papermill": { - "duration": 0.580459, - "end_time": "2020-04-29T07:27:06.551574", + "duration": 2.797905, + "end_time": "2020-04-30T20:39:34.363335", "exception": false, - "start_time": "2020-04-29T07:27:05.971115", + "start_time": "2020-04-30T20:39:31.565430", "status": "completed" }, "tags": [] @@ -107,10 +107,10 @@ "execution_count": 5, "metadata": { "papermill": { - "duration": 0.028027, - "end_time": "2020-04-29T07:27:06.597151", + "duration": 0.028778, + "end_time": "2020-04-30T20:39:34.408538", "exception": false, - "start_time": "2020-04-29T07:27:06.569124", + "start_time": "2020-04-30T20:39:34.379760", "status": "completed" }, "tags": [] @@ -129,10 +129,10 @@ "execution_count": 6, "metadata": { "papermill": { - "duration": 0.074928, - "end_time": "2020-04-29T07:27:06.682377", + "duration": 0.072536, + "end_time": "2020-04-30T20:39:34.493093", "exception": false, - "start_time": "2020-04-29T07:27:06.607449", + "start_time": "2020-04-30T20:39:34.420557", "status": "completed" }, "tags": [] @@ -246,10 +246,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.007555, - "end_time": "2020-04-29T07:27:06.701924", + "duration": 0.007706, + "end_time": "2020-04-30T20:39:34.515266", "exception": false, - "start_time": "2020-04-29T07:27:06.694369", + "start_time": "2020-04-30T20:39:34.507560", "status": "completed" }, "tags": [] @@ -263,10 +263,10 @@ "execution_count": 7, "metadata": { "papermill": { - "duration": 0.308747, - "end_time": "2020-04-29T07:27:07.017018", + "duration": 0.12028, + "end_time": "2020-04-30T20:39:34.642136", "exception": false, - "start_time": "2020-04-29T07:27:06.708271", + "start_time": "2020-04-30T20:39:34.521856", "status": "completed" }, "tags": [] @@ -283,10 +283,10 @@ "execution_count": 8, "metadata": { "papermill": { - "duration": 0.031866, - "end_time": "2020-04-29T07:27:07.065408", + "duration": 0.031997, + "end_time": "2020-04-30T20:39:34.688913", "exception": false, - "start_time": "2020-04-29T07:27:07.033542", + "start_time": "2020-04-30T20:39:34.656916", "status": "completed" }, "tags": [] @@ -305,10 +305,10 @@ "execution_count": 9, "metadata": { "papermill": { - "duration": 0.118292, - "end_time": "2020-04-29T07:27:07.194849", + "duration": 0.11394, + "end_time": "2020-04-30T20:39:34.814486", "exception": false, - "start_time": "2020-04-29T07:27:07.076557", + "start_time": "2020-04-30T20:39:34.700546", "status": "completed" }, "tags": [] @@ -318,7 +318,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "3041 data points\n" + "3097 data points\n" ] }, { @@ -368,51 +368,51 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>20200428</td>\n", + " <td>20200429</td>\n", " <td>AK</td>\n", - " <td>351.0</td>\n", - " <td>16738.0</td>\n", + " <td>355.0</td>\n", + " <td>18764.0</td>\n", " <td>NaN</td>\n", - " <td>16.0</td>\n", + " <td>14.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", - " <td>17089.0</td>\n", - " <td>17089.0</td>\n", - " <td>17089.0</td>\n", + " <td>19119.0</td>\n", + " <td>19119.0</td>\n", + " <td>19119.0</td>\n", " <td>2</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", - " <td>827.0</td>\n", - " <td>6.0</td>\n", - " <td>833.0</td>\n", + " <td>2026.0</td>\n", + " <td>4.0</td>\n", + " <td>2030.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>20200428</td>\n", + " <td>20200429</td>\n", " <td>AL</td>\n", - " <td>6687.0</td>\n", - " <td>69140.0</td>\n", + " <td>6842.0</td>\n", + " <td>73607.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>911.0</td>\n", + " <td>945.0</td>\n", " <td>NaN</td>\n", " <td>335.0</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", - " <td>911.0</td>\n", - " <td>75827.0</td>\n", - " <td>75827.0</td>\n", - " <td>75827.0</td>\n", + " <td>945.0</td>\n", + " <td>80449.0</td>\n", + " <td>80449.0</td>\n", + " <td>80449.0</td>\n", " <td>1</td>\n", - " <td>20.0</td>\n", - " <td>39.0</td>\n", - " <td>1642.0</td>\n", - " <td>188.0</td>\n", - " <td>1830.0</td>\n", + " <td>3.0</td>\n", + " <td>34.0</td>\n", + " <td>4467.0</td>\n", + " <td>155.0</td>\n", + " <td>4622.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -421,24 +421,24 @@ ], "text/plain": [ " date state positive negative pending hospitalizedCurrently \\\n", - "0 20200428 AK 351.0 16738.0 NaN 16.0 \n", - "1 20200428 AL 6687.0 69140.0 NaN NaN \n", + "0 20200429 AK 355.0 18764.0 NaN 14.0 \n", + "1 20200429 AL 6842.0 73607.0 NaN NaN \n", "\n", " hospitalizedCumulative inIcuCurrently inIcuCumulative \\\n", "0 NaN NaN NaN \n", - "1 911.0 NaN 335.0 \n", + "1 945.0 NaN 335.0 \n", "\n", " onVentilatorCurrently ... hospitalized total totalTestResults \\\n", - "0 NaN ... NaN 17089.0 17089.0 \n", - "1 NaN ... 911.0 75827.0 75827.0 \n", + "0 NaN ... NaN 19119.0 19119.0 \n", + "1 NaN ... 945.0 80449.0 80449.0 \n", "\n", " posNeg fips deathIncrease hospitalizedIncrease negativeIncrease \\\n", - "0 17089.0 2 0.0 0.0 827.0 \n", - "1 75827.0 1 20.0 39.0 1642.0 \n", + "0 19119.0 2 0.0 0.0 2026.0 \n", + "1 80449.0 1 3.0 34.0 4467.0 \n", "\n", " positiveIncrease totalTestResultsIncrease \n", - "0 6.0 833.0 \n", - "1 188.0 1830.0 \n", + "0 4.0 2030.0 \n", + "1 155.0 4622.0 \n", "\n", "[2 rows x 25 columns]" ] @@ -474,18 +474,18 @@ "version": "3.7.3" }, "papermill": { - "duration": 7.505897, - "end_time": "2020-04-29T07:27:07.518570", + "duration": 5.073446, + "end_time": "2020-04-30T20:39:35.130808", "environment_variables": {}, "exception": null, - "input_path": "/tmp/1dxcr_ip/notebooks/process/download-covidtracking-data.ipynb", + "input_path": "/tmp/q32lk3hk/notebooks/process/download-covidtracking-data.ipynb", "output_path": "runs/download-covidtracking-data.runs.ipynb", "parameters": { - "PAPERMILL_INPUT_PATH": "/tmp/1dxcr_ip/notebooks/process/download-covidtracking-data.ipynb", + "PAPERMILL_INPUT_PATH": "/tmp/q32lk3hk/notebooks/process/download-covidtracking-data.ipynb", "PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb", "out_folder": "data/covidtracking" }, - "start_time": "2020-04-29T07:27:00.012673", + "start_time": "2020-04-30T20:39:30.057362", "version": "1.1.0" } }, -- GitLab