From 1819b9646d9c7fb6360d7a9e7eb7aa79607e0c62 Mon Sep 17 00:00:00 2001 From: "CR (covid cron)" <beepbop@example.com> Date: Tue, 7 Jul 2020 07:30:47 +0000 Subject: [PATCH] renku rerun data/covidtracking/states-metadata.json data/covidtracking/states-daily.json --- .../bb2335d7ac1a434593b661882db2586c.cwl | 120 +++++++++++++++++ data/covidtracking/states-daily.json | 4 +- data/covidtracking/states-metadata.json | 4 +- runs/download-covidtracking-data.runs.ipynb | 126 +++++++++--------- 4 files changed, 187 insertions(+), 67 deletions(-) create mode 100644 .renku/workflow/bb2335d7ac1a434593b661882db2586c.cwl diff --git a/.renku/workflow/bb2335d7ac1a434593b661882db2586c.cwl b/.renku/workflow/bb2335d7ac1a434593b661882db2586c.cwl new file mode 100644 index 000000000..b611ac623 --- /dev/null +++ b/.renku/workflow/bb2335d7ac1a434593b661882db2586c.cwl @@ -0,0 +1,120 @@ +class: Workflow +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: states-metadata.json + streamable: false + type: string + input_2: + default: out_folder + streamable: false + type: string + input_3: + default: data/covidtracking + streamable: false + type: string + input_4: + default: + class: File + path: ../../notebooks/process/download-covidtracking-data.ipynb + streamable: false + type: File + input_5: + default: runs/download-covidtracking-data.runs.ipynb + streamable: false + type: string + input_6: + default: states-daily.json + streamable: false + type: string +outputs: + output_0: + outputSource: step_2/output_1 + streamable: false + type: Directory + output_1: + outputSource: step_2/output_0 + streamable: false + type: File +requirements: [] +steps: + step_1: + in: + filename: input_1 + input_directory: step_2/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-metadata.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - &id001 + class: InlineJavascriptRequirement + - &id002 + class: InitialWorkDirRequirement + listing: $(inputs.input_directory.listing) + successCodes: [] + temporaryFailCodes: [] + step_2: + in: + input_1: input_2 + input_2: input_3 + input_3: input_4 + input_4: input_5 + out: + - output_0 + - output_1 + run: a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl + step_3: + in: + filename: input_6 + input_directory: step_2/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-daily.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - *id001 + - *id002 + successCodes: [] + temporaryFailCodes: [] diff --git a/data/covidtracking/states-daily.json b/data/covidtracking/states-daily.json index 411721fb6..cf5f5e800 100644 --- a/data/covidtracking/states-daily.json +++ b/data/covidtracking/states-daily.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e53336c842fa3a355296423a25f3a6bc492cd60c5710cfdda1791997f056ac1 -size 6054091 +oid sha256:5cd5a007ff5c5c6c2161bf6850242f9405fc042147c858a7baa5f35f950d49d2 +size 6104134 diff --git a/data/covidtracking/states-metadata.json b/data/covidtracking/states-metadata.json index 4a9c34c5d..7c9a156ae 100644 --- a/data/covidtracking/states-metadata.json +++ b/data/covidtracking/states-metadata.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a25b65241a50ddf91000e761c6398111582f85b7a1db085779febf7f212dc3c6 -size 46183 +oid sha256:6872350aab8a8a64d3e1223af01e7043ab7e04886074a1fc196b5dac451f651b +size 46029 diff --git a/runs/download-covidtracking-data.runs.ipynb b/runs/download-covidtracking-data.runs.ipynb index 2413cbdd8..a887411b4 100644 --- a/runs/download-covidtracking-data.runs.ipynb +++ b/runs/download-covidtracking-data.runs.ipynb @@ -5,10 +5,10 @@ "execution_count": 1, "metadata": { "papermill": { - "duration": 0.747101, - "end_time": "2020-07-06T07:29:15.076639", + "duration": 3.637171, + "end_time": "2020-07-07T07:30:45.431401", "exception": false, - "start_time": "2020-07-06T07:29:14.329538", + "start_time": "2020-07-07T07:30:41.794230", "status": "completed" }, "tags": [] @@ -25,10 +25,10 @@ "execution_count": 2, "metadata": { "papermill": { - "duration": 0.019468, - "end_time": "2020-07-06T07:29:15.106675", + "duration": 0.023769, + "end_time": "2020-07-07T07:30:45.467502", "exception": false, - "start_time": "2020-07-06T07:29:15.087207", + "start_time": "2020-07-07T07:30:45.443733", "status": "completed" }, "tags": [ @@ -46,10 +46,10 @@ "execution_count": 3, "metadata": { "papermill": { - "duration": 0.018296, - "end_time": "2020-07-06T07:29:15.135980", + "duration": 0.018739, + "end_time": "2020-07-07T07:30:45.497602", "exception": false, - "start_time": "2020-07-06T07:29:15.117684", + "start_time": "2020-07-07T07:30:45.478863", "status": "completed" }, "tags": [ @@ -59,7 +59,7 @@ "outputs": [], "source": [ "# Parameters\n", - "PAPERMILL_INPUT_PATH = \"/tmp/h0x8je9l/notebooks/process/download-covidtracking-data.ipynb\"\n", + "PAPERMILL_INPUT_PATH = \"/tmp/gm5bis_b/notebooks/process/download-covidtracking-data.ipynb\"\n", "PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n", "out_folder = \"data/covidtracking\"\n" ] @@ -68,10 +68,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.008174, - "end_time": "2020-07-06T07:29:15.152704", + "duration": 0.008088, + "end_time": "2020-07-07T07:30:45.514927", "exception": false, - "start_time": "2020-07-06T07:29:15.144530", + "start_time": "2020-07-07T07:30:45.506839", "status": "completed" }, "tags": [] @@ -87,10 +87,10 @@ "execution_count": 4, "metadata": { "papermill": { - "duration": 0.099707, - "end_time": "2020-07-06T07:29:15.259501", + "duration": 0.125387, + "end_time": "2020-07-07T07:30:45.648993", "exception": false, - "start_time": "2020-07-06T07:29:15.159794", + "start_time": "2020-07-07T07:30:45.523606", "status": "completed" }, "tags": [] @@ -107,10 +107,10 @@ "execution_count": 5, "metadata": { "papermill": { - "duration": 0.021748, - "end_time": "2020-07-06T07:29:15.292314", + "duration": 0.019024, + "end_time": "2020-07-07T07:30:45.678715", "exception": false, - "start_time": "2020-07-06T07:29:15.270566", + "start_time": "2020-07-07T07:30:45.659691", "status": "completed" }, "tags": [] @@ -129,10 +129,10 @@ "execution_count": 6, "metadata": { "papermill": { - "duration": 0.077001, - "end_time": "2020-07-06T07:29:15.380358", + "duration": 0.067853, + "end_time": "2020-07-07T07:30:45.754067", "exception": false, - "start_time": "2020-07-06T07:29:15.303357", + "start_time": "2020-07-07T07:30:45.686214", "status": "completed" }, "tags": [] @@ -183,7 +183,7 @@ " <tr>\n", " <th>0</th>\n", " <td>AK</td>\n", - " <td>Total tests are taken from the annotations on ...</td>\n", + " <td>Negatives = (Totals – Positives)\\nPositives oc...</td>\n", " <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n", " <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n", " <td>https://alaska-dhss.maps.arcgis.com/apps/opsda...</td>\n", @@ -214,7 +214,7 @@ ], "text/plain": [ " state notes \\\n", - "0 AK Total tests are taken from the annotations on ... \n", + "0 AK Negatives = (Totals – Positives)\\nPositives oc... \n", "1 AL Negatives = (Totals - Positives) \\nPositives o... \n", "\n", " covid19Site \\\n", @@ -249,10 +249,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.008199, - "end_time": "2020-07-06T07:29:15.400172", + "duration": 0.009115, + "end_time": "2020-07-07T07:30:45.774803", "exception": false, - "start_time": "2020-07-06T07:29:15.391973", + "start_time": "2020-07-07T07:30:45.765688", "status": "completed" }, "tags": [] @@ -266,10 +266,10 @@ "execution_count": 7, "metadata": { "papermill": { - "duration": 0.201465, - "end_time": "2020-07-06T07:29:15.609838", + "duration": 0.254218, + "end_time": "2020-07-07T07:30:46.038719", "exception": false, - "start_time": "2020-07-06T07:29:15.408373", + "start_time": "2020-07-07T07:30:45.784501", "status": "completed" }, "tags": [] @@ -286,10 +286,10 @@ "execution_count": 8, "metadata": { "papermill": { - "duration": 0.02893, - "end_time": "2020-07-06T07:29:15.651163", + "duration": 0.026474, + "end_time": "2020-07-07T07:30:46.077713", "exception": false, - "start_time": "2020-07-06T07:29:15.622233", + "start_time": "2020-07-07T07:30:46.051239", "status": "completed" }, "tags": [] @@ -308,10 +308,10 @@ "execution_count": 9, "metadata": { "papermill": { - "duration": 0.271056, - "end_time": "2020-07-06T07:29:15.935127", + "duration": 0.267747, + "end_time": "2020-07-07T07:30:46.356918", "exception": false, - "start_time": "2020-07-06T07:29:15.664071", + "start_time": "2020-07-07T07:30:46.089171", "status": "completed" }, "tags": [] @@ -321,7 +321,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "6849 data points\n" + "6905 data points\n" ] }, { @@ -371,21 +371,21 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>20200705</td>\n", + " <td>20200706</td>\n", " <td>AK</td>\n", - " <td>1111.0</td>\n", - " <td>121621.0</td>\n", + " <td>1138.0</td>\n", + " <td>122615.0</td>\n", " <td>NaN</td>\n", - " <td>23.0</td>\n", + " <td>19.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>3.0</td>\n", " <td>...</td>\n", - " <td>122732</td>\n", + " <td>123753</td>\n", " <td>0</td>\n", " <td>0</td>\n", - " <td>60747b92beb9c600e7cb30085266b382691b2c3a</td>\n", + " <td>6337485c1dab1a0798672c8aa81f714d2b339df2</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", @@ -395,21 +395,21 @@ " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>20200705</td>\n", + " <td>20200706</td>\n", " <td>AL</td>\n", - " <td>43953.0</td>\n", - " <td>405933.0</td>\n", + " <td>44878.0</td>\n", + " <td>410217.0</td>\n", " <td>NaN</td>\n", - " <td>885.0</td>\n", - " <td>2909.0</td>\n", + " <td>1016.0</td>\n", + " <td>2914.0</td>\n", " <td>NaN</td>\n", " <td>843.0</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", - " <td>449886</td>\n", + " <td>455095</td>\n", " <td>0</td>\n", - " <td>3</td>\n", - " <td>261d349a43f69648cc120a21229dcccb34d1f9e2</td>\n", + " <td>5</td>\n", + " <td>d191311d4fceb2d234266af63c6554fdc93492e4</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", @@ -424,20 +424,20 @@ ], "text/plain": [ " date state positive negative pending hospitalizedCurrently \\\n", - "0 20200705 AK 1111.0 121621.0 NaN 23.0 \n", - "1 20200705 AL 43953.0 405933.0 NaN 885.0 \n", + "0 20200706 AK 1138.0 122615.0 NaN 19.0 \n", + "1 20200706 AL 44878.0 410217.0 NaN 1016.0 \n", "\n", " hospitalizedCumulative inIcuCurrently inIcuCumulative \\\n", "0 NaN NaN NaN \n", - "1 2909.0 NaN 843.0 \n", + "1 2914.0 NaN 843.0 \n", "\n", " onVentilatorCurrently ... posNeg deathIncrease hospitalizedIncrease \\\n", - "0 3.0 ... 122732 0 0 \n", - "1 NaN ... 449886 0 3 \n", + "0 3.0 ... 123753 0 0 \n", + "1 NaN ... 455095 0 5 \n", "\n", " hash commercialScore \\\n", - "0 60747b92beb9c600e7cb30085266b382691b2c3a 0 \n", - "1 261d349a43f69648cc120a21229dcccb34d1f9e2 0 \n", + "0 6337485c1dab1a0798672c8aa81f714d2b339df2 0 \n", + "1 d191311d4fceb2d234266af63c6554fdc93492e4 0 \n", "\n", " negativeRegularScore negativeScore positiveScore score grade \n", "0 0 0 0 0 \n", @@ -477,18 +477,18 @@ "version": "3.7.3" }, "papermill": { - "duration": 2.905163, - "end_time": "2020-07-06T07:29:16.258387", + "duration": 7.084326, + "end_time": "2020-07-07T07:30:46.680158", "environment_variables": {}, "exception": null, - "input_path": "/tmp/h0x8je9l/notebooks/process/download-covidtracking-data.ipynb", + "input_path": "/tmp/gm5bis_b/notebooks/process/download-covidtracking-data.ipynb", "output_path": "runs/download-covidtracking-data.runs.ipynb", "parameters": { - "PAPERMILL_INPUT_PATH": "/tmp/h0x8je9l/notebooks/process/download-covidtracking-data.ipynb", + "PAPERMILL_INPUT_PATH": "/tmp/gm5bis_b/notebooks/process/download-covidtracking-data.ipynb", "PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb", "out_folder": "data/covidtracking" }, - "start_time": "2020-07-06T07:29:13.353224", + "start_time": "2020-07-07T07:30:39.595832", "version": "1.1.0" } }, -- GitLab