From 2a41feb4913244d3720ffeda2a5c63665ba6eb7a Mon Sep 17 00:00:00 2001 From: "CR (covid cron)" <beepbop@example.com> Date: Wed, 3 Jun 2020 15:45:18 +0000 Subject: [PATCH] renku rerun data/covidtracking/states-metadata.json data/covidtracking/states-daily.json --- .../06187256ab5b48729e96df2e6340d7df.cwl | 120 ++++++++++ data/covidtracking/states-daily.json | 4 +- data/covidtracking/states-metadata.json | 4 +- runs/download-covidtracking-data.runs.ipynb | 220 +++++++++--------- 4 files changed, 232 insertions(+), 116 deletions(-) create mode 100644 .renku/workflow/06187256ab5b48729e96df2e6340d7df.cwl diff --git a/.renku/workflow/06187256ab5b48729e96df2e6340d7df.cwl b/.renku/workflow/06187256ab5b48729e96df2e6340d7df.cwl new file mode 100644 index 000000000..549fc8691 --- /dev/null +++ b/.renku/workflow/06187256ab5b48729e96df2e6340d7df.cwl @@ -0,0 +1,120 @@ +class: Workflow +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: states-metadata.json + streamable: false + type: string + input_2: + default: states-daily.json + streamable: false + type: string + input_3: + default: out_folder + streamable: false + type: string + input_4: + default: data/covidtracking + streamable: false + type: string + input_5: + default: + class: File + path: ../../notebooks/process/download-covidtracking-data.ipynb + streamable: false + type: File + input_6: + default: runs/download-covidtracking-data.runs.ipynb + streamable: false + type: string +outputs: + output_2: + outputSource: step_3/output_1 + streamable: false + type: Directory + output_3: + outputSource: step_3/output_0 + streamable: false + type: File +requirements: [] +steps: + step_1: + in: + filename: input_1 + input_directory: step_3/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-metadata.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - &id001 + class: InlineJavascriptRequirement + - &id002 + class: InitialWorkDirRequirement + listing: $(inputs.input_directory.listing) + successCodes: [] + temporaryFailCodes: [] + step_2: + in: + filename: input_2 + input_directory: step_3/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-daily.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - *id001 + - *id002 + successCodes: [] + temporaryFailCodes: [] + step_3: + in: + input_1: input_3 + input_2: input_4 + input_3: input_5 + input_4: input_6 + out: + - output_1 + - output_0 + run: a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl diff --git a/data/covidtracking/states-daily.json b/data/covidtracking/states-daily.json index 27dfd82d2..8b8ad50a5 100644 --- a/data/covidtracking/states-daily.json +++ b/data/covidtracking/states-daily.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b53d5ec861e859d9c4b5d92c824a94cf34fccdc411feee3fe450ca1b3566e6aa -size 2945856 +oid sha256:20d05d8bf54fbe25d465293949d98baa8217af8455274ebf331eaa9fdecfc7b5 +size 3896218 diff --git a/data/covidtracking/states-metadata.json b/data/covidtracking/states-metadata.json index 095f192b9..82c95999b 100644 --- a/data/covidtracking/states-metadata.json +++ b/data/covidtracking/states-metadata.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23628a019bf4b14d081e8b8519be4be9ee4712078cae76e8f455f72c066ffb9c -size 35330 +oid sha256:106cf31b2776e3d9c810972b8819576e6b9069017c01c9baf326178128121813 +size 34737 diff --git a/runs/download-covidtracking-data.runs.ipynb b/runs/download-covidtracking-data.runs.ipynb index 2bbab0ab4..7b7ab2ac3 100644 --- a/runs/download-covidtracking-data.runs.ipynb +++ b/runs/download-covidtracking-data.runs.ipynb @@ -5,10 +5,10 @@ "execution_count": 1, "metadata": { "papermill": { - "duration": 0.52214, - "end_time": "2020-06-02T08:11:09.503843", + "duration": 3.35233, + "end_time": "2020-06-03T15:45:09.318776", "exception": false, - "start_time": "2020-06-02T08:11:08.981703", + "start_time": "2020-06-03T15:45:05.966446", "status": "completed" }, "tags": [] @@ -25,10 +25,10 @@ "execution_count": 2, "metadata": { "papermill": { - "duration": 0.021722, - "end_time": "2020-06-02T08:11:09.539612", + "duration": 0.0259, + "end_time": "2020-06-03T15:45:09.361558", "exception": false, - "start_time": "2020-06-02T08:11:09.517890", + "start_time": "2020-06-03T15:45:09.335658", "status": "completed" }, "tags": [ @@ -46,10 +46,10 @@ "execution_count": 3, "metadata": { "papermill": { - "duration": 0.022346, - "end_time": "2020-06-02T08:11:09.570298", + "duration": 0.023503, + "end_time": "2020-06-03T15:45:09.397029", "exception": false, - "start_time": "2020-06-02T08:11:09.547952", + "start_time": "2020-06-03T15:45:09.373526", "status": "completed" }, "tags": [ @@ -59,7 +59,7 @@ "outputs": [], "source": [ "# Parameters\n", - "PAPERMILL_INPUT_PATH = \"/tmp/_29it5g5/notebooks/process/download-covidtracking-data.ipynb\"\n", + "PAPERMILL_INPUT_PATH = \"/tmp/eqrts4dp/notebooks/process/download-covidtracking-data.ipynb\"\n", "PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n", "out_folder = \"data/covidtracking\"\n" ] @@ -68,10 +68,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.006388, - "end_time": "2020-06-02T08:11:09.585324", + "duration": 0.006779, + "end_time": "2020-06-03T15:45:09.411335", "exception": false, - "start_time": "2020-06-02T08:11:09.578936", + "start_time": "2020-06-03T15:45:09.404556", "status": "completed" }, "tags": [] @@ -87,10 +87,10 @@ "execution_count": 4, "metadata": { "papermill": { - "duration": 0.620735, - "end_time": "2020-06-02T08:11:10.212779", + "duration": 0.597563, + "end_time": "2020-06-03T15:45:10.014708", "exception": false, - "start_time": "2020-06-02T08:11:09.592044", + "start_time": "2020-06-03T15:45:09.417145", "status": "completed" }, "tags": [] @@ -107,10 +107,10 @@ "execution_count": 5, "metadata": { "papermill": { - "duration": 0.028753, - "end_time": "2020-06-02T08:11:10.257183", + "duration": 0.03125, + "end_time": "2020-06-03T15:45:10.062585", "exception": false, - "start_time": "2020-06-02T08:11:10.228430", + "start_time": "2020-06-03T15:45:10.031335", "status": "completed" }, "tags": [] @@ -129,10 +129,10 @@ "execution_count": 6, "metadata": { "papermill": { - "duration": 0.048608, - "end_time": "2020-06-02T08:11:10.315976", + "duration": 0.091143, + "end_time": "2020-06-03T15:45:10.164517", "exception": false, - "start_time": "2020-06-02T08:11:10.267368", + "start_time": "2020-06-03T15:45:10.073374", "status": "completed" }, "tags": [] @@ -167,52 +167,52 @@ " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>state</th>\n", - " <th>covid19SiteOld</th>\n", + " <th>notes</th>\n", " <th>covid19Site</th>\n", " <th>covid19SiteSecondary</th>\n", " <th>twitter</th>\n", + " <th>covid19SiteOld</th>\n", + " <th>name</th>\n", + " <th>fips</th>\n", " <th>pui</th>\n", " <th>pum</th>\n", - " <th>notes</th>\n", - " <th>fips</th>\n", - " <th>name</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>AK</td>\n", - " <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n", + " <td>Total tests are taken from the annotations on ...</td>\n", " <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n", " <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n", " <td>@Alaska_DHSS</td>\n", - " <td>All data</td>\n", - " <td>False</td>\n", - " <td>Total tests are taken from the annotations on ...</td>\n", - " <td>2</td>\n", + " <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n", " <td>Alaska</td>\n", + " <td>2</td>\n", + " <td></td>\n", + " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>AL</td>\n", - " <td>http://www.alabamapublichealth.gov/infectiousd...</td>\n", + " <td>Negatives = (Totals - Positives) \\nPositives o...</td>\n", " <td>https://alpublichealth.maps.arcgis.com/apps/op...</td>\n", " <td>https://dph1.adph.state.al.us/covid-19/</td>\n", " <td>@alpublichealth</td>\n", - " <td>No data</td>\n", - " <td>False</td>\n", - " <td>Negatives = (Totals - Positives) \\nPositives o...</td>\n", - " <td>1</td>\n", + " <td>http://www.alabamapublichealth.gov/infectiousd...</td>\n", " <td>Alabama</td>\n", + " <td>1</td>\n", + " <td></td>\n", + " <td>False</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " state covid19SiteOld \\\n", - "0 AK http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-... \n", - "1 AL http://www.alabamapublichealth.gov/infectiousd... \n", + " state notes \\\n", + "0 AK Total tests are taken from the annotations on ... \n", + "1 AL Negatives = (Totals - Positives) \\nPositives o... \n", "\n", " covid19Site \\\n", "0 http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-... \n", @@ -222,13 +222,9 @@ "0 http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-... @Alaska_DHSS \n", "1 https://dph1.adph.state.al.us/covid-19/ @alpublichealth \n", "\n", - " pui pum notes fips \\\n", - "0 All data False Total tests are taken from the annotations on ... 2 \n", - "1 No data False Negatives = (Totals - Positives) \\nPositives o... 1 \n", - "\n", - " name \n", - "0 Alaska \n", - "1 Alabama " + " covid19SiteOld name fips pui pum \n", + "0 http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-... Alaska 2 False \n", + "1 http://www.alabamapublichealth.gov/infectiousd... Alabama 1 False " ] }, "execution_count": 6, @@ -246,10 +242,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.00642, - "end_time": "2020-06-02T08:11:10.329275", + "duration": 0.008537, + "end_time": "2020-06-03T15:45:10.188145", "exception": false, - "start_time": "2020-06-02T08:11:10.322855", + "start_time": "2020-06-03T15:45:10.179608", "status": "completed" }, "tags": [] @@ -263,10 +259,10 @@ "execution_count": 7, "metadata": { "papermill": { - "duration": 5.270376, - "end_time": "2020-06-02T08:11:15.606304", + "duration": 6.860154, + "end_time": "2020-06-03T15:45:17.054999", "exception": false, - "start_time": "2020-06-02T08:11:10.335928", + "start_time": "2020-06-03T15:45:10.194845", "status": "completed" }, "tags": [] @@ -283,10 +279,10 @@ "execution_count": 8, "metadata": { "papermill": { - "duration": 0.033573, - "end_time": "2020-06-02T08:11:15.657902", + "duration": 0.036298, + "end_time": "2020-06-03T15:45:17.109685", "exception": false, - "start_time": "2020-06-02T08:11:15.624329", + "start_time": "2020-06-03T15:45:17.073387", "status": "completed" }, "tags": [] @@ -305,10 +301,10 @@ "execution_count": 9, "metadata": { "papermill": { - "duration": 0.157194, - "end_time": "2020-06-02T08:11:15.830240", + "duration": 0.224468, + "end_time": "2020-06-03T15:45:17.346605", "exception": false, - "start_time": "2020-06-02T08:11:15.673046", + "start_time": "2020-06-03T15:45:17.122137", "status": "completed" }, "tags": [] @@ -318,7 +314,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "4945 data points\n" + "5001 data points\n" ] }, { @@ -353,94 +349,94 @@ " <th>inIcuCumulative</th>\n", " <th>onVentilatorCurrently</th>\n", " <th>...</th>\n", - " <th>hospitalized</th>\n", - " <th>total</th>\n", - " <th>totalTestResults</th>\n", " <th>posNeg</th>\n", - " <th>fips</th>\n", " <th>deathIncrease</th>\n", " <th>hospitalizedIncrease</th>\n", - " <th>negativeIncrease</th>\n", - " <th>positiveIncrease</th>\n", - " <th>totalTestResultsIncrease</th>\n", + " <th>hash</th>\n", + " <th>commercialScore</th>\n", + " <th>negativeRegularScore</th>\n", + " <th>negativeScore</th>\n", + " <th>positiveScore</th>\n", + " <th>score</th>\n", + " <th>grade</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>20200601</td>\n", + " <td>20200602</td>\n", " <td>AK</td>\n", - " <td>467.0</td>\n", - " <td>53723.0</td>\n", + " <td>487.0</td>\n", + " <td>55716.0</td>\n", " <td>NaN</td>\n", " <td>10.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>1.0</td>\n", - " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>54190</td>\n", - " <td>54190</td>\n", - " <td>54190</td>\n", - " <td>2</td>\n", - " <td>0.0</td>\n", " <td>0.0</td>\n", - " <td>2462.0</td>\n", - " <td>33.0</td>\n", - " <td>2495.0</td>\n", + " <td>...</td>\n", + " <td>56203</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>8bfc576f3ce0c5438919ab2a672255d0630714ec</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td></td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>20200601</td>\n", + " <td>20200602</td>\n", " <td>AL</td>\n", - " <td>18363.0</td>\n", + " <td>18642.0</td>\n", " <td>205160.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>1856.0</td>\n", + " <td>1879.0</td>\n", " <td>NaN</td>\n", " <td>591.0</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", - " <td>1856.0</td>\n", - " <td>223523</td>\n", - " <td>223523</td>\n", - " <td>223523</td>\n", - " <td>1</td>\n", - " <td>15.0</td>\n", - " <td>12.0</td>\n", - " <td>5510.0</td>\n", - " <td>460.0</td>\n", - " <td>5970.0</td>\n", + " <td>223802</td>\n", + " <td>5</td>\n", + " <td>23</td>\n", + " <td>e925d317dd6bba774728abaf5db247f3c5983394</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td></td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>2 rows × 27 columns</p>\n", + "<p>2 rows × 35 columns</p>\n", "</div>" ], "text/plain": [ " date state positive negative pending hospitalizedCurrently \\\n", - "0 20200601 AK 467.0 53723.0 NaN 10.0 \n", - "1 20200601 AL 18363.0 205160.0 NaN NaN \n", + "0 20200602 AK 487.0 55716.0 NaN 10.0 \n", + "1 20200602 AL 18642.0 205160.0 NaN NaN \n", "\n", " hospitalizedCumulative inIcuCurrently inIcuCumulative \\\n", "0 NaN NaN NaN \n", - "1 1856.0 NaN 591.0 \n", + "1 1879.0 NaN 591.0 \n", "\n", - " onVentilatorCurrently ... hospitalized total totalTestResults posNeg \\\n", - "0 1.0 ... NaN 54190 54190 54190 \n", - "1 NaN ... 1856.0 223523 223523 223523 \n", + " onVentilatorCurrently ... posNeg deathIncrease hospitalizedIncrease \\\n", + "0 0.0 ... 56203 0 0 \n", + "1 NaN ... 223802 5 23 \n", "\n", - " fips deathIncrease hospitalizedIncrease negativeIncrease \\\n", - "0 2 0.0 0.0 2462.0 \n", - "1 1 15.0 12.0 5510.0 \n", + " hash commercialScore \\\n", + "0 8bfc576f3ce0c5438919ab2a672255d0630714ec 0 \n", + "1 e925d317dd6bba774728abaf5db247f3c5983394 0 \n", "\n", - " positiveIncrease totalTestResultsIncrease \n", - "0 33.0 2495.0 \n", - "1 460.0 5970.0 \n", + " negativeRegularScore negativeScore positiveScore score grade \n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", "\n", - "[2 rows x 27 columns]" + "[2 rows x 35 columns]" ] }, "execution_count": 9, @@ -474,18 +470,18 @@ "version": "3.7.3" }, "papermill": { - "duration": 8.164183, - "end_time": "2020-06-02T08:11:16.160309", + "duration": 13.234479, + "end_time": "2020-06-03T15:45:17.775676", "environment_variables": {}, "exception": null, - "input_path": "/tmp/_29it5g5/notebooks/process/download-covidtracking-data.ipynb", + "input_path": "/tmp/eqrts4dp/notebooks/process/download-covidtracking-data.ipynb", "output_path": "runs/download-covidtracking-data.runs.ipynb", "parameters": { - "PAPERMILL_INPUT_PATH": "/tmp/_29it5g5/notebooks/process/download-covidtracking-data.ipynb", + "PAPERMILL_INPUT_PATH": "/tmp/eqrts4dp/notebooks/process/download-covidtracking-data.ipynb", "PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb", "out_folder": "data/covidtracking" }, - "start_time": "2020-06-02T08:11:07.996126", + "start_time": "2020-06-03T15:45:04.541197", "version": "1.1.0" } }, -- GitLab