From cef673ed4b7cb495ce0244a0c88a030c8eb26a1c Mon Sep 17 00:00:00 2001 From: "CR (covid cron)" <cramakri+covid-cron@ethz.ch> Date: Mon, 4 May 2020 07:26:30 +0000 Subject: [PATCH] renku rerun data/covidtracking/states-metadata.json data/covidtracking/states-daily.json --- .../eafea58b25074956a49ec30e7eacf69c.cwl | 120 ++++++++++++++ data/covidtracking/states-daily.json | 4 +- data/covidtracking/states-metadata.json | 4 +- runs/download-covidtracking-data.runs.ipynb | 150 +++++++++--------- 4 files changed, 199 insertions(+), 79 deletions(-) create mode 100644 .renku/workflow/eafea58b25074956a49ec30e7eacf69c.cwl diff --git a/.renku/workflow/eafea58b25074956a49ec30e7eacf69c.cwl b/.renku/workflow/eafea58b25074956a49ec30e7eacf69c.cwl new file mode 100644 index 000000000..6d230d792 --- /dev/null +++ b/.renku/workflow/eafea58b25074956a49ec30e7eacf69c.cwl @@ -0,0 +1,120 @@ +class: Workflow +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: states-daily.json + streamable: false + type: string + input_2: + default: out_folder + streamable: false + type: string + input_3: + default: data/covidtracking + streamable: false + type: string + input_4: + default: + class: File + path: ../../notebooks/process/download-covidtracking-data.ipynb + streamable: false + type: File + input_5: + default: runs/download-covidtracking-data.runs.ipynb + streamable: false + type: string + input_6: + default: states-metadata.json + streamable: false + type: string +outputs: + output_0: + outputSource: step_2/output_1 + streamable: false + type: Directory + output_3: + outputSource: step_2/output_0 + streamable: false + type: File +requirements: [] +steps: + step_1: + in: + filename: input_1 + input_directory: step_2/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-daily.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - &id001 + class: InlineJavascriptRequirement + - &id002 + class: InitialWorkDirRequirement + listing: $(inputs.input_directory.listing) + successCodes: [] + temporaryFailCodes: [] + step_2: + in: + input_1: input_2 + input_2: input_3 + input_3: input_4 + input_4: input_5 + out: + - output_0 + - output_1 + run: a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl + step_3: + in: + filename: input_6 + input_directory: step_2/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-metadata.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - *id001 + - *id002 + successCodes: [] + temporaryFailCodes: [] diff --git a/data/covidtracking/states-daily.json b/data/covidtracking/states-daily.json index 97be60ccc..57b07c246 100644 --- a/data/covidtracking/states-daily.json +++ b/data/covidtracking/states-daily.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e1102ef9267f340ae62b93f940708d6297fdaaec5313adf7cba0794771a9ad8 -size 1593641 +oid sha256:b7c90c75dbb62ebcfa4019f6698557c4acdc66f60dd38743de6e07532890d85b +size 1688402 diff --git a/data/covidtracking/states-metadata.json b/data/covidtracking/states-metadata.json index 41e316a87..94dff9b27 100644 --- a/data/covidtracking/states-metadata.json +++ b/data/covidtracking/states-metadata.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:31d29c16ffd9d9633328bfb356d54d905ae32d0466881e688c59fb621478315c -size 30002 +oid sha256:fe943178a763c70ed1e5c40b7fb427085e3dec0685d0a62d58c8900cc59ee866 +size 30004 diff --git a/runs/download-covidtracking-data.runs.ipynb b/runs/download-covidtracking-data.runs.ipynb index 01a050db1..40745aac0 100644 --- a/runs/download-covidtracking-data.runs.ipynb +++ b/runs/download-covidtracking-data.runs.ipynb @@ -5,10 +5,10 @@ "execution_count": 1, "metadata": { "papermill": { - "duration": 4.072813, - "end_time": "2020-05-01T07:25:46.025680", + "duration": 4.393669, + "end_time": "2020-05-04T07:26:24.914679", "exception": false, - "start_time": "2020-05-01T07:25:41.952867", + "start_time": "2020-05-04T07:26:20.521010", "status": "completed" }, "tags": [] @@ -25,10 +25,10 @@ "execution_count": 2, "metadata": { "papermill": { - "duration": 0.020829, - "end_time": "2020-05-01T07:25:46.057658", + "duration": 0.022004, + "end_time": "2020-05-04T07:26:24.950371", "exception": false, - "start_time": "2020-05-01T07:25:46.036829", + "start_time": "2020-05-04T07:26:24.928367", "status": "completed" }, "tags": [ @@ -46,10 +46,10 @@ "execution_count": 3, "metadata": { "papermill": { - "duration": 0.020129, - "end_time": "2020-05-01T07:25:46.087901", + "duration": 0.023206, + "end_time": "2020-05-04T07:26:24.987502", "exception": false, - "start_time": "2020-05-01T07:25:46.067772", + "start_time": "2020-05-04T07:26:24.964296", "status": "completed" }, "tags": [ @@ -59,7 +59,7 @@ "outputs": [], "source": [ "# Parameters\n", - "PAPERMILL_INPUT_PATH = \"/tmp/3jpq_9ab/notebooks/process/download-covidtracking-data.ipynb\"\n", + "PAPERMILL_INPUT_PATH = \"/tmp/096zx1nj/notebooks/process/download-covidtracking-data.ipynb\"\n", "PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n", "out_folder = \"data/covidtracking\"\n" ] @@ -68,10 +68,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.00855, - "end_time": "2020-05-01T07:25:46.106414", + "duration": 0.007349, + "end_time": "2020-05-04T07:26:25.005845", "exception": false, - "start_time": "2020-05-01T07:25:46.097864", + "start_time": "2020-05-04T07:26:24.998496", "status": "completed" }, "tags": [] @@ -87,10 +87,10 @@ "execution_count": 4, "metadata": { "papermill": { - "duration": 2.129772, - "end_time": "2020-05-01T07:25:48.243538", + "duration": 0.746032, + "end_time": "2020-05-04T07:26:25.758556", "exception": false, - "start_time": "2020-05-01T07:25:46.113766", + "start_time": "2020-05-04T07:26:25.012524", "status": "completed" }, "tags": [] @@ -107,10 +107,10 @@ "execution_count": 5, "metadata": { "papermill": { - "duration": 0.020541, - "end_time": "2020-05-01T07:25:48.277610", + "duration": 0.02621, + "end_time": "2020-05-04T07:26:25.798336", "exception": false, - "start_time": "2020-05-01T07:25:48.257069", + "start_time": "2020-05-04T07:26:25.772126", "status": "completed" }, "tags": [] @@ -129,10 +129,10 @@ "execution_count": 6, "metadata": { "papermill": { - "duration": 0.078951, - "end_time": "2020-05-01T07:25:48.366486", + "duration": 0.095572, + "end_time": "2020-05-04T07:26:25.902478", "exception": false, - "start_time": "2020-05-01T07:25:48.287535", + "start_time": "2020-05-04T07:26:25.806906", "status": "completed" }, "tags": [] @@ -197,7 +197,7 @@ " <td>AL</td>\n", " <td>http://www.alabamapublichealth.gov/infectiousd...</td>\n", " <td>https://alpublichealth.maps.arcgis.com/apps/op...</td>\n", - " <td>None</td>\n", + " <td>https://dph1.adph.state.al.us/covid-19/</td>\n", " <td>@alpublichealth</td>\n", " <td>No data</td>\n", " <td>False</td>\n", @@ -220,7 +220,7 @@ "\n", " covid19SiteSecondary twitter \\\n", "0 http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-... @Alaska_DHSS \n", - "1 None @alpublichealth \n", + "1 https://dph1.adph.state.al.us/covid-19/ @alpublichealth \n", "\n", " pui pum notes fips \\\n", "0 All data False Total tests are taken from the annotations on ... 2 \n", @@ -246,10 +246,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.010968, - "end_time": "2020-05-01T07:25:48.389022", + "duration": 0.012483, + "end_time": "2020-05-04T07:26:25.929200", "exception": false, - "start_time": "2020-05-01T07:25:48.378054", + "start_time": "2020-05-04T07:26:25.916717", "status": "completed" }, "tags": [] @@ -263,10 +263,10 @@ "execution_count": 7, "metadata": { "papermill": { - "duration": 0.169674, - "end_time": "2020-05-01T07:25:48.566917", + "duration": 3.183963, + "end_time": "2020-05-04T07:26:29.129690", "exception": false, - "start_time": "2020-05-01T07:25:48.397243", + "start_time": "2020-05-04T07:26:25.945727", "status": "completed" }, "tags": [] @@ -283,10 +283,10 @@ "execution_count": 8, "metadata": { "papermill": { - "duration": 0.028052, - "end_time": "2020-05-01T07:25:48.606888", + "duration": 0.032787, + "end_time": "2020-05-04T07:26:29.180183", "exception": false, - "start_time": "2020-05-01T07:25:48.578836", + "start_time": "2020-05-04T07:26:29.147396", "status": "completed" }, "tags": [] @@ -305,10 +305,10 @@ "execution_count": 9, "metadata": { "papermill": { - "duration": 0.136337, - "end_time": "2020-05-01T07:25:48.759085", + "duration": 0.124053, + "end_time": "2020-05-04T07:26:29.315529", "exception": false, - "start_time": "2020-05-01T07:25:48.622748", + "start_time": "2020-05-04T07:26:29.191476", "status": "completed" }, "tags": [] @@ -318,7 +318,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "3153 data points\n" + "3321 data points\n" ] }, { @@ -368,51 +368,51 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>20200430</td>\n", + " <td>20200503</td>\n", " <td>AK</td>\n", - " <td>355.0</td>\n", - " <td>18764.0</td>\n", + " <td>368.0</td>\n", + " <td>21210.0</td>\n", " <td>NaN</td>\n", - " <td>19.0</td>\n", + " <td>12.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", - " <td>19119.0</td>\n", - " <td>19119.0</td>\n", - " <td>19119.0</td>\n", + " <td>21578.0</td>\n", + " <td>21578.0</td>\n", + " <td>21578.0</td>\n", " <td>2</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", + " <td>176.0</td>\n", + " <td>3.0</td>\n", + " <td>179.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>20200430</td>\n", + " <td>20200503</td>\n", " <td>AL</td>\n", - " <td>7019.0</td>\n", - " <td>80177.0</td>\n", + " <td>7725.0</td>\n", + " <td>84775.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>978.0</td>\n", + " <td>1035.0</td>\n", " <td>NaN</td>\n", - " <td>335.0</td>\n", + " <td>403.0</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", - " <td>978.0</td>\n", - " <td>87196.0</td>\n", - " <td>87196.0</td>\n", - " <td>87196.0</td>\n", + " <td>1035.0</td>\n", + " <td>92500.0</td>\n", + " <td>92500.0</td>\n", + " <td>92500.0</td>\n", " <td>1</td>\n", - " <td>24.0</td>\n", - " <td>33.0</td>\n", - " <td>6570.0</td>\n", - " <td>177.0</td>\n", - " <td>6747.0</td>\n", + " <td>2.0</td>\n", + " <td>12.0</td>\n", + " <td>0.0</td>\n", + " <td>291.0</td>\n", + " <td>291.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -421,24 +421,24 @@ ], "text/plain": [ " date state positive negative pending hospitalizedCurrently \\\n", - "0 20200430 AK 355.0 18764.0 NaN 19.0 \n", - "1 20200430 AL 7019.0 80177.0 NaN NaN \n", + "0 20200503 AK 368.0 21210.0 NaN 12.0 \n", + "1 20200503 AL 7725.0 84775.0 NaN NaN \n", "\n", " hospitalizedCumulative inIcuCurrently inIcuCumulative \\\n", "0 NaN NaN NaN \n", - "1 978.0 NaN 335.0 \n", + "1 1035.0 NaN 403.0 \n", "\n", " onVentilatorCurrently ... hospitalized total totalTestResults \\\n", - "0 NaN ... NaN 19119.0 19119.0 \n", - "1 NaN ... 978.0 87196.0 87196.0 \n", + "0 NaN ... NaN 21578.0 21578.0 \n", + "1 NaN ... 1035.0 92500.0 92500.0 \n", "\n", " posNeg fips deathIncrease hospitalizedIncrease negativeIncrease \\\n", - "0 19119.0 2 0.0 0.0 0.0 \n", - "1 87196.0 1 24.0 33.0 6570.0 \n", + "0 21578.0 2 0.0 0.0 176.0 \n", + "1 92500.0 1 2.0 12.0 0.0 \n", "\n", " positiveIncrease totalTestResultsIncrease \n", - "0 0.0 0.0 \n", - "1 177.0 6747.0 \n", + "0 3.0 179.0 \n", + "1 291.0 291.0 \n", "\n", "[2 rows x 25 columns]" ] @@ -474,18 +474,18 @@ "version": "3.7.3" }, "papermill": { - "duration": 9.063913, - "end_time": "2020-05-01T07:25:49.180476", + "duration": 11.122625, + "end_time": "2020-05-04T07:26:29.643520", "environment_variables": {}, "exception": null, - "input_path": "/tmp/3jpq_9ab/notebooks/process/download-covidtracking-data.ipynb", + "input_path": "/tmp/096zx1nj/notebooks/process/download-covidtracking-data.ipynb", "output_path": "runs/download-covidtracking-data.runs.ipynb", "parameters": { - "PAPERMILL_INPUT_PATH": "/tmp/3jpq_9ab/notebooks/process/download-covidtracking-data.ipynb", + "PAPERMILL_INPUT_PATH": "/tmp/096zx1nj/notebooks/process/download-covidtracking-data.ipynb", "PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb", "out_folder": "data/covidtracking" }, - "start_time": "2020-05-01T07:25:40.116563", + "start_time": "2020-05-04T07:26:18.520895", "version": "1.1.0" } }, -- GitLab