From c41914b8c268fefbe02a83efeca6383b869e6b7c Mon Sep 17 00:00:00 2001 From: "CR (covid cron)" <cramakri+covid-cron@ethz.ch> Date: Sat, 28 Mar 2020 08:23:24 +0000 Subject: [PATCH] renku rerun data/covidtracking/states-metadata.json data/covidtracking/states-daily.json --- .../f978984fbc6e4820a1ab25a108a4040a.cwl | 120 ++++++++++++++ data/covidtracking/states-daily.json | 4 +- data/covidtracking/states-metadata.json | 4 +- runs/download-covidtracking-data.runs.ipynb | 156 +++++++++--------- 4 files changed, 205 insertions(+), 79 deletions(-) create mode 100644 .renku/workflow/f978984fbc6e4820a1ab25a108a4040a.cwl diff --git a/.renku/workflow/f978984fbc6e4820a1ab25a108a4040a.cwl b/.renku/workflow/f978984fbc6e4820a1ab25a108a4040a.cwl new file mode 100644 index 0000000..a70cab1 --- /dev/null +++ b/.renku/workflow/f978984fbc6e4820a1ab25a108a4040a.cwl @@ -0,0 +1,120 @@ +class: Workflow +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: out_folder + streamable: false + type: string + input_2: + default: data/covidtracking + streamable: false + type: string + input_3: + default: + class: File + path: ../../notebooks/process/download-covidtracking-data.ipynb + streamable: false + type: File + input_4: + default: runs/download-covidtracking-data.runs.ipynb + streamable: false + type: string + input_5: + default: states-metadata.json + streamable: false + type: string + input_6: + default: states-daily.json + streamable: false + type: string +outputs: + output_1: + outputSource: step_1/output_1 + streamable: false + type: Directory + output_2: + outputSource: step_1/output_0 + streamable: false + type: File +requirements: [] +steps: + step_1: + in: + input_1: input_1 + input_2: input_2 + input_3: input_3 + input_4: input_4 + out: + - output_1 + - output_0 + run: a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl + step_2: + in: + filename: input_5 + input_directory: step_1/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-metadata.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - &id001 + class: InlineJavascriptRequirement + - &id002 + class: InitialWorkDirRequirement + listing: $(inputs.input_directory.listing) + successCodes: [] + temporaryFailCodes: [] + step_3: + in: + filename: input_6 + input_directory: step_1/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-daily.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - *id001 + - *id002 + successCodes: [] + temporaryFailCodes: [] diff --git a/data/covidtracking/states-daily.json b/data/covidtracking/states-daily.json index 0a4b0ae..5163fa6 100644 --- a/data/covidtracking/states-daily.json +++ b/data/covidtracking/states-daily.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d03480d703438efd09b64766bd30ad384a4d3056c5e923be15301c00853f5ee -size 339161 +oid sha256:49daa3567fbddb38f172831d9a251fdf13847222e00514f939f8b237a65b05cd +size 430713 diff --git a/data/covidtracking/states-metadata.json b/data/covidtracking/states-metadata.json index 92063a0..d9f3ca9 100644 --- a/data/covidtracking/states-metadata.json +++ b/data/covidtracking/states-metadata.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43ff3cd676e580173075c19bca6629c8187291a2d23ef9d435df634c2555e1df -size 27040 +oid sha256:5d977ad0a4ed8f67000b0489996ac94d77fbe00371c702ee38c7696776a4e68c +size 27032 diff --git a/runs/download-covidtracking-data.runs.ipynb b/runs/download-covidtracking-data.runs.ipynb index a3c1fb2..5b8ae1d 100644 --- a/runs/download-covidtracking-data.runs.ipynb +++ b/runs/download-covidtracking-data.runs.ipynb @@ -5,10 +5,10 @@ "execution_count": 1, "metadata": { "papermill": { - "duration": 3.733962, - "end_time": "2020-03-27T08:22:23.632192", + "duration": 0.510563, + "end_time": "2020-03-28T08:23:22.828576", "exception": false, - "start_time": "2020-03-27T08:22:19.898230", + "start_time": "2020-03-28T08:23:22.318013", "status": "completed" }, "tags": [] @@ -25,10 +25,10 @@ "execution_count": 2, "metadata": { "papermill": { - "duration": 0.021455, - "end_time": "2020-03-27T08:22:23.668176", + "duration": 0.016784, + "end_time": "2020-03-28T08:23:22.855238", "exception": false, - "start_time": "2020-03-27T08:22:23.646721", + "start_time": "2020-03-28T08:23:22.838454", "status": "completed" }, "tags": [ @@ -46,10 +46,10 @@ "execution_count": 3, "metadata": { "papermill": { - "duration": 0.020398, - "end_time": "2020-03-27T08:22:23.698819", + "duration": 0.021893, + "end_time": "2020-03-28T08:23:22.884664", "exception": false, - "start_time": "2020-03-27T08:22:23.678421", + "start_time": "2020-03-28T08:23:22.862771", "status": "completed" }, "tags": [ @@ -59,7 +59,7 @@ "outputs": [], "source": [ "# Parameters\n", - "PAPERMILL_INPUT_PATH = \"/tmp/cpvikc_3/notebooks/process/download-covidtracking-data.ipynb\"\n", + "PAPERMILL_INPUT_PATH = \"/tmp/lrv5vlu8/notebooks/process/download-covidtracking-data.ipynb\"\n", "PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n", "out_folder = \"data/covidtracking\"\n" ] @@ -68,10 +68,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.007642, - "end_time": "2020-03-27T08:22:23.714336", + "duration": 0.006745, + "end_time": "2020-03-28T08:23:22.903069", "exception": false, - "start_time": "2020-03-27T08:22:23.706694", + "start_time": "2020-03-28T08:23:22.896324", "status": "completed" }, "tags": [] @@ -87,10 +87,10 @@ "execution_count": 4, "metadata": { "papermill": { - "duration": 0.42213, - "end_time": "2020-03-27T08:22:24.144107", + "duration": 0.232971, + "end_time": "2020-03-28T08:23:23.143017", "exception": false, - "start_time": "2020-03-27T08:22:23.721977", + "start_time": "2020-03-28T08:23:22.910046", "status": "completed" }, "tags": [] @@ -107,10 +107,10 @@ "execution_count": 5, "metadata": { "papermill": { - "duration": 0.023995, - "end_time": "2020-03-27T08:22:24.182589", + "duration": 0.017333, + "end_time": "2020-03-28T08:23:23.171508", "exception": false, - "start_time": "2020-03-27T08:22:24.158594", + "start_time": "2020-03-28T08:23:23.154175", "status": "completed" }, "tags": [] @@ -129,10 +129,10 @@ "execution_count": 6, "metadata": { "papermill": { - "duration": 0.06725, - "end_time": "2020-03-27T08:22:24.260714", + "duration": 0.063441, + "end_time": "2020-03-28T08:23:23.242653", "exception": false, - "start_time": "2020-03-27T08:22:24.193464", + "start_time": "2020-03-28T08:23:23.179212", "status": "completed" }, "tags": [] @@ -188,7 +188,7 @@ " <td>@Alaska_DHSS</td>\n", " <td>All data</td>\n", " <td>False</td>\n", - " <td>We count the reported number as \"persons teste...</td>\n", + " <td>Total tests are taken from the annotations on ...</td>\n", " <td>2</td>\n", " <td>Alaska</td>\n", " </tr>\n", @@ -223,7 +223,7 @@ "1 None @alpublichealth \n", "\n", " pui pum notes fips \\\n", - "0 All data False We count the reported number as \"persons teste... 2 \n", + "0 All data False Total tests are taken from the annotations on ... 2 \n", "1 No data False Negatives = Totals - Positives. Positives seem... 1 \n", "\n", " name \n", @@ -246,10 +246,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.008786, - "end_time": "2020-03-27T08:22:24.278947", + "duration": 0.007652, + "end_time": "2020-03-28T08:23:23.259216", "exception": false, - "start_time": "2020-03-27T08:22:24.270161", + "start_time": "2020-03-28T08:23:23.251564", "status": "completed" }, "tags": [] @@ -263,10 +263,10 @@ "execution_count": 7, "metadata": { "papermill": { - "duration": 0.167447, - "end_time": "2020-03-27T08:22:24.455064", + "duration": 0.238704, + "end_time": "2020-03-28T08:23:23.505934", "exception": false, - "start_time": "2020-03-27T08:22:24.287617", + "start_time": "2020-03-28T08:23:23.267230", "status": "completed" }, "tags": [] @@ -283,10 +283,10 @@ "execution_count": 8, "metadata": { "papermill": { - "duration": 0.022025, - "end_time": "2020-03-27T08:22:24.490030", + "duration": 0.022386, + "end_time": "2020-03-28T08:23:23.536735", "exception": false, - "start_time": "2020-03-27T08:22:24.468005", + "start_time": "2020-03-28T08:23:23.514349", "status": "completed" }, "tags": [] @@ -305,10 +305,10 @@ "execution_count": 9, "metadata": { "papermill": { - "duration": 0.075398, - "end_time": "2020-03-27T08:22:24.575765", + "duration": 0.074418, + "end_time": "2020-03-28T08:23:23.620961", "exception": false, - "start_time": "2020-03-27T08:22:24.500367", + "start_time": "2020-03-28T08:23:23.546543", "status": "completed" }, "tags": [] @@ -318,7 +318,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "1149 data points\n" + "1205 data points\n" ] }, { @@ -350,8 +350,10 @@ " <th>hospitalized</th>\n", " <th>death</th>\n", " <th>total</th>\n", + " <th>hash</th>\n", " <th>dateChecked</th>\n", " <th>totalTestResults</th>\n", + " <th>fips</th>\n", " <th>deathIncrease</th>\n", " <th>hospitalizedIncrease</th>\n", " <th>negativeIncrease</th>\n", @@ -362,39 +364,43 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>20200326</td>\n", + " <td>20200327</td>\n", " <td>AK</td>\n", - " <td>59.0</td>\n", - " <td>1801.0</td>\n", - " <td>NaN</td>\n", + " <td>69.0</td>\n", + " <td>2319.0</td>\n", + " <td>13.0</td>\n", " <td>3.0</td>\n", " <td>1.0</td>\n", - " <td>1860</td>\n", - " <td>2020-03-26T20:00:00Z</td>\n", - " <td>1860</td>\n", + " <td>2401</td>\n", + " <td>588f633d59494e4d58466fccfa9628c98568396b</td>\n", + " <td>2020-03-27T20:00:00Z</td>\n", + " <td>2388</td>\n", + " <td>2</td>\n", " <td>0.0</td>\n", - " <td>2.0</td>\n", - " <td>152.0</td>\n", - " <td>17.0</td>\n", - " <td>169.0</td>\n", + " <td>0.0</td>\n", + " <td>518.0</td>\n", + " <td>10.0</td>\n", + " <td>528.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>20200326</td>\n", + " <td>20200327</td>\n", " <td>AL</td>\n", - " <td>506.0</td>\n", - " <td>3593.0</td>\n", + " <td>587.0</td>\n", + " <td>4184.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>1.0</td>\n", - " <td>4099</td>\n", - " <td>2020-03-26T20:00:00Z</td>\n", - " <td>4099</td>\n", - " <td>1.0</td>\n", + " <td>3.0</td>\n", + " <td>4771</td>\n", + " <td>c3883f10c8760fdd9d196c5e2483205434bc4e0f</td>\n", + " <td>2020-03-27T20:00:00Z</td>\n", + " <td>4771</td>\n", + " <td>1</td>\n", + " <td>2.0</td>\n", " <td>0.0</td>\n", - " <td>1064.0</td>\n", - " <td>223.0</td>\n", - " <td>1287.0</td>\n", + " <td>591.0</td>\n", + " <td>81.0</td>\n", + " <td>672.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -402,20 +408,20 @@ ], "text/plain": [ " date state positive negative pending hospitalized death total \\\n", - "0 20200326 AK 59.0 1801.0 NaN 3.0 1.0 1860 \n", - "1 20200326 AL 506.0 3593.0 NaN NaN 1.0 4099 \n", + "0 20200327 AK 69.0 2319.0 13.0 3.0 1.0 2401 \n", + "1 20200327 AL 587.0 4184.0 NaN NaN 3.0 4771 \n", "\n", - " dateChecked totalTestResults deathIncrease \\\n", - "0 2020-03-26T20:00:00Z 1860 0.0 \n", - "1 2020-03-26T20:00:00Z 4099 1.0 \n", + " hash dateChecked \\\n", + "0 588f633d59494e4d58466fccfa9628c98568396b 2020-03-27T20:00:00Z \n", + "1 c3883f10c8760fdd9d196c5e2483205434bc4e0f 2020-03-27T20:00:00Z \n", "\n", - " hospitalizedIncrease negativeIncrease positiveIncrease \\\n", - "0 2.0 152.0 17.0 \n", - "1 0.0 1064.0 223.0 \n", + " totalTestResults fips deathIncrease hospitalizedIncrease \\\n", + "0 2388 2 0.0 0.0 \n", + "1 4771 1 2.0 0.0 \n", "\n", - " totalTestResultsIncrease \n", - "0 169.0 \n", - "1 1287.0 " + " negativeIncrease positiveIncrease totalTestResultsIncrease \n", + "0 518.0 10.0 528.0 \n", + "1 591.0 81.0 672.0 " ] }, "execution_count": 9, @@ -449,18 +455,18 @@ "version": "3.7.3" }, "papermill": { - "duration": 6.40933, - "end_time": "2020-03-27T08:22:24.898756", + "duration": 2.566078, + "end_time": "2020-03-28T08:23:23.945976", "environment_variables": {}, "exception": null, - "input_path": "/tmp/cpvikc_3/notebooks/process/download-covidtracking-data.ipynb", + "input_path": "/tmp/lrv5vlu8/notebooks/process/download-covidtracking-data.ipynb", "output_path": "runs/download-covidtracking-data.runs.ipynb", "parameters": { - "PAPERMILL_INPUT_PATH": "/tmp/cpvikc_3/notebooks/process/download-covidtracking-data.ipynb", + "PAPERMILL_INPUT_PATH": "/tmp/lrv5vlu8/notebooks/process/download-covidtracking-data.ipynb", "PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb", "out_folder": "data/covidtracking" }, - "start_time": "2020-03-27T08:22:18.489426", + "start_time": "2020-03-28T08:23:21.379898", "version": "1.1.0" } }, -- GitLab