From ccc37edffe6d9ca8ea4b8a229d1d20c42bc4b658 Mon Sep 17 00:00:00 2001 From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch> Date: Thu, 26 Mar 2020 12:38:34 +0000 Subject: [PATCH] renku run papermill -p out_folder ./data/covidtracking/ --inject-paths notebooks/process/download-covidtracking-data.ipynb runs/download-covidtracking-data.runs.ipynb --- ...560c41a54f5aa307ce5f3c5effe5_papermill.cwl | 70 ++++++++ data/covidtracking/states-daily.json | 4 +- data/covidtracking/states-metadata.json | 4 +- runs/download-covidtracking-data.runs.ipynb | 157 +++++++++++------- 4 files changed, 169 insertions(+), 66 deletions(-) create mode 100644 .renku/workflow/a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl diff --git a/.renku/workflow/a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl b/.renku/workflow/a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl new file mode 100644 index 00000000..8ab0a3b5 --- /dev/null +++ b/.renku/workflow/a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl @@ -0,0 +1,70 @@ +arguments: [] +baseCommand: +- papermill +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: out_folder + inputBinding: + position: 1 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_2: + default: data/covidtracking + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: string + input_3: + default: + class: File + path: ../../notebooks/process/download-covidtracking-data.ipynb + inputBinding: + position: 3 + prefix: --inject-paths + separate: true + shellQuote: true + streamable: false + type: File + input_4: + default: runs/download-covidtracking-data.runs.ipynb + inputBinding: + position: 4 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_4) + streamable: false + type: File + output_1: + outputBinding: + glob: $(inputs.input_2) + streamable: false + type: Directory +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: runs + writable: true + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/covidtracking + writable: true + - entry: $(inputs.input_3) + entryname: notebooks/process/download-covidtracking-data.ipynb + writable: false +successCodes: [] +temporaryFailCodes: [] diff --git a/data/covidtracking/states-daily.json b/data/covidtracking/states-daily.json index ee2b6bc8..de5cd65d 100644 --- a/data/covidtracking/states-daily.json +++ b/data/covidtracking/states-daily.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b0927c74f3dbb27dacfddb47b00d2c543f8ec932625dc1502b3d048fcc2668e -size 160445 +oid sha256:a6103d449cc2fd40e6dcdb9b4778817576adfde055d94c1152a5e73bda6a3e98 +size 322323 diff --git a/data/covidtracking/states-metadata.json b/data/covidtracking/states-metadata.json index 08dcd867..a7a99acd 100644 --- a/data/covidtracking/states-metadata.json +++ b/data/covidtracking/states-metadata.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02330f655200284fbdaef1b3d4272cc84385c648ce7c3b73d910a0cacc3fa529 -size 25509 +oid sha256:12f495f7b6df119f781bdada37c7e80646b43458ae3b595ccbb83878980a6640 +size 27032 diff --git a/runs/download-covidtracking-data.runs.ipynb b/runs/download-covidtracking-data.runs.ipynb index 5c6e420d..c1ef0273 100644 --- a/runs/download-covidtracking-data.runs.ipynb +++ b/runs/download-covidtracking-data.runs.ipynb @@ -5,10 +5,10 @@ "execution_count": 1, "metadata": { "papermill": { - "duration": 0.564716, - "end_time": "2020-03-25T08:22:56.714459", + "duration": 0.506363, + "end_time": "2020-03-26T12:38:33.422061", "exception": false, - "start_time": "2020-03-25T08:22:56.149743", + "start_time": "2020-03-26T12:38:32.915698", "status": "completed" }, "tags": [] @@ -25,10 +25,10 @@ "execution_count": 2, "metadata": { "papermill": { - "duration": 0.019507, - "end_time": "2020-03-25T08:22:56.745116", + "duration": 0.020958, + "end_time": "2020-03-26T12:38:33.458041", "exception": false, - "start_time": "2020-03-25T08:22:56.725609", + "start_time": "2020-03-26T12:38:33.437083", "status": "completed" }, "tags": [ @@ -46,10 +46,10 @@ "execution_count": 3, "metadata": { "papermill": { - "duration": 0.018572, - "end_time": "2020-03-25T08:22:56.773076", + "duration": 0.023542, + "end_time": "2020-03-26T12:38:33.490779", "exception": false, - "start_time": "2020-03-25T08:22:56.754504", + "start_time": "2020-03-26T12:38:33.467237", "status": "completed" }, "tags": [ @@ -59,19 +59,19 @@ "outputs": [], "source": [ "# Parameters\n", - "PAPERMILL_INPUT_PATH = \"/tmp/e18fw9c9/notebooks/process/download-covidtracking-data.ipynb\"\n", + "PAPERMILL_INPUT_PATH = \"notebooks/process/download-covidtracking-data.ipynb\"\n", "PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n", - "out_folder = \"data/covidtracking\"\n" + "out_folder = \"./data/covidtracking/\"\n" ] }, { "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.007795, - "end_time": "2020-03-25T08:22:56.790849", + "duration": 0.007637, + "end_time": "2020-03-26T12:38:33.508933", "exception": false, - "start_time": "2020-03-25T08:22:56.783054", + "start_time": "2020-03-26T12:38:33.501296", "status": "completed" }, "tags": [] @@ -87,10 +87,10 @@ "execution_count": 4, "metadata": { "papermill": { - "duration": 1.020789, - "end_time": "2020-03-25T08:22:57.818863", + "duration": 0.211295, + "end_time": "2020-03-26T12:38:33.726495", "exception": false, - "start_time": "2020-03-25T08:22:56.798074", + "start_time": "2020-03-26T12:38:33.515200", "status": "completed" }, "tags": [] @@ -107,10 +107,10 @@ "execution_count": 5, "metadata": { "papermill": { - "duration": 0.022913, - "end_time": "2020-03-25T08:22:57.852810", + "duration": 0.025576, + "end_time": "2020-03-26T12:38:33.768948", "exception": false, - "start_time": "2020-03-25T08:22:57.829897", + "start_time": "2020-03-26T12:38:33.743372", "status": "completed" }, "tags": [] @@ -129,10 +129,10 @@ "execution_count": 6, "metadata": { "papermill": { - "duration": 0.066514, - "end_time": "2020-03-25T08:22:57.927763", + "duration": 0.073344, + "end_time": "2020-03-26T12:38:33.849972", "exception": false, - "start_time": "2020-03-25T08:22:57.861249", + "start_time": "2020-03-26T12:38:33.776628", "status": "completed" }, "tags": [] @@ -174,6 +174,7 @@ " <th>pui</th>\n", " <th>pum</th>\n", " <th>notes</th>\n", + " <th>fips</th>\n", " <th>name</th>\n", " </tr>\n", " </thead>\n", @@ -188,6 +189,7 @@ " <td>All data</td>\n", " <td>False</td>\n", " <td>We count the reported number as \"persons teste...</td>\n", + " <td>2</td>\n", " <td>Alaska</td>\n", " </tr>\n", " <tr>\n", @@ -200,6 +202,7 @@ " <td>No data</td>\n", " <td>False</td>\n", " <td>Last update time taken from [main page](http:/...</td>\n", + " <td>1</td>\n", " <td>Alabama</td>\n", " </tr>\n", " </tbody>\n", @@ -219,9 +222,13 @@ "0 http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-... @Alaska_DHSS \n", "1 None @alpublichealth \n", "\n", - " pui pum notes name \n", - "0 All data False We count the reported number as \"persons teste... Alaska \n", - "1 No data False Last update time taken from [main page](http:/... Alabama " + " pui pum notes fips \\\n", + "0 All data False We count the reported number as \"persons teste... 2 \n", + "1 No data False Last update time taken from [main page](http:/... 1 \n", + "\n", + " name \n", + "0 Alaska \n", + "1 Alabama " ] }, "execution_count": 6, @@ -239,10 +246,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.008089, - "end_time": "2020-03-25T08:22:57.947587", + "duration": 0.007572, + "end_time": "2020-03-26T12:38:33.873621", "exception": false, - "start_time": "2020-03-25T08:22:57.939498", + "start_time": "2020-03-26T12:38:33.866049", "status": "completed" }, "tags": [] @@ -256,10 +263,10 @@ "execution_count": 7, "metadata": { "papermill": { - "duration": 0.184993, - "end_time": "2020-03-25T08:22:58.140940", + "duration": 0.178588, + "end_time": "2020-03-26T12:38:34.058549", "exception": false, - "start_time": "2020-03-25T08:22:57.955947", + "start_time": "2020-03-26T12:38:33.879961", "status": "completed" }, "tags": [] @@ -276,10 +283,10 @@ "execution_count": 8, "metadata": { "papermill": { - "duration": 0.022977, - "end_time": "2020-03-25T08:22:58.177014", + "duration": 0.027495, + "end_time": "2020-03-26T12:38:34.102339", "exception": false, - "start_time": "2020-03-25T08:22:58.154037", + "start_time": "2020-03-26T12:38:34.074844", "status": "completed" }, "tags": [] @@ -298,10 +305,10 @@ "execution_count": 9, "metadata": { "papermill": { - "duration": 0.060602, - "end_time": "2020-03-25T08:22:58.251228", + "duration": 0.076898, + "end_time": "2020-03-26T12:38:34.188602", "exception": false, - "start_time": "2020-03-25T08:22:58.190626", + "start_time": "2020-03-26T12:38:34.111704", "status": "completed" }, "tags": [] @@ -311,7 +318,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "1037 data points\n" + "1093 data points\n" ] }, { @@ -344,32 +351,50 @@ " <th>death</th>\n", " <th>total</th>\n", " <th>dateChecked</th>\n", + " <th>totalTestResults</th>\n", + " <th>deathIncrease</th>\n", + " <th>hospitalizedIncrease</th>\n", + " <th>negativeIncrease</th>\n", + " <th>positiveIncrease</th>\n", + " <th>totalTestResultsIncrease</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>20200324</td>\n", + " <td>20200325</td>\n", " <td>AK</td>\n", - " <td>36.0</td>\n", - " <td>986.0</td>\n", - " <td>NaN</td>\n", - " <td>0.0</td>\n", + " <td>42.0</td>\n", + " <td>1649.0</td>\n", " <td>NaN</td>\n", - " <td>1022</td>\n", - " <td>2020-03-24T20:00:00Z</td>\n", + " <td>1.0</td>\n", + " <td>1.0</td>\n", + " <td>1691</td>\n", + " <td>2020-03-25T20:00:00Z</td>\n", + " <td>1691</td>\n", + " <td>1.0</td>\n", + " <td>1.0</td>\n", + " <td>663.0</td>\n", + " <td>6.0</td>\n", + " <td>669.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>20200324</td>\n", + " <td>20200325</td>\n", " <td>AL</td>\n", - " <td>215.0</td>\n", - " <td>2106.0</td>\n", + " <td>283.0</td>\n", + " <td>2529.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0.0</td>\n", - " <td>2321</td>\n", - " <td>2020-03-24T20:00:00Z</td>\n", + " <td>2812</td>\n", + " <td>2020-03-25T20:00:00Z</td>\n", + " <td>2812</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>423.0</td>\n", + " <td>68.0</td>\n", + " <td>491.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -377,12 +402,20 @@ ], "text/plain": [ " date state positive negative pending hospitalized death total \\\n", - "0 20200324 AK 36.0 986.0 NaN 0.0 NaN 1022 \n", - "1 20200324 AL 215.0 2106.0 NaN NaN 0.0 2321 \n", + "0 20200325 AK 42.0 1649.0 NaN 1.0 1.0 1691 \n", + "1 20200325 AL 283.0 2529.0 NaN NaN 0.0 2812 \n", + "\n", + " dateChecked totalTestResults deathIncrease \\\n", + "0 2020-03-25T20:00:00Z 1691 1.0 \n", + "1 2020-03-25T20:00:00Z 2812 0.0 \n", + "\n", + " hospitalizedIncrease negativeIncrease positiveIncrease \\\n", + "0 1.0 663.0 6.0 \n", + "1 0.0 423.0 68.0 \n", "\n", - " dateChecked \n", - "0 2020-03-24T20:00:00Z \n", - "1 2020-03-24T20:00:00Z " + " totalTestResultsIncrease \n", + "0 669.0 \n", + "1 491.0 " ] }, "execution_count": 9, @@ -416,18 +449,18 @@ "version": "3.7.3" }, "papermill": { - "duration": 3.460914, - "end_time": "2020-03-25T08:22:58.575452", + "duration": 2.475212, + "end_time": "2020-03-26T12:38:34.517904", "environment_variables": {}, "exception": null, - "input_path": "/tmp/e18fw9c9/notebooks/process/download-covidtracking-data.ipynb", + "input_path": "notebooks/process/download-covidtracking-data.ipynb", "output_path": "runs/download-covidtracking-data.runs.ipynb", "parameters": { - "PAPERMILL_INPUT_PATH": "/tmp/e18fw9c9/notebooks/process/download-covidtracking-data.ipynb", + "PAPERMILL_INPUT_PATH": "notebooks/process/download-covidtracking-data.ipynb", "PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb", - "out_folder": "data/covidtracking" + "out_folder": "./data/covidtracking/" }, - "start_time": "2020-03-25T08:22:55.114538", + "start_time": "2020-03-26T12:38:32.042692", "version": "1.1.0" } }, -- GitLab