From 53866142e71514d0da6f87cf8f9310e5047dcae2 Mon Sep 17 00:00:00 2001 From: "CR (covid cron)" <cramakri+covid-cron@ethz.ch> Date: Tue, 31 Mar 2020 08:24:36 +0000 Subject: [PATCH] renku rerun data/covidtracking/states-metadata.json data/covidtracking/states-daily.json --- .../264902f3f10248e59668d6b58573c1a9.cwl | 148 ++++++++++++++++++ data/covidtracking/states-daily.json | 4 +- data/covidtracking/states-metadata.json | 4 +- runs/download-covidtracking-data.runs.ipynb | 146 ++++++++--------- 4 files changed, 225 insertions(+), 77 deletions(-) create mode 100644 .renku/workflow/264902f3f10248e59668d6b58573c1a9.cwl diff --git a/.renku/workflow/264902f3f10248e59668d6b58573c1a9.cwl b/.renku/workflow/264902f3f10248e59668d6b58573c1a9.cwl new file mode 100644 index 0000000..75ad11f --- /dev/null +++ b/.renku/workflow/264902f3f10248e59668d6b58573c1a9.cwl @@ -0,0 +1,148 @@ +class: Workflow +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: out_folder + streamable: false + type: string + input_10: + default: runs/download-covidtracking-data.runs.ipynb + streamable: false + type: string + input_2: + default: data/covidtracking + streamable: false + type: string + input_3: + default: + class: File + path: ../../notebooks/process/download-covidtracking-data.ipynb + streamable: false + type: File + input_4: + default: runs/download-covidtracking-data.runs.ipynb + streamable: false + type: string + input_5: + default: states-daily.json + streamable: false + type: string + input_6: + default: states-metadata.json + streamable: false + type: string + input_7: + default: out_folder + streamable: false + type: string + input_8: + default: data/covidtracking + streamable: false + type: string + input_9: + default: + class: File + path: ../../notebooks/process/download-covidtracking-data.ipynb + streamable: false + type: File +outputs: + output_0: + outputSource: step_1/output_0 + streamable: false + type: File + output_1: + outputSource: step_1/output_1 + streamable: false + type: Directory +requirements: [] +steps: + step_1: + in: + input_1: input_1 + input_2: input_2 + input_3: input_3 + input_4: input_4 + out: + - output_0 + - output_1 + run: a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl + step_2: + in: + filename: input_5 + input_directory: step_4/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-daily.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - &id001 + class: InlineJavascriptRequirement + - &id002 + class: InitialWorkDirRequirement + listing: $(inputs.input_directory.listing) + successCodes: [] + temporaryFailCodes: [] + step_3: + in: + filename: input_6 + input_directory: step_1/output_1 + out: + - output_file + run: + arguments: [] + baseCommand: + - 'true' + class: CommandLineTool + cwlVersion: v1.0 + hints: [] + inputs: + filename: + default: states-metadata.json + streamable: false + type: string + input_directory: + streamable: false + type: Directory + outputs: + output_file: + outputBinding: + glob: $(inputs.filename) + streamable: false + type: File + permanentFailCodes: [] + requirements: + - *id001 + - *id002 + successCodes: [] + temporaryFailCodes: [] + step_4: + in: + input_1: input_7 + input_2: input_8 + input_3: input_9 + input_4: input_10 + out: + - output_0 + - output_1 + run: a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl diff --git a/data/covidtracking/states-daily.json b/data/covidtracking/states-daily.json index 551a972..f6d605a 100644 --- a/data/covidtracking/states-daily.json +++ b/data/covidtracking/states-daily.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:801b058f6d058226e08bf7199653a1684747727adcb9c15864858faf6d76ad09 -size 471445 +oid sha256:56ac30fed3a75c442559f4d888dadb545890e7ac864db45a1eb466a31ad4090f +size 491877 diff --git a/data/covidtracking/states-metadata.json b/data/covidtracking/states-metadata.json index 3acf4ba..464bdcf 100644 --- a/data/covidtracking/states-metadata.json +++ b/data/covidtracking/states-metadata.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f70a22f2cacc03590ce4e2be83e8af64cc64b863639fac327f07d32470c2213 -size 25750 +oid sha256:d7100b6f41ef1cfea9a6a995dc202bcb62aa8110ff6b5605c05e7ba1fd721794 +size 25296 diff --git a/runs/download-covidtracking-data.runs.ipynb b/runs/download-covidtracking-data.runs.ipynb index 9075747..4f842f2 100644 --- a/runs/download-covidtracking-data.runs.ipynb +++ b/runs/download-covidtracking-data.runs.ipynb @@ -5,10 +5,10 @@ "execution_count": 1, "metadata": { "papermill": { - "duration": 0.895891, - "end_time": "2020-03-30T19:54:09.980998", + "duration": 0.471733, + "end_time": "2020-03-31T08:24:30.934689", "exception": false, - "start_time": "2020-03-30T19:54:09.085107", + "start_time": "2020-03-31T08:24:30.462956", "status": "completed" }, "tags": [] @@ -25,10 +25,10 @@ "execution_count": 2, "metadata": { "papermill": { - "duration": 0.017628, - "end_time": "2020-03-30T19:54:10.013048", + "duration": 0.026018, + "end_time": "2020-03-31T08:24:30.978110", "exception": false, - "start_time": "2020-03-30T19:54:09.995420", + "start_time": "2020-03-31T08:24:30.952092", "status": "completed" }, "tags": [ @@ -46,10 +46,10 @@ "execution_count": 3, "metadata": { "papermill": { - "duration": 0.018745, - "end_time": "2020-03-30T19:54:10.038251", + "duration": 0.023835, + "end_time": "2020-03-31T08:24:31.015903", "exception": false, - "start_time": "2020-03-30T19:54:10.019506", + "start_time": "2020-03-31T08:24:30.992068", "status": "completed" }, "tags": [ @@ -59,7 +59,7 @@ "outputs": [], "source": [ "# Parameters\n", - "PAPERMILL_INPUT_PATH = \"/tmp/hjxho1bm/notebooks/process/download-covidtracking-data.ipynb\"\n", + "PAPERMILL_INPUT_PATH = \"/tmp/anascddl/notebooks/process/download-covidtracking-data.ipynb\"\n", "PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n", "out_folder = \"data/covidtracking\"\n" ] @@ -68,10 +68,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.009672, - "end_time": "2020-03-30T19:54:10.055815", + "duration": 0.005891, + "end_time": "2020-03-31T08:24:31.031893", "exception": false, - "start_time": "2020-03-30T19:54:10.046143", + "start_time": "2020-03-31T08:24:31.026002", "status": "completed" }, "tags": [] @@ -87,10 +87,10 @@ "execution_count": 4, "metadata": { "papermill": { - "duration": 0.273596, - "end_time": "2020-03-30T19:54:10.335256", + "duration": 0.748106, + "end_time": "2020-03-31T08:24:31.785977", "exception": false, - "start_time": "2020-03-30T19:54:10.061660", + "start_time": "2020-03-31T08:24:31.037871", "status": "completed" }, "tags": [] @@ -107,10 +107,10 @@ "execution_count": 5, "metadata": { "papermill": { - "duration": 0.029596, - "end_time": "2020-03-30T19:54:10.380145", + "duration": 0.025387, + "end_time": "2020-03-31T08:24:31.825517", "exception": false, - "start_time": "2020-03-30T19:54:10.350549", + "start_time": "2020-03-31T08:24:31.800130", "status": "completed" }, "tags": [] @@ -129,10 +129,10 @@ "execution_count": 6, "metadata": { "papermill": { - "duration": 0.07619, - "end_time": "2020-03-30T19:54:10.471394", + "duration": 0.072397, + "end_time": "2020-03-31T08:24:31.905584", "exception": false, - "start_time": "2020-03-30T19:54:10.395204", + "start_time": "2020-03-31T08:24:31.833187", "status": "completed" }, "tags": [] @@ -246,10 +246,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.008022, - "end_time": "2020-03-30T19:54:10.492176", + "duration": 0.00763, + "end_time": "2020-03-31T08:24:31.927988", "exception": false, - "start_time": "2020-03-30T19:54:10.484154", + "start_time": "2020-03-31T08:24:31.920358", "status": "completed" }, "tags": [] @@ -263,10 +263,10 @@ "execution_count": 7, "metadata": { "papermill": { - "duration": 0.344233, - "end_time": "2020-03-30T19:54:10.842942", + "duration": 0.330782, + "end_time": "2020-03-31T08:24:32.265799", "exception": false, - "start_time": "2020-03-30T19:54:10.498709", + "start_time": "2020-03-31T08:24:31.935017", "status": "completed" }, "tags": [] @@ -283,10 +283,10 @@ "execution_count": 8, "metadata": { "papermill": { - "duration": 0.029138, - "end_time": "2020-03-30T19:54:10.887021", + "duration": 0.029205, + "end_time": "2020-03-31T08:24:32.310572", "exception": false, - "start_time": "2020-03-30T19:54:10.857883", + "start_time": "2020-03-31T08:24:32.281367", "status": "completed" }, "tags": [] @@ -305,10 +305,10 @@ "execution_count": 9, "metadata": { "papermill": { - "duration": 0.086462, - "end_time": "2020-03-30T19:54:10.983913", + "duration": 0.07432, + "end_time": "2020-03-31T08:24:32.406024", "exception": false, - "start_time": "2020-03-30T19:54:10.897451", + "start_time": "2020-03-31T08:24:32.331704", "status": "completed" }, "tags": [] @@ -318,7 +318,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "1317 data points\n" + "1373 data points\n" ] }, { @@ -364,43 +364,43 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>20200329</td>\n", + " <td>20200330</td>\n", " <td>AK</td>\n", - " <td>102.0</td>\n", - " <td>3232.0</td>\n", + " <td>114.0</td>\n", + " <td>3540.0</td>\n", " <td>NaN</td>\n", - " <td>6.0</td>\n", - " <td>2.0</td>\n", - " <td>3334</td>\n", - " <td>d4c0789e67f59e98176a9ea96200ed348161c6d4</td>\n", - " <td>2020-03-29T20:00:00Z</td>\n", - " <td>3334</td>\n", + " <td>7.0</td>\n", + " <td>3.0</td>\n", + " <td>3654</td>\n", + " <td>01a1c96fd2ed214d8747ab778c2fec7203c8cd2f</td>\n", + " <td>2020-03-30T20:00:00Z</td>\n", + " <td>3654</td>\n", " <td>2</td>\n", - " <td>0.0</td>\n", " <td>1.0</td>\n", - " <td>396.0</td>\n", - " <td>17.0</td>\n", - " <td>413.0</td>\n", + " <td>1.0</td>\n", + " <td>308.0</td>\n", + " <td>12.0</td>\n", + " <td>320.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>20200329</td>\n", + " <td>20200330</td>\n", " <td>AL</td>\n", - " <td>806.0</td>\n", - " <td>4184.0</td>\n", + " <td>859.0</td>\n", + " <td>5694.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", - " <td>4.0</td>\n", - " <td>4990</td>\n", - " <td>9dbf0b598d35897b1f6857899d0a834990f4ec51</td>\n", - " <td>2020-03-29T20:00:00Z</td>\n", - " <td>4990</td>\n", + " <td>6.0</td>\n", + " <td>6553</td>\n", + " <td>1ced1dbd9879f8bbc4b1f7b7876b82611895d58e</td>\n", + " <td>2020-03-30T20:00:00Z</td>\n", + " <td>6553</td>\n", " <td>1</td>\n", - " <td>1.0</td>\n", - " <td>0.0</td>\n", + " <td>2.0</td>\n", " <td>0.0</td>\n", - " <td>110.0</td>\n", - " <td>110.0</td>\n", + " <td>1510.0</td>\n", + " <td>53.0</td>\n", + " <td>1563.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -408,20 +408,20 @@ ], "text/plain": [ " date state positive negative pending hospitalized death total \\\n", - "0 20200329 AK 102.0 3232.0 NaN 6.0 2.0 3334 \n", - "1 20200329 AL 806.0 4184.0 NaN NaN 4.0 4990 \n", + "0 20200330 AK 114.0 3540.0 NaN 7.0 3.0 3654 \n", + "1 20200330 AL 859.0 5694.0 NaN NaN 6.0 6553 \n", "\n", " hash dateChecked \\\n", - "0 d4c0789e67f59e98176a9ea96200ed348161c6d4 2020-03-29T20:00:00Z \n", - "1 9dbf0b598d35897b1f6857899d0a834990f4ec51 2020-03-29T20:00:00Z \n", + "0 01a1c96fd2ed214d8747ab778c2fec7203c8cd2f 2020-03-30T20:00:00Z \n", + "1 1ced1dbd9879f8bbc4b1f7b7876b82611895d58e 2020-03-30T20:00:00Z \n", "\n", " totalTestResults fips deathIncrease hospitalizedIncrease \\\n", - "0 3334 2 0.0 1.0 \n", - "1 4990 1 1.0 0.0 \n", + "0 3654 2 1.0 1.0 \n", + "1 6553 1 2.0 0.0 \n", "\n", " negativeIncrease positiveIncrease totalTestResultsIncrease \n", - "0 396.0 17.0 413.0 \n", - "1 0.0 110.0 110.0 " + "0 308.0 12.0 320.0 \n", + "1 1510.0 53.0 1563.0 " ] }, "execution_count": 9, @@ -455,18 +455,18 @@ "version": "3.7.3" }, "papermill": { - "duration": 3.267562, - "end_time": "2020-03-30T19:54:11.409079", + "duration": 3.290092, + "end_time": "2020-03-31T08:24:32.748032", "environment_variables": {}, "exception": null, - "input_path": "/tmp/hjxho1bm/notebooks/process/download-covidtracking-data.ipynb", + "input_path": "/tmp/anascddl/notebooks/process/download-covidtracking-data.ipynb", "output_path": "runs/download-covidtracking-data.runs.ipynb", "parameters": { - "PAPERMILL_INPUT_PATH": "/tmp/hjxho1bm/notebooks/process/download-covidtracking-data.ipynb", + "PAPERMILL_INPUT_PATH": "/tmp/anascddl/notebooks/process/download-covidtracking-data.ipynb", "PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb", "out_folder": "data/covidtracking" }, - "start_time": "2020-03-30T19:54:08.141517", + "start_time": "2020-03-31T08:24:29.457940", "version": "1.1.0" } }, -- GitLab