From ccc37edffe6d9ca8ea4b8a229d1d20c42bc4b658 Mon Sep 17 00:00:00 2001
From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch>
Date: Thu, 26 Mar 2020 12:38:34 +0000
Subject: [PATCH] renku run papermill -p out_folder ./data/covidtracking/
 --inject-paths notebooks/process/download-covidtracking-data.ipynb
 runs/download-covidtracking-data.runs.ipynb

---
 ...560c41a54f5aa307ce5f3c5effe5_papermill.cwl |  70 ++++++++
 data/covidtracking/states-daily.json          |   4 +-
 data/covidtracking/states-metadata.json       |   4 +-
 runs/download-covidtracking-data.runs.ipynb   | 157 +++++++++++-------
 4 files changed, 169 insertions(+), 66 deletions(-)
 create mode 100644 .renku/workflow/a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl

diff --git a/.renku/workflow/a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl b/.renku/workflow/a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl
new file mode 100644
index 00000000..8ab0a3b5
--- /dev/null
+++ b/.renku/workflow/a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl
@@ -0,0 +1,70 @@
+arguments: []
+baseCommand:
+- papermill
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default: out_folder
+    inputBinding:
+      position: 1
+      prefix: -p
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_2:
+    default: data/covidtracking
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_3:
+    default:
+      class: File
+      path: ../../notebooks/process/download-covidtracking-data.ipynb
+    inputBinding:
+      position: 3
+      prefix: --inject-paths
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default: runs/download-covidtracking-data.runs.ipynb
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_4)
+    streamable: false
+    type: File
+  output_1:
+    outputBinding:
+      glob: $(inputs.input_2)
+    streamable: false
+    type: Directory
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: runs
+    writable: true
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/covidtracking
+    writable: true
+  - entry: $(inputs.input_3)
+    entryname: notebooks/process/download-covidtracking-data.ipynb
+    writable: false
+successCodes: []
+temporaryFailCodes: []
diff --git a/data/covidtracking/states-daily.json b/data/covidtracking/states-daily.json
index ee2b6bc8..de5cd65d 100644
--- a/data/covidtracking/states-daily.json
+++ b/data/covidtracking/states-daily.json
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b0927c74f3dbb27dacfddb47b00d2c543f8ec932625dc1502b3d048fcc2668e
-size 160445
+oid sha256:a6103d449cc2fd40e6dcdb9b4778817576adfde055d94c1152a5e73bda6a3e98
+size 322323
diff --git a/data/covidtracking/states-metadata.json b/data/covidtracking/states-metadata.json
index 08dcd867..a7a99acd 100644
--- a/data/covidtracking/states-metadata.json
+++ b/data/covidtracking/states-metadata.json
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02330f655200284fbdaef1b3d4272cc84385c648ce7c3b73d910a0cacc3fa529
-size 25509
+oid sha256:12f495f7b6df119f781bdada37c7e80646b43458ae3b595ccbb83878980a6640
+size 27032
diff --git a/runs/download-covidtracking-data.runs.ipynb b/runs/download-covidtracking-data.runs.ipynb
index 5c6e420d..c1ef0273 100644
--- a/runs/download-covidtracking-data.runs.ipynb
+++ b/runs/download-covidtracking-data.runs.ipynb
@@ -5,10 +5,10 @@
    "execution_count": 1,
    "metadata": {
     "papermill": {
-     "duration": 0.564716,
-     "end_time": "2020-03-25T08:22:56.714459",
+     "duration": 0.506363,
+     "end_time": "2020-03-26T12:38:33.422061",
      "exception": false,
-     "start_time": "2020-03-25T08:22:56.149743",
+     "start_time": "2020-03-26T12:38:32.915698",
      "status": "completed"
     },
     "tags": []
@@ -25,10 +25,10 @@
    "execution_count": 2,
    "metadata": {
     "papermill": {
-     "duration": 0.019507,
-     "end_time": "2020-03-25T08:22:56.745116",
+     "duration": 0.020958,
+     "end_time": "2020-03-26T12:38:33.458041",
      "exception": false,
-     "start_time": "2020-03-25T08:22:56.725609",
+     "start_time": "2020-03-26T12:38:33.437083",
      "status": "completed"
     },
     "tags": [
@@ -46,10 +46,10 @@
    "execution_count": 3,
    "metadata": {
     "papermill": {
-     "duration": 0.018572,
-     "end_time": "2020-03-25T08:22:56.773076",
+     "duration": 0.023542,
+     "end_time": "2020-03-26T12:38:33.490779",
      "exception": false,
-     "start_time": "2020-03-25T08:22:56.754504",
+     "start_time": "2020-03-26T12:38:33.467237",
      "status": "completed"
     },
     "tags": [
@@ -59,19 +59,19 @@
    "outputs": [],
    "source": [
     "# Parameters\n",
-    "PAPERMILL_INPUT_PATH = \"/tmp/e18fw9c9/notebooks/process/download-covidtracking-data.ipynb\"\n",
+    "PAPERMILL_INPUT_PATH = \"notebooks/process/download-covidtracking-data.ipynb\"\n",
     "PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n",
-    "out_folder = \"data/covidtracking\"\n"
+    "out_folder = \"./data/covidtracking/\"\n"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.007795,
-     "end_time": "2020-03-25T08:22:56.790849",
+     "duration": 0.007637,
+     "end_time": "2020-03-26T12:38:33.508933",
      "exception": false,
-     "start_time": "2020-03-25T08:22:56.783054",
+     "start_time": "2020-03-26T12:38:33.501296",
      "status": "completed"
     },
     "tags": []
@@ -87,10 +87,10 @@
    "execution_count": 4,
    "metadata": {
     "papermill": {
-     "duration": 1.020789,
-     "end_time": "2020-03-25T08:22:57.818863",
+     "duration": 0.211295,
+     "end_time": "2020-03-26T12:38:33.726495",
      "exception": false,
-     "start_time": "2020-03-25T08:22:56.798074",
+     "start_time": "2020-03-26T12:38:33.515200",
      "status": "completed"
     },
     "tags": []
@@ -107,10 +107,10 @@
    "execution_count": 5,
    "metadata": {
     "papermill": {
-     "duration": 0.022913,
-     "end_time": "2020-03-25T08:22:57.852810",
+     "duration": 0.025576,
+     "end_time": "2020-03-26T12:38:33.768948",
      "exception": false,
-     "start_time": "2020-03-25T08:22:57.829897",
+     "start_time": "2020-03-26T12:38:33.743372",
      "status": "completed"
     },
     "tags": []
@@ -129,10 +129,10 @@
    "execution_count": 6,
    "metadata": {
     "papermill": {
-     "duration": 0.066514,
-     "end_time": "2020-03-25T08:22:57.927763",
+     "duration": 0.073344,
+     "end_time": "2020-03-26T12:38:33.849972",
      "exception": false,
-     "start_time": "2020-03-25T08:22:57.861249",
+     "start_time": "2020-03-26T12:38:33.776628",
      "status": "completed"
     },
     "tags": []
@@ -174,6 +174,7 @@
        "      <th>pui</th>\n",
        "      <th>pum</th>\n",
        "      <th>notes</th>\n",
+       "      <th>fips</th>\n",
        "      <th>name</th>\n",
        "    </tr>\n",
        "  </thead>\n",
@@ -188,6 +189,7 @@
        "      <td>All data</td>\n",
        "      <td>False</td>\n",
        "      <td>We count the reported number as \"persons teste...</td>\n",
+       "      <td>2</td>\n",
        "      <td>Alaska</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -200,6 +202,7 @@
        "      <td>No data</td>\n",
        "      <td>False</td>\n",
        "      <td>Last update time taken from [main page](http:/...</td>\n",
+       "      <td>1</td>\n",
        "      <td>Alabama</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -219,9 +222,13 @@
        "0  http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...     @Alaska_DHSS   \n",
        "1                                               None  @alpublichealth   \n",
        "\n",
-       "        pui    pum                                              notes     name  \n",
-       "0  All data  False  We count the reported number as \"persons teste...   Alaska  \n",
-       "1   No data  False  Last update time taken from [main page](http:/...  Alabama  "
+       "        pui    pum                                              notes  fips  \\\n",
+       "0  All data  False  We count the reported number as \"persons teste...     2   \n",
+       "1   No data  False  Last update time taken from [main page](http:/...     1   \n",
+       "\n",
+       "      name  \n",
+       "0   Alaska  \n",
+       "1  Alabama  "
       ]
      },
      "execution_count": 6,
@@ -239,10 +246,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.008089,
-     "end_time": "2020-03-25T08:22:57.947587",
+     "duration": 0.007572,
+     "end_time": "2020-03-26T12:38:33.873621",
      "exception": false,
-     "start_time": "2020-03-25T08:22:57.939498",
+     "start_time": "2020-03-26T12:38:33.866049",
      "status": "completed"
     },
     "tags": []
@@ -256,10 +263,10 @@
    "execution_count": 7,
    "metadata": {
     "papermill": {
-     "duration": 0.184993,
-     "end_time": "2020-03-25T08:22:58.140940",
+     "duration": 0.178588,
+     "end_time": "2020-03-26T12:38:34.058549",
      "exception": false,
-     "start_time": "2020-03-25T08:22:57.955947",
+     "start_time": "2020-03-26T12:38:33.879961",
      "status": "completed"
     },
     "tags": []
@@ -276,10 +283,10 @@
    "execution_count": 8,
    "metadata": {
     "papermill": {
-     "duration": 0.022977,
-     "end_time": "2020-03-25T08:22:58.177014",
+     "duration": 0.027495,
+     "end_time": "2020-03-26T12:38:34.102339",
      "exception": false,
-     "start_time": "2020-03-25T08:22:58.154037",
+     "start_time": "2020-03-26T12:38:34.074844",
      "status": "completed"
     },
     "tags": []
@@ -298,10 +305,10 @@
    "execution_count": 9,
    "metadata": {
     "papermill": {
-     "duration": 0.060602,
-     "end_time": "2020-03-25T08:22:58.251228",
+     "duration": 0.076898,
+     "end_time": "2020-03-26T12:38:34.188602",
      "exception": false,
-     "start_time": "2020-03-25T08:22:58.190626",
+     "start_time": "2020-03-26T12:38:34.111704",
      "status": "completed"
     },
     "tags": []
@@ -311,7 +318,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "1037 data points\n"
+      "1093 data points\n"
      ]
     },
     {
@@ -344,32 +351,50 @@
        "      <th>death</th>\n",
        "      <th>total</th>\n",
        "      <th>dateChecked</th>\n",
+       "      <th>totalTestResults</th>\n",
+       "      <th>deathIncrease</th>\n",
+       "      <th>hospitalizedIncrease</th>\n",
+       "      <th>negativeIncrease</th>\n",
+       "      <th>positiveIncrease</th>\n",
+       "      <th>totalTestResultsIncrease</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>20200324</td>\n",
+       "      <td>20200325</td>\n",
        "      <td>AK</td>\n",
-       "      <td>36.0</td>\n",
-       "      <td>986.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>42.0</td>\n",
+       "      <td>1649.0</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>1022</td>\n",
-       "      <td>2020-03-24T20:00:00Z</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1691</td>\n",
+       "      <td>2020-03-25T20:00:00Z</td>\n",
+       "      <td>1691</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>663.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>669.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>20200324</td>\n",
+       "      <td>20200325</td>\n",
        "      <td>AL</td>\n",
-       "      <td>215.0</td>\n",
-       "      <td>2106.0</td>\n",
+       "      <td>283.0</td>\n",
+       "      <td>2529.0</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>2321</td>\n",
-       "      <td>2020-03-24T20:00:00Z</td>\n",
+       "      <td>2812</td>\n",
+       "      <td>2020-03-25T20:00:00Z</td>\n",
+       "      <td>2812</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>423.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>491.0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -377,12 +402,20 @@
       ],
       "text/plain": [
        "       date state  positive  negative  pending  hospitalized  death  total  \\\n",
-       "0  20200324    AK      36.0     986.0      NaN           0.0    NaN   1022   \n",
-       "1  20200324    AL     215.0    2106.0      NaN           NaN    0.0   2321   \n",
+       "0  20200325    AK      42.0    1649.0      NaN           1.0    1.0   1691   \n",
+       "1  20200325    AL     283.0    2529.0      NaN           NaN    0.0   2812   \n",
+       "\n",
+       "            dateChecked  totalTestResults  deathIncrease  \\\n",
+       "0  2020-03-25T20:00:00Z              1691            1.0   \n",
+       "1  2020-03-25T20:00:00Z              2812            0.0   \n",
+       "\n",
+       "   hospitalizedIncrease  negativeIncrease  positiveIncrease  \\\n",
+       "0                   1.0             663.0               6.0   \n",
+       "1                   0.0             423.0              68.0   \n",
        "\n",
-       "            dateChecked  \n",
-       "0  2020-03-24T20:00:00Z  \n",
-       "1  2020-03-24T20:00:00Z  "
+       "   totalTestResultsIncrease  \n",
+       "0                     669.0  \n",
+       "1                     491.0  "
       ]
      },
      "execution_count": 9,
@@ -416,18 +449,18 @@
    "version": "3.7.3"
   },
   "papermill": {
-   "duration": 3.460914,
-   "end_time": "2020-03-25T08:22:58.575452",
+   "duration": 2.475212,
+   "end_time": "2020-03-26T12:38:34.517904",
    "environment_variables": {},
    "exception": null,
-   "input_path": "/tmp/e18fw9c9/notebooks/process/download-covidtracking-data.ipynb",
+   "input_path": "notebooks/process/download-covidtracking-data.ipynb",
    "output_path": "runs/download-covidtracking-data.runs.ipynb",
    "parameters": {
-    "PAPERMILL_INPUT_PATH": "/tmp/e18fw9c9/notebooks/process/download-covidtracking-data.ipynb",
+    "PAPERMILL_INPUT_PATH": "notebooks/process/download-covidtracking-data.ipynb",
     "PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb",
-    "out_folder": "data/covidtracking"
+    "out_folder": "./data/covidtracking/"
    },
-   "start_time": "2020-03-25T08:22:55.114538",
+   "start_time": "2020-03-26T12:38:32.042692",
    "version": "1.1.0"
   }
  },
-- 
GitLab