From 281ef1c79302e289181560bb906a117cc95f477f Mon Sep 17 00:00:00 2001
From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch>
Date: Fri, 20 Mar 2020 09:12:18 +0000
Subject: [PATCH] renku rerun data/covidtracking/states-metadata.json
 data/covidtracking/states-daily.json

---
 .../e96f412310ff4616a3c3a2169cc4a07b.cwl      | 120 +++++++++++++++
 data/covidtracking/states-daily.json          |   4 +-
 data/covidtracking/states-metadata.json       |   4 +-
 runs/download-covidtracking-data.runs.ipynb   | 137 +++++++++---------
 4 files changed, 196 insertions(+), 69 deletions(-)
 create mode 100644 .renku/workflow/e96f412310ff4616a3c3a2169cc4a07b.cwl

diff --git a/.renku/workflow/e96f412310ff4616a3c3a2169cc4a07b.cwl b/.renku/workflow/e96f412310ff4616a3c3a2169cc4a07b.cwl
new file mode 100644
index 00000000..85b96f52
--- /dev/null
+++ b/.renku/workflow/e96f412310ff4616a3c3a2169cc4a07b.cwl
@@ -0,0 +1,120 @@
+class: Workflow
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default: states-daily.json
+    streamable: false
+    type: string
+  input_2:
+    default: out_folder
+    streamable: false
+    type: string
+  input_3:
+    default: data/covidtracking
+    streamable: false
+    type: string
+  input_4:
+    default:
+      class: File
+      path: ../../notebooks/process/download-covidtracking-data.ipynb
+    streamable: false
+    type: File
+  input_5:
+    default: runs/download-covidtracking-data.runs.ipynb
+    streamable: false
+    type: string
+  input_6:
+    default: states-metadata.json
+    streamable: false
+    type: string
+outputs:
+  output_2:
+    outputSource: step_2/output_0
+    streamable: false
+    type: File
+  output_3:
+    outputSource: step_2/output_1
+    streamable: false
+    type: Directory
+requirements: []
+steps:
+  step_1:
+    in:
+      filename: input_1
+      input_directory: step_2/output_1
+    out:
+    - output_file
+    run:
+      arguments: []
+      baseCommand:
+      - 'true'
+      class: CommandLineTool
+      cwlVersion: v1.0
+      hints: []
+      inputs:
+        filename:
+          default: states-daily.json
+          streamable: false
+          type: string
+        input_directory:
+          streamable: false
+          type: Directory
+      outputs:
+        output_file:
+          outputBinding:
+            glob: $(inputs.filename)
+          streamable: false
+          type: File
+      permanentFailCodes: []
+      requirements:
+      - &id001
+        class: InlineJavascriptRequirement
+      - &id002
+        class: InitialWorkDirRequirement
+        listing: $(inputs.input_directory.listing)
+      successCodes: []
+      temporaryFailCodes: []
+  step_2:
+    in:
+      input_1: input_2
+      input_2: input_3
+      input_3: input_4
+      input_4: input_5
+    out:
+    - output_1
+    - output_0
+    run: ff5f474d38144c5d882c0036bd1059c2_papermill.cwl
+  step_3:
+    in:
+      filename: input_6
+      input_directory: step_2/output_1
+    out:
+    - output_file
+    run:
+      arguments: []
+      baseCommand:
+      - 'true'
+      class: CommandLineTool
+      cwlVersion: v1.0
+      hints: []
+      inputs:
+        filename:
+          default: states-metadata.json
+          streamable: false
+          type: string
+        input_directory:
+          streamable: false
+          type: Directory
+      outputs:
+        output_file:
+          outputBinding:
+            glob: $(inputs.filename)
+          streamable: false
+          type: File
+      permanentFailCodes: []
+      requirements:
+      - *id001
+      - *id002
+      successCodes: []
+      temporaryFailCodes: []
diff --git a/data/covidtracking/states-daily.json b/data/covidtracking/states-daily.json
index df5a0190..d54ef635 100644
--- a/data/covidtracking/states-daily.json
+++ b/data/covidtracking/states-daily.json
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1d3f6f266f353ca5f3e0f78a256b9fe2eba974db337fd67d054d9bd633b17bf
-size 93953
+oid sha256:487f9dcaed9e5a43fa3f6685a7b7e8f6c3fb98b5658debf8ef23a758a84f2962
+size 101680
diff --git a/data/covidtracking/states-metadata.json b/data/covidtracking/states-metadata.json
index 7d4bcdac..d2c8a6c4 100644
--- a/data/covidtracking/states-metadata.json
+++ b/data/covidtracking/states-metadata.json
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff2228b277b9cf60b8a3cb9aac9e1c31aa8cefe455579f28058cec6ae338215a
-size 20123
+oid sha256:e6bb5b35059c3d346ca35ae90e2512bfacff2621fb4d747cda217799d2832b38
+size 21884
diff --git a/runs/download-covidtracking-data.runs.ipynb b/runs/download-covidtracking-data.runs.ipynb
index e759b144..086bdde7 100644
--- a/runs/download-covidtracking-data.runs.ipynb
+++ b/runs/download-covidtracking-data.runs.ipynb
@@ -5,10 +5,10 @@
    "execution_count": 1,
    "metadata": {
     "papermill": {
-     "duration": 0.470882,
-     "end_time": "2020-03-18T21:45:34.449674",
+     "duration": 0.582742,
+     "end_time": "2020-03-20T09:12:16.167813",
      "exception": false,
-     "start_time": "2020-03-18T21:45:33.978792",
+     "start_time": "2020-03-20T09:12:15.585071",
      "status": "completed"
     },
     "tags": []
@@ -25,10 +25,10 @@
    "execution_count": 2,
    "metadata": {
     "papermill": {
-     "duration": 0.016909,
-     "end_time": "2020-03-18T21:45:34.477193",
+     "duration": 0.017681,
+     "end_time": "2020-03-20T09:12:16.194097",
      "exception": false,
-     "start_time": "2020-03-18T21:45:34.460284",
+     "start_time": "2020-03-20T09:12:16.176416",
      "status": "completed"
     },
     "tags": [
@@ -46,10 +46,10 @@
    "execution_count": 3,
    "metadata": {
     "papermill": {
-     "duration": 0.019263,
-     "end_time": "2020-03-18T21:45:34.503659",
+     "duration": 0.019941,
+     "end_time": "2020-03-20T09:12:16.222810",
      "exception": false,
-     "start_time": "2020-03-18T21:45:34.484396",
+     "start_time": "2020-03-20T09:12:16.202869",
      "status": "completed"
     },
     "tags": [
@@ -59,19 +59,19 @@
    "outputs": [],
    "source": [
     "# Parameters\n",
-    "PAPERMILL_INPUT_PATH = \"notebooks/process/download-covidtracking-data.ipynb\"\n",
+    "PAPERMILL_INPUT_PATH = \"/tmp/tq93huw7/notebooks/process/download-covidtracking-data.ipynb\"\n",
     "PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n",
-    "out_folder = \"./data/covidtracking/\"\n"
+    "out_folder = \"data/covidtracking\"\n"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.007558,
-     "end_time": "2020-03-18T21:45:34.520426",
+     "duration": 0.006852,
+     "end_time": "2020-03-20T09:12:16.239661",
      "exception": false,
-     "start_time": "2020-03-18T21:45:34.512868",
+     "start_time": "2020-03-20T09:12:16.232809",
      "status": "completed"
     },
     "tags": []
@@ -87,10 +87,10 @@
    "execution_count": 4,
    "metadata": {
     "papermill": {
-     "duration": 1.425345,
-     "end_time": "2020-03-18T21:45:35.952888",
+     "duration": 1.543652,
+     "end_time": "2020-03-20T09:12:17.791818",
      "exception": false,
-     "start_time": "2020-03-18T21:45:34.527543",
+     "start_time": "2020-03-20T09:12:16.248166",
      "status": "completed"
     },
     "tags": []
@@ -107,10 +107,10 @@
    "execution_count": 5,
    "metadata": {
     "papermill": {
-     "duration": 0.019056,
-     "end_time": "2020-03-18T21:45:35.983865",
+     "duration": 0.019479,
+     "end_time": "2020-03-20T09:12:17.820781",
      "exception": false,
-     "start_time": "2020-03-18T21:45:35.964809",
+     "start_time": "2020-03-20T09:12:17.801302",
      "status": "completed"
     },
     "tags": []
@@ -129,10 +129,10 @@
    "execution_count": 6,
    "metadata": {
     "papermill": {
-     "duration": 0.057062,
-     "end_time": "2020-03-18T21:45:36.050251",
+     "duration": 0.067162,
+     "end_time": "2020-03-20T09:12:17.897197",
      "exception": false,
-     "start_time": "2020-03-18T21:45:35.993189",
+     "start_time": "2020-03-20T09:12:17.830035",
      "status": "completed"
     },
     "tags": []
@@ -167,8 +167,9 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>state</th>\n",
-       "      <th>dataSite</th>\n",
+       "      <th>covid19SiteOld</th>\n",
        "      <th>covid19Site</th>\n",
+       "      <th>covid19SiteSecondary</th>\n",
        "      <th>twitter</th>\n",
        "      <th>pui</th>\n",
        "      <th>pum</th>\n",
@@ -182,6 +183,7 @@
        "      <td>AK</td>\n",
        "      <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n",
        "      <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n",
+       "      <td>NaN</td>\n",
        "      <td>@Alaska_DHSS</td>\n",
        "      <td>All data</td>\n",
        "      <td>False</td>\n",
@@ -192,7 +194,8 @@
        "      <th>1</th>\n",
        "      <td>AL</td>\n",
        "      <td>http://www.alabamapublichealth.gov/infectiousd...</td>\n",
-       "      <td>http://www.alabamapublichealth.gov/infectiousd...</td>\n",
+       "      <td>https://alpublichealth.maps.arcgis.com/apps/op...</td>\n",
+       "      <td>NaN</td>\n",
        "      <td>@alpublichealth</td>\n",
        "      <td>No data</td>\n",
        "      <td>False</td>\n",
@@ -204,17 +207,21 @@
        "</div>"
       ],
       "text/plain": [
-       "  state                                           dataSite  \\\n",
+       "  state                                     covid19SiteOld  \\\n",
        "0    AK  http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...   \n",
        "1    AL  http://www.alabamapublichealth.gov/infectiousd...   \n",
        "\n",
-       "                                         covid19Site          twitter  \\\n",
-       "0  http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...     @Alaska_DHSS   \n",
-       "1  http://www.alabamapublichealth.gov/infectiousd...  @alpublichealth   \n",
+       "                                         covid19Site  covid19SiteSecondary  \\\n",
+       "0  http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...                   NaN   \n",
+       "1  https://alpublichealth.maps.arcgis.com/apps/op...                   NaN   \n",
+       "\n",
+       "           twitter       pui    pum  \\\n",
+       "0     @Alaska_DHSS  All data  False   \n",
+       "1  @alpublichealth   No data  False   \n",
        "\n",
-       "        pui    pum                                              notes     name  \n",
-       "0  All data  False  Unclear if their reported number means \"person...   Alaska  \n",
-       "1   No data  False                     Last negative count from 3/16.  Alabama  "
+       "                                               notes     name  \n",
+       "0  Unclear if their reported number means \"person...   Alaska  \n",
+       "1                     Last negative count from 3/16.  Alabama  "
       ]
      },
      "execution_count": 6,
@@ -232,10 +239,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.011241,
-     "end_time": "2020-03-18T21:45:36.070581",
+     "duration": 0.010887,
+     "end_time": "2020-03-20T09:12:17.919775",
      "exception": false,
-     "start_time": "2020-03-18T21:45:36.059340",
+     "start_time": "2020-03-20T09:12:17.908888",
      "status": "completed"
     },
     "tags": []
@@ -249,10 +256,10 @@
    "execution_count": 7,
    "metadata": {
     "papermill": {
-     "duration": 1.154443,
-     "end_time": "2020-03-18T21:45:37.237753",
+     "duration": 0.134228,
+     "end_time": "2020-03-20T09:12:18.065352",
      "exception": false,
-     "start_time": "2020-03-18T21:45:36.083310",
+     "start_time": "2020-03-20T09:12:17.931124",
      "status": "completed"
     },
     "tags": []
@@ -269,10 +276,10 @@
    "execution_count": 8,
    "metadata": {
     "papermill": {
-     "duration": 0.020027,
-     "end_time": "2020-03-18T21:45:37.268813",
+     "duration": 0.02218,
+     "end_time": "2020-03-20T09:12:18.097912",
      "exception": false,
-     "start_time": "2020-03-18T21:45:37.248786",
+     "start_time": "2020-03-20T09:12:18.075732",
      "status": "completed"
     },
     "tags": []
@@ -291,10 +298,10 @@
    "execution_count": 9,
    "metadata": {
     "papermill": {
-     "duration": 0.055853,
-     "end_time": "2020-03-18T21:45:37.334581",
+     "duration": 0.058368,
+     "end_time": "2020-03-20T09:12:18.166166",
      "exception": false,
-     "start_time": "2020-03-18T21:45:37.278728",
+     "start_time": "2020-03-20T09:12:18.107798",
      "status": "completed"
     },
     "tags": []
@@ -304,7 +311,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "701 data points\n"
+      "757 data points\n"
      ]
     },
     {
@@ -341,25 +348,25 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>20200318</td>\n",
+       "      <td>20200319</td>\n",
        "      <td>AK</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>406.0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>400.0</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>412.0</td>\n",
-       "      <td>2020-03-18T20:00:00Z</td>\n",
+       "      <td>406</td>\n",
+       "      <td>2020-03-19T20:00:00Z</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>20200318</td>\n",
+       "      <td>20200319</td>\n",
        "      <td>AL</td>\n",
-       "      <td>46.0</td>\n",
+       "      <td>68</td>\n",
        "      <td>28.0</td>\n",
        "      <td>NaN</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>74.0</td>\n",
-       "      <td>2020-03-18T20:00:00Z</td>\n",
+       "      <td>96</td>\n",
+       "      <td>2020-03-19T20:00:00Z</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -367,12 +374,12 @@
       ],
       "text/plain": [
        "       date state  positive  negative  pending  death  total  \\\n",
-       "0  20200318    AK       6.0     406.0      NaN    NaN  412.0   \n",
-       "1  20200318    AL      46.0      28.0      NaN    0.0   74.0   \n",
+       "0  20200319    AK         6     400.0      NaN    NaN    406   \n",
+       "1  20200319    AL        68      28.0      NaN    0.0     96   \n",
        "\n",
        "            dateChecked  \n",
-       "0  2020-03-18T20:00:00Z  \n",
-       "1  2020-03-18T20:00:00Z  "
+       "0  2020-03-19T20:00:00Z  \n",
+       "1  2020-03-19T20:00:00Z  "
       ]
      },
      "execution_count": 9,
@@ -403,21 +410,21 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.6"
+   "version": "3.7.3"
   },
   "papermill": {
-   "duration": 4.518691,
-   "end_time": "2020-03-18T21:45:37.656675",
+   "duration": 3.92686,
+   "end_time": "2020-03-20T09:12:18.486365",
    "environment_variables": {},
    "exception": null,
-   "input_path": "notebooks/process/download-covidtracking-data.ipynb",
+   "input_path": "/tmp/tq93huw7/notebooks/process/download-covidtracking-data.ipynb",
    "output_path": "runs/download-covidtracking-data.runs.ipynb",
    "parameters": {
-    "PAPERMILL_INPUT_PATH": "notebooks/process/download-covidtracking-data.ipynb",
+    "PAPERMILL_INPUT_PATH": "/tmp/tq93huw7/notebooks/process/download-covidtracking-data.ipynb",
     "PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb",
-    "out_folder": "./data/covidtracking/"
+    "out_folder": "data/covidtracking"
    },
-   "start_time": "2020-03-18T21:45:33.137984",
+   "start_time": "2020-03-20T09:12:14.559505",
    "version": "1.1.0"
   }
  },
-- 
GitLab