From 2a41feb4913244d3720ffeda2a5c63665ba6eb7a Mon Sep 17 00:00:00 2001
From: "CR (covid cron)" <beepbop@example.com>
Date: Wed, 3 Jun 2020 15:45:18 +0000
Subject: [PATCH] renku rerun data/covidtracking/states-metadata.json
 data/covidtracking/states-daily.json

---
 .../06187256ab5b48729e96df2e6340d7df.cwl      | 120 ++++++++++
 data/covidtracking/states-daily.json          |   4 +-
 data/covidtracking/states-metadata.json       |   4 +-
 runs/download-covidtracking-data.runs.ipynb   | 220 +++++++++---------
 4 files changed, 232 insertions(+), 116 deletions(-)
 create mode 100644 .renku/workflow/06187256ab5b48729e96df2e6340d7df.cwl

diff --git a/.renku/workflow/06187256ab5b48729e96df2e6340d7df.cwl b/.renku/workflow/06187256ab5b48729e96df2e6340d7df.cwl
new file mode 100644
index 000000000..549fc8691
--- /dev/null
+++ b/.renku/workflow/06187256ab5b48729e96df2e6340d7df.cwl
@@ -0,0 +1,120 @@
+class: Workflow
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default: states-metadata.json
+    streamable: false
+    type: string
+  input_2:
+    default: states-daily.json
+    streamable: false
+    type: string
+  input_3:
+    default: out_folder
+    streamable: false
+    type: string
+  input_4:
+    default: data/covidtracking
+    streamable: false
+    type: string
+  input_5:
+    default:
+      class: File
+      path: ../../notebooks/process/download-covidtracking-data.ipynb
+    streamable: false
+    type: File
+  input_6:
+    default: runs/download-covidtracking-data.runs.ipynb
+    streamable: false
+    type: string
+outputs:
+  output_2:
+    outputSource: step_3/output_1
+    streamable: false
+    type: Directory
+  output_3:
+    outputSource: step_3/output_0
+    streamable: false
+    type: File
+requirements: []
+steps:
+  step_1:
+    in:
+      filename: input_1
+      input_directory: step_3/output_1
+    out:
+    - output_file
+    run:
+      arguments: []
+      baseCommand:
+      - 'true'
+      class: CommandLineTool
+      cwlVersion: v1.0
+      hints: []
+      inputs:
+        filename:
+          default: states-metadata.json
+          streamable: false
+          type: string
+        input_directory:
+          streamable: false
+          type: Directory
+      outputs:
+        output_file:
+          outputBinding:
+            glob: $(inputs.filename)
+          streamable: false
+          type: File
+      permanentFailCodes: []
+      requirements:
+      - &id001
+        class: InlineJavascriptRequirement
+      - &id002
+        class: InitialWorkDirRequirement
+        listing: $(inputs.input_directory.listing)
+      successCodes: []
+      temporaryFailCodes: []
+  step_2:
+    in:
+      filename: input_2
+      input_directory: step_3/output_1
+    out:
+    - output_file
+    run:
+      arguments: []
+      baseCommand:
+      - 'true'
+      class: CommandLineTool
+      cwlVersion: v1.0
+      hints: []
+      inputs:
+        filename:
+          default: states-daily.json
+          streamable: false
+          type: string
+        input_directory:
+          streamable: false
+          type: Directory
+      outputs:
+        output_file:
+          outputBinding:
+            glob: $(inputs.filename)
+          streamable: false
+          type: File
+      permanentFailCodes: []
+      requirements:
+      - *id001
+      - *id002
+      successCodes: []
+      temporaryFailCodes: []
+  step_3:
+    in:
+      input_1: input_3
+      input_2: input_4
+      input_3: input_5
+      input_4: input_6
+    out:
+    - output_1
+    - output_0
+    run: a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl
diff --git a/data/covidtracking/states-daily.json b/data/covidtracking/states-daily.json
index 27dfd82d2..8b8ad50a5 100644
--- a/data/covidtracking/states-daily.json
+++ b/data/covidtracking/states-daily.json
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b53d5ec861e859d9c4b5d92c824a94cf34fccdc411feee3fe450ca1b3566e6aa
-size 2945856
+oid sha256:20d05d8bf54fbe25d465293949d98baa8217af8455274ebf331eaa9fdecfc7b5
+size 3896218
diff --git a/data/covidtracking/states-metadata.json b/data/covidtracking/states-metadata.json
index 095f192b9..82c95999b 100644
--- a/data/covidtracking/states-metadata.json
+++ b/data/covidtracking/states-metadata.json
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23628a019bf4b14d081e8b8519be4be9ee4712078cae76e8f455f72c066ffb9c
-size 35330
+oid sha256:106cf31b2776e3d9c810972b8819576e6b9069017c01c9baf326178128121813
+size 34737
diff --git a/runs/download-covidtracking-data.runs.ipynb b/runs/download-covidtracking-data.runs.ipynb
index 2bbab0ab4..7b7ab2ac3 100644
--- a/runs/download-covidtracking-data.runs.ipynb
+++ b/runs/download-covidtracking-data.runs.ipynb
@@ -5,10 +5,10 @@
    "execution_count": 1,
    "metadata": {
     "papermill": {
-     "duration": 0.52214,
-     "end_time": "2020-06-02T08:11:09.503843",
+     "duration": 3.35233,
+     "end_time": "2020-06-03T15:45:09.318776",
      "exception": false,
-     "start_time": "2020-06-02T08:11:08.981703",
+     "start_time": "2020-06-03T15:45:05.966446",
      "status": "completed"
     },
     "tags": []
@@ -25,10 +25,10 @@
    "execution_count": 2,
    "metadata": {
     "papermill": {
-     "duration": 0.021722,
-     "end_time": "2020-06-02T08:11:09.539612",
+     "duration": 0.0259,
+     "end_time": "2020-06-03T15:45:09.361558",
      "exception": false,
-     "start_time": "2020-06-02T08:11:09.517890",
+     "start_time": "2020-06-03T15:45:09.335658",
      "status": "completed"
     },
     "tags": [
@@ -46,10 +46,10 @@
    "execution_count": 3,
    "metadata": {
     "papermill": {
-     "duration": 0.022346,
-     "end_time": "2020-06-02T08:11:09.570298",
+     "duration": 0.023503,
+     "end_time": "2020-06-03T15:45:09.397029",
      "exception": false,
-     "start_time": "2020-06-02T08:11:09.547952",
+     "start_time": "2020-06-03T15:45:09.373526",
      "status": "completed"
     },
     "tags": [
@@ -59,7 +59,7 @@
    "outputs": [],
    "source": [
     "# Parameters\n",
-    "PAPERMILL_INPUT_PATH = \"/tmp/_29it5g5/notebooks/process/download-covidtracking-data.ipynb\"\n",
+    "PAPERMILL_INPUT_PATH = \"/tmp/eqrts4dp/notebooks/process/download-covidtracking-data.ipynb\"\n",
     "PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n",
     "out_folder = \"data/covidtracking\"\n"
    ]
@@ -68,10 +68,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.006388,
-     "end_time": "2020-06-02T08:11:09.585324",
+     "duration": 0.006779,
+     "end_time": "2020-06-03T15:45:09.411335",
      "exception": false,
-     "start_time": "2020-06-02T08:11:09.578936",
+     "start_time": "2020-06-03T15:45:09.404556",
      "status": "completed"
     },
     "tags": []
@@ -87,10 +87,10 @@
    "execution_count": 4,
    "metadata": {
     "papermill": {
-     "duration": 0.620735,
-     "end_time": "2020-06-02T08:11:10.212779",
+     "duration": 0.597563,
+     "end_time": "2020-06-03T15:45:10.014708",
      "exception": false,
-     "start_time": "2020-06-02T08:11:09.592044",
+     "start_time": "2020-06-03T15:45:09.417145",
      "status": "completed"
     },
     "tags": []
@@ -107,10 +107,10 @@
    "execution_count": 5,
    "metadata": {
     "papermill": {
-     "duration": 0.028753,
-     "end_time": "2020-06-02T08:11:10.257183",
+     "duration": 0.03125,
+     "end_time": "2020-06-03T15:45:10.062585",
      "exception": false,
-     "start_time": "2020-06-02T08:11:10.228430",
+     "start_time": "2020-06-03T15:45:10.031335",
      "status": "completed"
     },
     "tags": []
@@ -129,10 +129,10 @@
    "execution_count": 6,
    "metadata": {
     "papermill": {
-     "duration": 0.048608,
-     "end_time": "2020-06-02T08:11:10.315976",
+     "duration": 0.091143,
+     "end_time": "2020-06-03T15:45:10.164517",
      "exception": false,
-     "start_time": "2020-06-02T08:11:10.267368",
+     "start_time": "2020-06-03T15:45:10.073374",
      "status": "completed"
     },
     "tags": []
@@ -167,52 +167,52 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>state</th>\n",
-       "      <th>covid19SiteOld</th>\n",
+       "      <th>notes</th>\n",
        "      <th>covid19Site</th>\n",
        "      <th>covid19SiteSecondary</th>\n",
        "      <th>twitter</th>\n",
+       "      <th>covid19SiteOld</th>\n",
+       "      <th>name</th>\n",
+       "      <th>fips</th>\n",
        "      <th>pui</th>\n",
        "      <th>pum</th>\n",
-       "      <th>notes</th>\n",
-       "      <th>fips</th>\n",
-       "      <th>name</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>AK</td>\n",
-       "      <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n",
+       "      <td>Total tests are taken from the annotations on ...</td>\n",
        "      <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n",
        "      <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n",
        "      <td>@Alaska_DHSS</td>\n",
-       "      <td>All data</td>\n",
-       "      <td>False</td>\n",
-       "      <td>Total tests are taken from the annotations on ...</td>\n",
-       "      <td>2</td>\n",
+       "      <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n",
        "      <td>Alaska</td>\n",
+       "      <td>2</td>\n",
+       "      <td></td>\n",
+       "      <td>False</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>AL</td>\n",
-       "      <td>http://www.alabamapublichealth.gov/infectiousd...</td>\n",
+       "      <td>Negatives = (Totals - Positives) \\nPositives o...</td>\n",
        "      <td>https://alpublichealth.maps.arcgis.com/apps/op...</td>\n",
        "      <td>https://dph1.adph.state.al.us/covid-19/</td>\n",
        "      <td>@alpublichealth</td>\n",
-       "      <td>No data</td>\n",
-       "      <td>False</td>\n",
-       "      <td>Negatives = (Totals - Positives) \\nPositives o...</td>\n",
-       "      <td>1</td>\n",
+       "      <td>http://www.alabamapublichealth.gov/infectiousd...</td>\n",
        "      <td>Alabama</td>\n",
+       "      <td>1</td>\n",
+       "      <td></td>\n",
+       "      <td>False</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "  state                                     covid19SiteOld  \\\n",
-       "0    AK  http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...   \n",
-       "1    AL  http://www.alabamapublichealth.gov/infectiousd...   \n",
+       "  state                                              notes  \\\n",
+       "0    AK  Total tests are taken from the annotations on ...   \n",
+       "1    AL  Negatives = (Totals - Positives) \\nPositives o...   \n",
        "\n",
        "                                         covid19Site  \\\n",
        "0  http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...   \n",
@@ -222,13 +222,9 @@
        "0  http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...     @Alaska_DHSS   \n",
        "1            https://dph1.adph.state.al.us/covid-19/  @alpublichealth   \n",
        "\n",
-       "        pui    pum                                              notes  fips  \\\n",
-       "0  All data  False  Total tests are taken from the annotations on ...     2   \n",
-       "1   No data  False  Negatives = (Totals - Positives) \\nPositives o...     1   \n",
-       "\n",
-       "      name  \n",
-       "0   Alaska  \n",
-       "1  Alabama  "
+       "                                      covid19SiteOld     name  fips pui    pum  \n",
+       "0  http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...   Alaska     2      False  \n",
+       "1  http://www.alabamapublichealth.gov/infectiousd...  Alabama     1      False  "
       ]
      },
      "execution_count": 6,
@@ -246,10 +242,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.00642,
-     "end_time": "2020-06-02T08:11:10.329275",
+     "duration": 0.008537,
+     "end_time": "2020-06-03T15:45:10.188145",
      "exception": false,
-     "start_time": "2020-06-02T08:11:10.322855",
+     "start_time": "2020-06-03T15:45:10.179608",
      "status": "completed"
     },
     "tags": []
@@ -263,10 +259,10 @@
    "execution_count": 7,
    "metadata": {
     "papermill": {
-     "duration": 5.270376,
-     "end_time": "2020-06-02T08:11:15.606304",
+     "duration": 6.860154,
+     "end_time": "2020-06-03T15:45:17.054999",
      "exception": false,
-     "start_time": "2020-06-02T08:11:10.335928",
+     "start_time": "2020-06-03T15:45:10.194845",
      "status": "completed"
     },
     "tags": []
@@ -283,10 +279,10 @@
    "execution_count": 8,
    "metadata": {
     "papermill": {
-     "duration": 0.033573,
-     "end_time": "2020-06-02T08:11:15.657902",
+     "duration": 0.036298,
+     "end_time": "2020-06-03T15:45:17.109685",
      "exception": false,
-     "start_time": "2020-06-02T08:11:15.624329",
+     "start_time": "2020-06-03T15:45:17.073387",
      "status": "completed"
     },
     "tags": []
@@ -305,10 +301,10 @@
    "execution_count": 9,
    "metadata": {
     "papermill": {
-     "duration": 0.157194,
-     "end_time": "2020-06-02T08:11:15.830240",
+     "duration": 0.224468,
+     "end_time": "2020-06-03T15:45:17.346605",
      "exception": false,
-     "start_time": "2020-06-02T08:11:15.673046",
+     "start_time": "2020-06-03T15:45:17.122137",
      "status": "completed"
     },
     "tags": []
@@ -318,7 +314,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "4945 data points\n"
+      "5001 data points\n"
      ]
     },
     {
@@ -353,94 +349,94 @@
        "      <th>inIcuCumulative</th>\n",
        "      <th>onVentilatorCurrently</th>\n",
        "      <th>...</th>\n",
-       "      <th>hospitalized</th>\n",
-       "      <th>total</th>\n",
-       "      <th>totalTestResults</th>\n",
        "      <th>posNeg</th>\n",
-       "      <th>fips</th>\n",
        "      <th>deathIncrease</th>\n",
        "      <th>hospitalizedIncrease</th>\n",
-       "      <th>negativeIncrease</th>\n",
-       "      <th>positiveIncrease</th>\n",
-       "      <th>totalTestResultsIncrease</th>\n",
+       "      <th>hash</th>\n",
+       "      <th>commercialScore</th>\n",
+       "      <th>negativeRegularScore</th>\n",
+       "      <th>negativeScore</th>\n",
+       "      <th>positiveScore</th>\n",
+       "      <th>score</th>\n",
+       "      <th>grade</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>20200601</td>\n",
+       "      <td>20200602</td>\n",
        "      <td>AK</td>\n",
-       "      <td>467.0</td>\n",
-       "      <td>53723.0</td>\n",
+       "      <td>487.0</td>\n",
+       "      <td>55716.0</td>\n",
        "      <td>NaN</td>\n",
        "      <td>10.0</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>54190</td>\n",
-       "      <td>54190</td>\n",
-       "      <td>54190</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>2462.0</td>\n",
-       "      <td>33.0</td>\n",
-       "      <td>2495.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>56203</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8bfc576f3ce0c5438919ab2a672255d0630714ec</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>20200601</td>\n",
+       "      <td>20200602</td>\n",
        "      <td>AL</td>\n",
-       "      <td>18363.0</td>\n",
+       "      <td>18642.0</td>\n",
        "      <td>205160.0</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>1856.0</td>\n",
+       "      <td>1879.0</td>\n",
        "      <td>NaN</td>\n",
        "      <td>591.0</td>\n",
        "      <td>NaN</td>\n",
        "      <td>...</td>\n",
-       "      <td>1856.0</td>\n",
-       "      <td>223523</td>\n",
-       "      <td>223523</td>\n",
-       "      <td>223523</td>\n",
-       "      <td>1</td>\n",
-       "      <td>15.0</td>\n",
-       "      <td>12.0</td>\n",
-       "      <td>5510.0</td>\n",
-       "      <td>460.0</td>\n",
-       "      <td>5970.0</td>\n",
+       "      <td>223802</td>\n",
+       "      <td>5</td>\n",
+       "      <td>23</td>\n",
+       "      <td>e925d317dd6bba774728abaf5db247f3c5983394</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>2 rows × 27 columns</p>\n",
+       "<p>2 rows × 35 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
        "       date state  positive  negative  pending  hospitalizedCurrently  \\\n",
-       "0  20200601    AK     467.0   53723.0      NaN                   10.0   \n",
-       "1  20200601    AL   18363.0  205160.0      NaN                    NaN   \n",
+       "0  20200602    AK     487.0   55716.0      NaN                   10.0   \n",
+       "1  20200602    AL   18642.0  205160.0      NaN                    NaN   \n",
        "\n",
        "   hospitalizedCumulative  inIcuCurrently  inIcuCumulative  \\\n",
        "0                     NaN             NaN              NaN   \n",
-       "1                  1856.0             NaN            591.0   \n",
+       "1                  1879.0             NaN            591.0   \n",
        "\n",
-       "   onVentilatorCurrently  ...  hospitalized   total totalTestResults  posNeg  \\\n",
-       "0                    1.0  ...           NaN   54190            54190   54190   \n",
-       "1                    NaN  ...        1856.0  223523           223523  223523   \n",
+       "   onVentilatorCurrently  ...  posNeg  deathIncrease hospitalizedIncrease  \\\n",
+       "0                    0.0  ...   56203              0                    0   \n",
+       "1                    NaN  ...  223802              5                   23   \n",
        "\n",
-       "  fips deathIncrease  hospitalizedIncrease  negativeIncrease  \\\n",
-       "0    2           0.0                   0.0            2462.0   \n",
-       "1    1          15.0                  12.0            5510.0   \n",
+       "                                       hash commercialScore  \\\n",
+       "0  8bfc576f3ce0c5438919ab2a672255d0630714ec               0   \n",
+       "1  e925d317dd6bba774728abaf5db247f3c5983394               0   \n",
        "\n",
-       "   positiveIncrease  totalTestResultsIncrease  \n",
-       "0              33.0                    2495.0  \n",
-       "1             460.0                    5970.0  \n",
+       "  negativeRegularScore  negativeScore  positiveScore score  grade  \n",
+       "0                    0              0              0     0         \n",
+       "1                    0              0              0     0         \n",
        "\n",
-       "[2 rows x 27 columns]"
+       "[2 rows x 35 columns]"
       ]
      },
      "execution_count": 9,
@@ -474,18 +470,18 @@
    "version": "3.7.3"
   },
   "papermill": {
-   "duration": 8.164183,
-   "end_time": "2020-06-02T08:11:16.160309",
+   "duration": 13.234479,
+   "end_time": "2020-06-03T15:45:17.775676",
    "environment_variables": {},
    "exception": null,
-   "input_path": "/tmp/_29it5g5/notebooks/process/download-covidtracking-data.ipynb",
+   "input_path": "/tmp/eqrts4dp/notebooks/process/download-covidtracking-data.ipynb",
    "output_path": "runs/download-covidtracking-data.runs.ipynb",
    "parameters": {
-    "PAPERMILL_INPUT_PATH": "/tmp/_29it5g5/notebooks/process/download-covidtracking-data.ipynb",
+    "PAPERMILL_INPUT_PATH": "/tmp/eqrts4dp/notebooks/process/download-covidtracking-data.ipynb",
     "PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb",
     "out_folder": "data/covidtracking"
    },
-   "start_time": "2020-06-02T08:11:07.996126",
+   "start_time": "2020-06-03T15:45:04.541197",
    "version": "1.1.0"
   }
  },
-- 
GitLab