From c41914b8c268fefbe02a83efeca6383b869e6b7c Mon Sep 17 00:00:00 2001
From: "CR (covid cron)" <cramakri+covid-cron@ethz.ch>
Date: Sat, 28 Mar 2020 08:23:24 +0000
Subject: [PATCH] renku rerun data/covidtracking/states-metadata.json
 data/covidtracking/states-daily.json

---
 .../f978984fbc6e4820a1ab25a108a4040a.cwl      | 120 ++++++++++++++
 data/covidtracking/states-daily.json          |   4 +-
 data/covidtracking/states-metadata.json       |   4 +-
 runs/download-covidtracking-data.runs.ipynb   | 156 +++++++++---------
 4 files changed, 205 insertions(+), 79 deletions(-)
 create mode 100644 .renku/workflow/f978984fbc6e4820a1ab25a108a4040a.cwl

diff --git a/.renku/workflow/f978984fbc6e4820a1ab25a108a4040a.cwl b/.renku/workflow/f978984fbc6e4820a1ab25a108a4040a.cwl
new file mode 100644
index 0000000..a70cab1
--- /dev/null
+++ b/.renku/workflow/f978984fbc6e4820a1ab25a108a4040a.cwl
@@ -0,0 +1,120 @@
+class: Workflow
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default: out_folder
+    streamable: false
+    type: string
+  input_2:
+    default: data/covidtracking
+    streamable: false
+    type: string
+  input_3:
+    default:
+      class: File
+      path: ../../notebooks/process/download-covidtracking-data.ipynb
+    streamable: false
+    type: File
+  input_4:
+    default: runs/download-covidtracking-data.runs.ipynb
+    streamable: false
+    type: string
+  input_5:
+    default: states-metadata.json
+    streamable: false
+    type: string
+  input_6:
+    default: states-daily.json
+    streamable: false
+    type: string
+outputs:
+  output_1:
+    outputSource: step_1/output_1
+    streamable: false
+    type: Directory
+  output_2:
+    outputSource: step_1/output_0
+    streamable: false
+    type: File
+requirements: []
+steps:
+  step_1:
+    in:
+      input_1: input_1
+      input_2: input_2
+      input_3: input_3
+      input_4: input_4
+    out:
+    - output_1
+    - output_0
+    run: a17d560c41a54f5aa307ce5f3c5effe5_papermill.cwl
+  step_2:
+    in:
+      filename: input_5
+      input_directory: step_1/output_1
+    out:
+    - output_file
+    run:
+      arguments: []
+      baseCommand:
+      - 'true'
+      class: CommandLineTool
+      cwlVersion: v1.0
+      hints: []
+      inputs:
+        filename:
+          default: states-metadata.json
+          streamable: false
+          type: string
+        input_directory:
+          streamable: false
+          type: Directory
+      outputs:
+        output_file:
+          outputBinding:
+            glob: $(inputs.filename)
+          streamable: false
+          type: File
+      permanentFailCodes: []
+      requirements:
+      - &id001
+        class: InlineJavascriptRequirement
+      - &id002
+        class: InitialWorkDirRequirement
+        listing: $(inputs.input_directory.listing)
+      successCodes: []
+      temporaryFailCodes: []
+  step_3:
+    in:
+      filename: input_6
+      input_directory: step_1/output_1
+    out:
+    - output_file
+    run:
+      arguments: []
+      baseCommand:
+      - 'true'
+      class: CommandLineTool
+      cwlVersion: v1.0
+      hints: []
+      inputs:
+        filename:
+          default: states-daily.json
+          streamable: false
+          type: string
+        input_directory:
+          streamable: false
+          type: Directory
+      outputs:
+        output_file:
+          outputBinding:
+            glob: $(inputs.filename)
+          streamable: false
+          type: File
+      permanentFailCodes: []
+      requirements:
+      - *id001
+      - *id002
+      successCodes: []
+      temporaryFailCodes: []
diff --git a/data/covidtracking/states-daily.json b/data/covidtracking/states-daily.json
index 0a4b0ae..5163fa6 100644
--- a/data/covidtracking/states-daily.json
+++ b/data/covidtracking/states-daily.json
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d03480d703438efd09b64766bd30ad384a4d3056c5e923be15301c00853f5ee
-size 339161
+oid sha256:49daa3567fbddb38f172831d9a251fdf13847222e00514f939f8b237a65b05cd
+size 430713
diff --git a/data/covidtracking/states-metadata.json b/data/covidtracking/states-metadata.json
index 92063a0..d9f3ca9 100644
--- a/data/covidtracking/states-metadata.json
+++ b/data/covidtracking/states-metadata.json
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43ff3cd676e580173075c19bca6629c8187291a2d23ef9d435df634c2555e1df
-size 27040
+oid sha256:5d977ad0a4ed8f67000b0489996ac94d77fbe00371c702ee38c7696776a4e68c
+size 27032
diff --git a/runs/download-covidtracking-data.runs.ipynb b/runs/download-covidtracking-data.runs.ipynb
index a3c1fb2..5b8ae1d 100644
--- a/runs/download-covidtracking-data.runs.ipynb
+++ b/runs/download-covidtracking-data.runs.ipynb
@@ -5,10 +5,10 @@
    "execution_count": 1,
    "metadata": {
     "papermill": {
-     "duration": 3.733962,
-     "end_time": "2020-03-27T08:22:23.632192",
+     "duration": 0.510563,
+     "end_time": "2020-03-28T08:23:22.828576",
      "exception": false,
-     "start_time": "2020-03-27T08:22:19.898230",
+     "start_time": "2020-03-28T08:23:22.318013",
      "status": "completed"
     },
     "tags": []
@@ -25,10 +25,10 @@
    "execution_count": 2,
    "metadata": {
     "papermill": {
-     "duration": 0.021455,
-     "end_time": "2020-03-27T08:22:23.668176",
+     "duration": 0.016784,
+     "end_time": "2020-03-28T08:23:22.855238",
      "exception": false,
-     "start_time": "2020-03-27T08:22:23.646721",
+     "start_time": "2020-03-28T08:23:22.838454",
      "status": "completed"
     },
     "tags": [
@@ -46,10 +46,10 @@
    "execution_count": 3,
    "metadata": {
     "papermill": {
-     "duration": 0.020398,
-     "end_time": "2020-03-27T08:22:23.698819",
+     "duration": 0.021893,
+     "end_time": "2020-03-28T08:23:22.884664",
      "exception": false,
-     "start_time": "2020-03-27T08:22:23.678421",
+     "start_time": "2020-03-28T08:23:22.862771",
      "status": "completed"
     },
     "tags": [
@@ -59,7 +59,7 @@
    "outputs": [],
    "source": [
     "# Parameters\n",
-    "PAPERMILL_INPUT_PATH = \"/tmp/cpvikc_3/notebooks/process/download-covidtracking-data.ipynb\"\n",
+    "PAPERMILL_INPUT_PATH = \"/tmp/lrv5vlu8/notebooks/process/download-covidtracking-data.ipynb\"\n",
     "PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n",
     "out_folder = \"data/covidtracking\"\n"
    ]
@@ -68,10 +68,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.007642,
-     "end_time": "2020-03-27T08:22:23.714336",
+     "duration": 0.006745,
+     "end_time": "2020-03-28T08:23:22.903069",
      "exception": false,
-     "start_time": "2020-03-27T08:22:23.706694",
+     "start_time": "2020-03-28T08:23:22.896324",
      "status": "completed"
     },
     "tags": []
@@ -87,10 +87,10 @@
    "execution_count": 4,
    "metadata": {
     "papermill": {
-     "duration": 0.42213,
-     "end_time": "2020-03-27T08:22:24.144107",
+     "duration": 0.232971,
+     "end_time": "2020-03-28T08:23:23.143017",
      "exception": false,
-     "start_time": "2020-03-27T08:22:23.721977",
+     "start_time": "2020-03-28T08:23:22.910046",
      "status": "completed"
     },
     "tags": []
@@ -107,10 +107,10 @@
    "execution_count": 5,
    "metadata": {
     "papermill": {
-     "duration": 0.023995,
-     "end_time": "2020-03-27T08:22:24.182589",
+     "duration": 0.017333,
+     "end_time": "2020-03-28T08:23:23.171508",
      "exception": false,
-     "start_time": "2020-03-27T08:22:24.158594",
+     "start_time": "2020-03-28T08:23:23.154175",
      "status": "completed"
     },
     "tags": []
@@ -129,10 +129,10 @@
    "execution_count": 6,
    "metadata": {
     "papermill": {
-     "duration": 0.06725,
-     "end_time": "2020-03-27T08:22:24.260714",
+     "duration": 0.063441,
+     "end_time": "2020-03-28T08:23:23.242653",
      "exception": false,
-     "start_time": "2020-03-27T08:22:24.193464",
+     "start_time": "2020-03-28T08:23:23.179212",
      "status": "completed"
     },
     "tags": []
@@ -188,7 +188,7 @@
        "      <td>@Alaska_DHSS</td>\n",
        "      <td>All data</td>\n",
        "      <td>False</td>\n",
-       "      <td>We count the reported number as \"persons teste...</td>\n",
+       "      <td>Total tests are taken from the annotations on ...</td>\n",
        "      <td>2</td>\n",
        "      <td>Alaska</td>\n",
        "    </tr>\n",
@@ -223,7 +223,7 @@
        "1                                               None  @alpublichealth   \n",
        "\n",
        "        pui    pum                                              notes  fips  \\\n",
-       "0  All data  False  We count the reported number as \"persons teste...     2   \n",
+       "0  All data  False  Total tests are taken from the annotations on ...     2   \n",
        "1   No data  False  Negatives = Totals - Positives. Positives seem...     1   \n",
        "\n",
        "      name  \n",
@@ -246,10 +246,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.008786,
-     "end_time": "2020-03-27T08:22:24.278947",
+     "duration": 0.007652,
+     "end_time": "2020-03-28T08:23:23.259216",
      "exception": false,
-     "start_time": "2020-03-27T08:22:24.270161",
+     "start_time": "2020-03-28T08:23:23.251564",
      "status": "completed"
     },
     "tags": []
@@ -263,10 +263,10 @@
    "execution_count": 7,
    "metadata": {
     "papermill": {
-     "duration": 0.167447,
-     "end_time": "2020-03-27T08:22:24.455064",
+     "duration": 0.238704,
+     "end_time": "2020-03-28T08:23:23.505934",
      "exception": false,
-     "start_time": "2020-03-27T08:22:24.287617",
+     "start_time": "2020-03-28T08:23:23.267230",
      "status": "completed"
     },
     "tags": []
@@ -283,10 +283,10 @@
    "execution_count": 8,
    "metadata": {
     "papermill": {
-     "duration": 0.022025,
-     "end_time": "2020-03-27T08:22:24.490030",
+     "duration": 0.022386,
+     "end_time": "2020-03-28T08:23:23.536735",
      "exception": false,
-     "start_time": "2020-03-27T08:22:24.468005",
+     "start_time": "2020-03-28T08:23:23.514349",
      "status": "completed"
     },
     "tags": []
@@ -305,10 +305,10 @@
    "execution_count": 9,
    "metadata": {
     "papermill": {
-     "duration": 0.075398,
-     "end_time": "2020-03-27T08:22:24.575765",
+     "duration": 0.074418,
+     "end_time": "2020-03-28T08:23:23.620961",
      "exception": false,
-     "start_time": "2020-03-27T08:22:24.500367",
+     "start_time": "2020-03-28T08:23:23.546543",
      "status": "completed"
     },
     "tags": []
@@ -318,7 +318,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "1149 data points\n"
+      "1205 data points\n"
      ]
     },
     {
@@ -350,8 +350,10 @@
        "      <th>hospitalized</th>\n",
        "      <th>death</th>\n",
        "      <th>total</th>\n",
+       "      <th>hash</th>\n",
        "      <th>dateChecked</th>\n",
        "      <th>totalTestResults</th>\n",
+       "      <th>fips</th>\n",
        "      <th>deathIncrease</th>\n",
        "      <th>hospitalizedIncrease</th>\n",
        "      <th>negativeIncrease</th>\n",
@@ -362,39 +364,43 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>20200326</td>\n",
+       "      <td>20200327</td>\n",
        "      <td>AK</td>\n",
-       "      <td>59.0</td>\n",
-       "      <td>1801.0</td>\n",
-       "      <td>NaN</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>2319.0</td>\n",
+       "      <td>13.0</td>\n",
        "      <td>3.0</td>\n",
        "      <td>1.0</td>\n",
-       "      <td>1860</td>\n",
-       "      <td>2020-03-26T20:00:00Z</td>\n",
-       "      <td>1860</td>\n",
+       "      <td>2401</td>\n",
+       "      <td>588f633d59494e4d58466fccfa9628c98568396b</td>\n",
+       "      <td>2020-03-27T20:00:00Z</td>\n",
+       "      <td>2388</td>\n",
+       "      <td>2</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>152.0</td>\n",
-       "      <td>17.0</td>\n",
-       "      <td>169.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>518.0</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>528.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>20200326</td>\n",
+       "      <td>20200327</td>\n",
        "      <td>AL</td>\n",
-       "      <td>506.0</td>\n",
-       "      <td>3593.0</td>\n",
+       "      <td>587.0</td>\n",
+       "      <td>4184.0</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>4099</td>\n",
-       "      <td>2020-03-26T20:00:00Z</td>\n",
-       "      <td>4099</td>\n",
-       "      <td>1.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>4771</td>\n",
+       "      <td>c3883f10c8760fdd9d196c5e2483205434bc4e0f</td>\n",
+       "      <td>2020-03-27T20:00:00Z</td>\n",
+       "      <td>4771</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>1064.0</td>\n",
-       "      <td>223.0</td>\n",
-       "      <td>1287.0</td>\n",
+       "      <td>591.0</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>672.0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -402,20 +408,20 @@
       ],
       "text/plain": [
        "       date state  positive  negative  pending  hospitalized  death  total  \\\n",
-       "0  20200326    AK      59.0    1801.0      NaN           3.0    1.0   1860   \n",
-       "1  20200326    AL     506.0    3593.0      NaN           NaN    1.0   4099   \n",
+       "0  20200327    AK      69.0    2319.0     13.0           3.0    1.0   2401   \n",
+       "1  20200327    AL     587.0    4184.0      NaN           NaN    3.0   4771   \n",
        "\n",
-       "            dateChecked  totalTestResults  deathIncrease  \\\n",
-       "0  2020-03-26T20:00:00Z              1860            0.0   \n",
-       "1  2020-03-26T20:00:00Z              4099            1.0   \n",
+       "                                       hash           dateChecked  \\\n",
+       "0  588f633d59494e4d58466fccfa9628c98568396b  2020-03-27T20:00:00Z   \n",
+       "1  c3883f10c8760fdd9d196c5e2483205434bc4e0f  2020-03-27T20:00:00Z   \n",
        "\n",
-       "   hospitalizedIncrease  negativeIncrease  positiveIncrease  \\\n",
-       "0                   2.0             152.0              17.0   \n",
-       "1                   0.0            1064.0             223.0   \n",
+       "   totalTestResults  fips  deathIncrease  hospitalizedIncrease  \\\n",
+       "0              2388     2            0.0                   0.0   \n",
+       "1              4771     1            2.0                   0.0   \n",
        "\n",
-       "   totalTestResultsIncrease  \n",
-       "0                     169.0  \n",
-       "1                    1287.0  "
+       "   negativeIncrease  positiveIncrease  totalTestResultsIncrease  \n",
+       "0             518.0              10.0                     528.0  \n",
+       "1             591.0              81.0                     672.0  "
       ]
      },
      "execution_count": 9,
@@ -449,18 +455,18 @@
    "version": "3.7.3"
   },
   "papermill": {
-   "duration": 6.40933,
-   "end_time": "2020-03-27T08:22:24.898756",
+   "duration": 2.566078,
+   "end_time": "2020-03-28T08:23:23.945976",
    "environment_variables": {},
    "exception": null,
-   "input_path": "/tmp/cpvikc_3/notebooks/process/download-covidtracking-data.ipynb",
+   "input_path": "/tmp/lrv5vlu8/notebooks/process/download-covidtracking-data.ipynb",
    "output_path": "runs/download-covidtracking-data.runs.ipynb",
    "parameters": {
-    "PAPERMILL_INPUT_PATH": "/tmp/cpvikc_3/notebooks/process/download-covidtracking-data.ipynb",
+    "PAPERMILL_INPUT_PATH": "/tmp/lrv5vlu8/notebooks/process/download-covidtracking-data.ipynb",
     "PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb",
     "out_folder": "data/covidtracking"
    },
-   "start_time": "2020-03-27T08:22:18.489426",
+   "start_time": "2020-03-28T08:23:21.379898",
    "version": "1.1.0"
   }
  },
-- 
GitLab