renku run papermill -p ts_folder ./data/covid-19_jhu-csse/ -p worldmap_path...

renku run papermill -p ts_folder ./data/covid-19_jhu-csse/ -p worldmap_path ./data/worldmap/country_centroids.csv -p out_folder ./data/geodata/ --inject-paths notebooks/process/CompileGeoData.ipynb runs/CompileGeoData.run.ipynb

renku run papermill -p ts_folder ./data/covid-19_jhu-csse/ -p worldmap_path...
renku run papermill -p ts_folder ./data/covid-19_jhu-csse/ -p worldmap_path ./data/worldmap/country_centroids.csv -p out_folder ./data/geodata/ --inject-paths notebooks/process/CompileGeoData.ipynb runs/CompileGeoData.run.ipynb
638d6c01 · Chandrasekhar Ramakrishnan · renku 0.9.1 · c987c341 · 638d6c01 · 638d6c01
Commit 638d6c01 authored 5 years ago by Chandrasekhar Ramakrishnan Committed by renku 0.9.1 5 years ago
--- a/.renku/workflow/a8b2f47629164158a118963ae58eea3b_papermill.cwl
+++ b/.renku/workflow/a8b2f47629164158a118963ae58eea3b_papermill.cwl
+arguments: []
+baseCommand:
+- papermill
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default: ts_folder
+    inputBinding:
+      position: 1
+      prefix: -p
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_2:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/covid-19_jhu-csse
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: Directory
+  input_3:
+    default: worldmap_path
+    inputBinding:
+      position: 3
+      prefix: -p
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_4:
+    default:
+      class: File
+      path: ../../data/worldmap/country_centroids.csv
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default: out_folder
+    inputBinding:
+      position: 5
+      prefix: -p
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_6:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/geodata
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: Directory
+  input_7:
+    default:
+      class: File
+      path: ../../notebooks/process/CompileGeoData.ipynb
+    inputBinding:
+      position: 7
+      prefix: --inject-paths
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_8:
+    default: runs/CompileGeoData.run.ipynb
+    inputBinding:
+      position: 8
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_8)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: runs
+    writable: true
+  - entry: $(inputs.input_2)
+    entryname: data/covid-19_jhu-csse
+    writable: false
+  - entry: $(inputs.input_4)
+    entryname: data/worldmap/country_centroids.csv
+    writable: false
+  - entry: $(inputs.input_6)
+    entryname: data/geodata
+    writable: false
+  - entry: $(inputs.input_7)
+    entryname: notebooks/process/CompileGeoData.ipynb
+    writable: false
+successCodes: []
+temporaryFailCodes: []
--- a/runs/CompileGeoData.run.ipynb
+++ b/runs/CompileGeoData.run.ipynb
@@ -4,10 +4,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.016474,
+     "duration": 0.019915,
-     "end_time": "2020-03-18T17:20:53.504777",
+     "end_time": "2020-03-18T17:39:45.438621",
     "exception": false,
-     "start_time": "2020-03-18T17:20:53.488303",
+     "start_time": "2020-03-18T17:39:45.418706",
     "status": "completed"
    },
    "tags": []
@@ -23,10 +23,10 @@
   "execution_count": 1,
   "metadata": {
    "papermill": {
-     "duration": 0.329109,
+     "duration": 0.356977,
-     "end_time": "2020-03-18T17:20:53.844329",
+     "end_time": "2020-03-18T17:39:45.806137",
     "exception": false,
-     "start_time": "2020-03-18T17:20:53.515220",
+     "start_time": "2020-03-18T17:39:45.449160",
     "status": "completed"
    },
    "tags": []
@@ -42,10 +42,10 @@
   "execution_count": 2,
   "metadata": {
    "papermill": {
-     "duration": 0.018825,
+     "duration": 0.020489,
-     "end_time": "2020-03-18T17:20:53.878971",
+     "end_time": "2020-03-18T17:39:45.839942",
     "exception": false,
-     "start_time": "2020-03-18T17:20:53.860146",
+     "start_time": "2020-03-18T17:39:45.819453",
     "status": "completed"
    },
    "tags": []
@@ -62,10 +62,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.009852,
+     "duration": 0.010294,
-     "end_time": "2020-03-18T17:20:53.900179",
+     "end_time": "2020-03-18T17:39:45.862217",
     "exception": false,
-     "start_time": "2020-03-18T17:20:53.890327",
+     "start_time": "2020-03-18T17:39:45.851923",
     "status": "completed"
    },
    "tags": [
@@ -81,10 +81,10 @@
   "execution_count": 3,
   "metadata": {
    "papermill": {
-     "duration": 0.021389,
+     "duration": 0.023154,
-     "end_time": "2020-03-18T17:20:53.931545",
+     "end_time": "2020-03-18T17:39:45.909658",
     "exception": false,
-     "start_time": "2020-03-18T17:20:53.910156",
+     "start_time": "2020-03-18T17:39:45.886504",
     "status": "completed"
    },
    "tags": [
@@ -94,11 +94,11 @@
   "outputs": [],
   "source": [
    "# Parameters\n",
-    "PAPERMILL_INPUT_PATH = \"/tmp/dixae90v/notebooks/CompileGeoData.ipynb\"\n",
+    "PAPERMILL_INPUT_PATH = \"notebooks/process/CompileGeoData.ipynb\"\n",
    "PAPERMILL_OUTPUT_PATH = \"runs/CompileGeoData.run.ipynb\"\n",
-    "ts_folder = \"/tmp/dixae90v/data/covid-19_jhu-csse\"\n",
+    "ts_folder = \"./data/covid-19_jhu-csse/\"\n",
-    "worldmap_path = \"/tmp/dixae90v/data/worldmap/country_centroids.csv\"\n",
+    "worldmap_path = \"./data/worldmap/country_centroids.csv\"\n",
-    "out_folder = \"data/geodata\"\n"
+    "out_folder = \"./data/geodata/\"\n"
   ]
  },
  {
@@ -106,10 +106,10 @@
   "execution_count": 4,
   "metadata": {
    "papermill": {
-     "duration": 0.023787,
+     "duration": 0.022631,
-     "end_time": "2020-03-18T17:20:53.966187",
+     "end_time": "2020-03-18T17:39:45.942649",
     "exception": false,
-     "start_time": "2020-03-18T17:20:53.942400",
+     "start_time": "2020-03-18T17:39:45.920018",
     "status": "completed"
    },
    "tags": []
@@ -130,10 +130,10 @@
   "execution_count": 5,
   "metadata": {
    "papermill": {
-     "duration": 0.054163,
+     "duration": 0.05949,
-     "end_time": "2020-03-18T17:20:54.033789",
+     "end_time": "2020-03-18T17:39:46.014614",
     "exception": false,
-     "start_time": "2020-03-18T17:20:53.979626",
+     "start_time": "2020-03-18T17:39:45.955124",
     "status": "completed"
    },
    "tags": []
@@ -147,10 +147,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.009594,
+     "duration": 0.010077,
-     "end_time": "2020-03-18T17:20:54.055575",
+     "end_time": "2020-03-18T17:39:46.041211",
     "exception": false,
-     "start_time": "2020-03-18T17:20:54.045981",
+     "start_time": "2020-03-18T17:39:46.031134",
     "status": "completed"
    },
    "tags": []
@@ -164,10 +164,10 @@
   "execution_count": 6,
   "metadata": {
    "papermill": {
-     "duration": 0.041843,
+     "duration": 0.043439,
-     "end_time": "2020-03-18T17:20:54.107149",
+     "end_time": "2020-03-18T17:39:46.094285",
     "exception": false,
-     "start_time": "2020-03-18T17:20:54.065306",
+     "start_time": "2020-03-18T17:39:46.050846",
     "status": "completed"
    },
    "tags": []
@@ -184,10 +184,10 @@
   "execution_count": 7,
   "metadata": {
    "papermill": {
-     "duration": 0.034309,
+     "duration": 0.034514,
-     "end_time": "2020-03-18T17:20:54.157853",
+     "end_time": "2020-03-18T17:39:46.142439",
     "exception": false,
-     "start_time": "2020-03-18T17:20:54.123544",
+     "start_time": "2020-03-18T17:39:46.107925",
     "status": "completed"
    },
    "tags": []
@@ -214,10 +214,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.009863,
+     "duration": 0.009928,
-     "end_time": "2020-03-18T17:20:54.181948",
+     "end_time": "2020-03-18T17:39:46.166476",
     "exception": false,
-     "start_time": "2020-03-18T17:20:54.172085",
+     "start_time": "2020-03-18T17:39:46.156548",
     "status": "completed"
    },
    "tags": []
@@ -231,10 +231,10 @@
   "execution_count": 8,
   "metadata": {
    "papermill": {
-     "duration": 0.024119,
+     "duration": 0.026025,
-     "end_time": "2020-03-18T17:20:54.216240",
+     "end_time": "2020-03-18T17:39:46.202465",
     "exception": false,
-     "start_time": "2020-03-18T17:20:54.192121",
+     "start_time": "2020-03-18T17:39:46.176440",
     "status": "completed"
    },
    "tags": []
@@ -260,10 +260,10 @@
   "execution_count": 9,
   "metadata": {
    "papermill": {
-     "duration": 0.018305,
+     "duration": 0.018618,
-     "end_time": "2020-03-18T17:20:54.247866",
+     "end_time": "2020-03-18T17:39:46.232915",
     "exception": false,
-     "start_time": "2020-03-18T17:20:54.229561",
+     "start_time": "2020-03-18T17:39:46.214297",
     "status": "completed"
    },
    "tags": []
@@ -278,10 +278,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.009955,
+     "duration": 0.009895,
-     "end_time": "2020-03-18T17:20:54.267709",
+     "end_time": "2020-03-18T17:39:46.253760",
     "exception": false,
-     "start_time": "2020-03-18T17:20:54.257754",
+     "start_time": "2020-03-18T17:39:46.243865",
     "status": "completed"
    },
    "tags": []
@@ -295,10 +295,10 @@
   "execution_count": 10,
   "metadata": {
    "papermill": {
-     "duration": 0.036233,
+     "duration": 0.034717,
-     "end_time": "2020-03-18T17:20:54.313980",
+     "end_time": "2020-03-18T17:39:46.298658",
     "exception": false,
-     "start_time": "2020-03-18T17:20:54.277747",
+     "start_time": "2020-03-18T17:39:46.263941",
     "status": "completed"
    },
    "tags": []
@@ -413,10 +413,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.010645,
+     "duration": 0.010396,
-     "end_time": "2020-03-18T17:20:54.337794",
+     "end_time": "2020-03-18T17:39:46.322850",
     "exception": false,
-     "start_time": "2020-03-18T17:20:54.327149",
+     "start_time": "2020-03-18T17:39:46.312454",
     "status": "completed"
    },
    "tags": []
@@ -430,10 +430,10 @@
   "execution_count": 11,
   "metadata": {
    "papermill": {
-     "duration": 0.031073,
+     "duration": 0.028961,
-     "end_time": "2020-03-18T17:20:54.379882",
+     "end_time": "2020-03-18T17:39:46.362455",
     "exception": false,
-     "start_time": "2020-03-18T17:20:54.348809",
+     "start_time": "2020-03-18T17:39:46.333494",
     "status": "completed"
    },
    "tags": []
@@ -465,20 +465,20 @@
   "version": "3.7.6"
  },
  "papermill": {
-   "duration": 2.08007,
+   "duration": 2.135836,
-   "end_time": "2020-03-18T17:20:54.703617",
+   "end_time": "2020-03-18T17:39:46.683682",
   "environment_variables": {},
   "exception": null,
-   "input_path": "/tmp/dixae90v/notebooks/CompileGeoData.ipynb",
+   "input_path": "notebooks/process/CompileGeoData.ipynb",
   "output_path": "runs/CompileGeoData.run.ipynb",
   "parameters": {
-    "PAPERMILL_INPUT_PATH": "/tmp/dixae90v/notebooks/CompileGeoData.ipynb",
+    "PAPERMILL_INPUT_PATH": "notebooks/process/CompileGeoData.ipynb",
    "PAPERMILL_OUTPUT_PATH": "runs/CompileGeoData.run.ipynb",
-    "out_folder": "data/geodata",
+    "out_folder": "./data/geodata/",
-    "ts_folder": "/tmp/dixae90v/data/covid-19_jhu-csse",
+    "ts_folder": "./data/covid-19_jhu-csse/",
-    "worldmap_path": "/tmp/dixae90v/data/worldmap/country_centroids.csv"
+    "worldmap_path": "./data/worldmap/country_centroids.csv"
   },
-   "start_time": "2020-03-18T17:20:52.623547",
+   "start_time": "2020-03-18T17:39:44.547846",
   "version": "1.1.0"
  }
 },

 %% Cell type:markdown id: tags:
 # Extract the Geographic Info
 Use the Harvard [country_centroids.csv](https://worldmap.harvard.edu/data/geonode:country_centroids_az8) data to extract the geographic info we need for the visualizations.
 %% Cell type:code id: tags:
 ``` python
 import pandas as pd
 import os
 ```
 %% Cell type:code id: tags:
 ``` python
 # Default parameter values for interactive runs. When the notebook is executed
 # through papermill, the injected "# Parameters" cell rebinds these names.
 ts_folder = "../data/covid-19_jhu-csse/"
 worldmap_path = "../data/worldmap/country_centroids.csv"
 # Both default to None; the final save step only runs when
 # PAPERMILL_OUTPUT_PATH is truthy, so interactive runs write nothing.
 out_folder = None
 PAPERMILL_OUTPUT_PATH = None
 ```
 %% Cell type:markdown id: tags:parameters
 ## Read in JHU CSSE data
 %% Cell type:code id: tags:injected-parameters
 ``` python
 # Parameters
-PAPERMILL_INPUT_PATH = "/tmp/dixae90v/notebooks/CompileGeoData.ipynb"
+PAPERMILL_INPUT_PATH = "notebooks/process/CompileGeoData.ipynb"
 PAPERMILL_OUTPUT_PATH = "runs/CompileGeoData.run.ipynb"
-ts_folder = "/tmp/dixae90v/data/covid-19_jhu-csse"
+ts_folder = "./data/covid-19_jhu-csse/"
-worldmap_path = "/tmp/dixae90v/data/worldmap/country_centroids.csv"
+worldmap_path = "./data/worldmap/country_centroids.csv"
-out_folder = "data/geodata"
+out_folder = "./data/geodata/"
 ```
 %% Cell type:code id: tags:
 ``` python
 def read_jhu_covid_region_df(name):
     """Load one JHU CSSE time-series CSV and total it per country/region.

     Reads ``time_series_19-covid-{name}.csv`` from the module-level
     ``ts_folder``, moves the four descriptive columns into the index,
     parses the remaining column labels as dates, and sums all rows that
     share a ``Country/Region`` value.

     Args:
         name: Suffix of the series file to read (the notebook passes
             ``"Confirmed"``).

     Returns:
         A DataFrame indexed by ``Country/Region`` with one datetime-labelled
         column per date in the file.
     """
     csv_path = os.path.join(ts_folder, f"time_series_19-covid-{name}.csv")
     id_columns = ['Country/Region', 'Province/State', 'Lat', 'Long']
     series = pd.read_csv(csv_path).set_index(id_columns)
     # Everything left in the columns is a date label; make it a real datetime.
     series.columns = pd.to_datetime(series.columns)
     return series.groupby(level='Country/Region').sum()
 ```
 %% Cell type:code id: tags:
 ``` python
 # Load the "Confirmed" series, already summed per Country/Region.
 confirmed_df = read_jhu_covid_region_df("Confirmed")
 ```
 %% Cell type:markdown id: tags:
 # Read in Harvard country centroids
 %% Cell type:code id: tags:
 ``` python
 # Read the centroid table and keep only the columns listed below.
 country_centroids_df = pd.read_csv(worldmap_path)[[
     'name', 'name_long', 'region_un', 'subregion', 'region_wb',
     'pop_est', 'gdp_md_est', 'income_grp', 'Longitude', 'Latitude',
 ]]
 # name_jhu starts as a copy of name_long; it is the column that later gets
 # rewritten to the JHU spellings.
 country_centroids_df['name_jhu'] = country_centroids_df['name_long']
 ```
 %% Cell type:code id: tags:
 ``` python
 # Display the column labels of the trimmed centroid table.
 country_centroids_df.columns
 ```
 %% Output
    Index(['name', 'name_long', 'region_un', 'subregion', 'region_wb', 'pop_est',
           'gdp_md_est', 'income_grp', 'Longitude', 'Latitude', 'name_jhu'],
          dtype='object')
 %% Cell type:markdown id: tags:
 Fix names that differ between JHU CSSE and Harvard data
 %% Cell type:code id: tags:
 ``` python
 # Maps a name as it appears in the centroid table's name_long column (copied
 # into name_jhu above) to the spelling used in the JHU CSSE series.
 region_hd_jhu_map = {
     'Brunei Darussalam': 'Brunei',
     # NOTE(review): this key looks mis-encoded (UTF-8 "ô" decoded as Latin-1).
     # It presumably mirrors the spelling stored in country_centroids.csv —
     # confirm against the file before "fixing" it, or the lookup will stop matching.
     "CÃ´te d'Ivoire": "Cote d'Ivoire",
     'Czech Republic': 'Czechia',
     'Hong Kong': 'Hong Kong SAR',
     'Republic of Korea': 'Korea, South',
     'Macao': 'Macao SAR',
     'Russian Federation': 'Russia',
     'Taiwan': 'Taiwan*',
     'United States': 'US'
 }
 # Values that are not keys of the map are left unchanged by replace().
 country_centroids_df['name_jhu'] = country_centroids_df['name_jhu'].replace(region_hd_jhu_map)
 ```
 %% Cell type:code id: tags:
 ``` python
 # Use this to find the name in the series
 # country_centroids_df[country_centroids_df['name'].str.contains('Macao')]
 ```
 %% Cell type:markdown id: tags:
 There are some regions that we cannot resolve, but we will just ignore these.
 %% Cell type:code id: tags:
 ``` python
 # Show the rows of confirmed_df whose Country/Region has no counterpart in
 # country_centroids_df['name_jhu'], limited to the last two date columns.
 # The boolean mask is negated with `~` rather than compared to False.
 confirmed_df.loc[
     ~confirmed_df.index.isin(country_centroids_df['name_jhu'])
 ].iloc[:, -2:]
 ```
 %% Output
                           2020-03-16  2020-03-17
    Country/Region
    Congo (Brazzaville)             1           1
    Congo (Kinshasa)                2           3
    Cruise Ship                   696         696
    Eswatini                        1           1
    Holy See                        1           1
    Martinique                     15          16
    North Macedonia                18          26
    Republic of the Congo           1           1
    The Bahamas                     1           1
 %% Cell type:markdown id: tags:
 # Save the result
 %% Cell type:code id: tags:
 ``` python
 # Persist the centroid table only when PAPERMILL_OUTPUT_PATH is set, i.e. for
 # papermill-driven runs; with the default of None the write is skipped.
 if PAPERMILL_OUTPUT_PATH:
     if out_folder is None:
         # Fail with a clear message instead of the TypeError os.path.join
         # would raise on a None path component.
         raise ValueError("out_folder must be set when running through papermill")
     # Make sure the destination exists so to_csv does not fail on a fresh checkout.
     os.makedirs(out_folder, exist_ok=True)
     out_path = os.path.join(out_folder, "geo_data.csv")
     country_centroids_df.to_csv(out_path)
 ```