renku update --with-siblings

e30ded37 · Chandrasekhar Ramakrishnan · renku 0.9.1 · 89f1819d · e30ded37 · e30ded37
Commit e30ded37 authored 5 years ago by Chandrasekhar Ramakrishnan Committed by renku 0.9.1 5 years ago
--- a/.renku/workflow/c5ee81c0a49441369e109730c6d900cf.cwl
+++ b/.renku/workflow/c5ee81c0a49441369e109730c6d900cf.cwl
+class: Workflow
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default: ts_folder
+    streamable: false
+    type: string
+  input_10:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/covid-19_jhu-csse
+    streamable: false
+    type: Directory
+  input_11:
+    default: wb_path
+    streamable: false
+    type: string
+  input_12:
+    default:
+      class: File
+      path: ../../data/worldbank/SP.POP.TOTL.zip
+    streamable: false
+    type: File
+  input_13:
+    default: geodata_path
+    streamable: false
+    type: string
+  input_14:
+    default:
+      class: File
+      path: ../../data/geodata/geo_data.csv
+    streamable: false
+    type: File
+  input_15:
+    default: out_folder
+    streamable: false
+    type: string
+  input_16:
+    default: data/covid-19_rates
+    streamable: false
+    type: string
+  input_17:
+    default:
+      class: File
+      path: ../../notebooks/ToRates.ipynb
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/covid-19_jhu-csse
+    streamable: false
+    type: Directory
+  input_3:
+    default: rates_folder
+    streamable: false
+    type: string
+  input_4:
+    default: geodata_path
+    streamable: false
+    type: string
+  input_5:
+    default:
+      class: File
+      path: ../../data/geodata/geo_data.csv
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../notebooks/Dashboard.ipynb
+    streamable: false
+    type: File
+  input_7:
+    default: runs/Dashboard.run.ipynb
+    streamable: false
+    type: string
+  input_8:
+    default: ts_folder
+    streamable: false
+    type: string
+  input_9:
+    default: runs/ToRates.run.ipynb
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputSource: step_2/output_0
+    streamable: false
+    type: File
+  output_1:
+    outputSource: step_1/output_0
+    streamable: false
+    type: File
+  output_2:
+    outputSource: step_2/output_1
+    streamable: false
+    type: Directory
+requirements: []
+steps:
+  step_1:
+    in:
+      input_1: input_1
+      input_2: input_2
+      input_3: input_3
+      input_4: step_2/output_1
+      input_5: input_4
+      input_6: input_5
+      input_7: input_6
+      input_8: input_7
+    out:
+    - output_0
+    run: 4cc7ffe9d5a045efb048ef2222a40ffa_papermill.cwl
+  step_2:
+    in:
+      input_1: input_8
+      input_10: input_9
+      input_2: input_10
+      input_3: input_11
+      input_4: input_12
+      input_5: input_13
+      input_6: input_14
+      input_7: input_15
+      input_8: input_16
+      input_9: input_17
+    out:
+    - output_0
+    - output_1
+    run: 2c413376f8aa4ba1a325212655d423e5_papermill.cwl
--- a/data/covid-19_rates/ts_rates_19-covid-confirmed.csv
+++ b/data/covid-19_rates/ts_rates_19-covid-confirmed.csv
--- a/data/covid-19_rates/ts_rates_19-covid-deaths.csv
+++ b/data/covid-19_rates/ts_rates_19-covid-deaths.csv
--- a/data/covid-19_rates/ts_rates_19-covid-recovered.csv
+++ b/data/covid-19_rates/ts_rates_19-covid-recovered.csv
--- a/runs/Dashboard.run.ipynb
+++ b/runs/Dashboard.run.ipynb
--- a/runs/ToRates.run.ipynb
+++ b/runs/ToRates.run.ipynb
@@ -4,10 +4,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.026762,
+     "duration": 0.024961,
-     "end_time": "2020-03-15T16:43:42.228951",
+     "end_time": "2020-03-15T18:37:31.040525",
     "exception": false,
-     "start_time": "2020-03-15T16:43:42.202189",
+     "start_time": "2020-03-15T18:37:31.015564",
     "status": "completed"
    },
    "tags": []
@@ -21,10 +21,10 @@
   "execution_count": 1,
   "metadata": {
    "papermill": {
-     "duration": 0.308159,
+     "duration": 0.301514,
-     "end_time": "2020-03-15T16:43:42.549233",
+     "end_time": "2020-03-15T18:37:31.355857",
     "exception": false,
-     "start_time": "2020-03-15T16:43:42.241074",
+     "start_time": "2020-03-15T18:37:31.054343",
     "status": "completed"
    },
    "tags": []
@@ -40,10 +40,10 @@
   "execution_count": 2,
   "metadata": {
    "papermill": {
-     "duration": 0.024464,
+     "duration": 0.024072,
-     "end_time": "2020-03-15T16:43:42.596678",
+     "end_time": "2020-03-15T18:37:31.401335",
     "exception": false,
-     "start_time": "2020-03-15T16:43:42.572214",
+     "start_time": "2020-03-15T18:37:31.377263",
     "status": "completed"
    },
    "tags": [
@@ -64,10 +64,10 @@
   "execution_count": 3,
   "metadata": {
    "papermill": {
-     "duration": 0.023161,
+     "duration": 0.024999,
-     "end_time": "2020-03-15T16:43:42.632795",
+     "end_time": "2020-03-15T18:37:31.439386",
     "exception": false,
-     "start_time": "2020-03-15T16:43:42.609634",
+     "start_time": "2020-03-15T18:37:31.414387",
     "status": "completed"
    },
    "tags": [
@@ -77,11 +77,11 @@
   "outputs": [],
   "source": [
    "# Parameters\n",
-    "PAPERMILL_INPUT_PATH = \"/tmp/sj0uwmdy/notebooks/ToRates.ipynb\"\n",
+    "PAPERMILL_INPUT_PATH = \"/tmp/fsb4wn_r/notebooks/ToRates.ipynb\"\n",
    "PAPERMILL_OUTPUT_PATH = \"runs/ToRates.run.ipynb\"\n",
-    "ts_folder = \"/tmp/sj0uwmdy/data/covid-19_jhu-csse\"\n",
+    "ts_folder = \"/tmp/fsb4wn_r/data/covid-19_jhu-csse\"\n",
-    "wb_path = \"/tmp/sj0uwmdy/data/worldbank/SP.POP.TOTL.zip\"\n",
+    "wb_path = \"/tmp/fsb4wn_r/data/worldbank/SP.POP.TOTL.zip\"\n",
-    "geodata_path = \"/tmp/sj0uwmdy/data/geodata/geo_data.csv\"\n",
+    "geodata_path = \"/tmp/fsb4wn_r/data/geodata/geo_data.csv\"\n",
    "out_folder = \"data/covid-19_rates\"\n"
   ]
  },
@@ -89,10 +89,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.008873,
+     "duration": 0.010544,
-     "end_time": "2020-03-15T16:43:42.653860",
+     "end_time": "2020-03-15T18:37:31.463234",
     "exception": false,
-     "start_time": "2020-03-15T16:43:42.644987",
+     "start_time": "2020-03-15T18:37:31.452690",
     "status": "completed"
    },
    "tags": [
@@ -110,10 +110,10 @@
   "execution_count": 4,
   "metadata": {
    "papermill": {
-     "duration": 0.027857,
+     "duration": 0.030699,
-     "end_time": "2020-03-15T16:43:42.690591",
+     "end_time": "2020-03-15T18:37:31.504252",
     "exception": false,
-     "start_time": "2020-03-15T16:43:42.662734",
+     "start_time": "2020-03-15T18:37:31.473553",
     "status": "completed"
    },
    "tags": []
@@ -135,10 +135,10 @@
   "execution_count": 5,
   "metadata": {
    "papermill": {
-     "duration": 0.132113,
+     "duration": 0.130448,
-     "end_time": "2020-03-15T16:43:42.839054",
+     "end_time": "2020-03-15T18:37:31.650988",
     "exception": false,
-     "start_time": "2020-03-15T16:43:42.706941",
+     "start_time": "2020-03-15T18:37:31.520540",
     "status": "completed"
    },
    "tags": []
@@ -156,10 +156,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.01131,
+     "duration": 0.022829,
-     "end_time": "2020-03-15T16:43:42.871122",
+     "end_time": "2020-03-15T18:37:31.697981",
     "exception": false,
-     "start_time": "2020-03-15T16:43:42.859812",
+     "start_time": "2020-03-15T18:37:31.675152",
     "status": "completed"
    },
    "tags": []
@@ -173,10 +173,10 @@
   "execution_count": 6,
   "metadata": {
    "papermill": {
-     "duration": 0.042676,
+     "duration": 0.047034,
-     "end_time": "2020-03-15T16:43:42.923258",
+     "end_time": "2020-03-15T18:37:31.756497",
     "exception": false,
-     "start_time": "2020-03-15T16:43:42.880582",
+     "start_time": "2020-03-15T18:37:31.709463",
     "status": "completed"
    },
    "tags": []
@@ -192,10 +192,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.011135,
+     "duration": 0.011097,
-     "end_time": "2020-03-15T16:43:42.955397",
+     "end_time": "2020-03-15T18:37:31.790107",
     "exception": false,
-     "start_time": "2020-03-15T16:43:42.944262",
+     "start_time": "2020-03-15T18:37:31.779010",
     "status": "completed"
    },
    "tags": []
@@ -209,10 +209,10 @@
   "execution_count": 7,
   "metadata": {
    "papermill": {
-     "duration": 0.063297,
+     "duration": 0.063536,
-     "end_time": "2020-03-15T16:43:43.027899",
+     "end_time": "2020-03-15T18:37:31.862736",
     "exception": false,
-     "start_time": "2020-03-15T16:43:42.964602",
+     "start_time": "2020-03-15T18:37:31.799200",
     "status": "completed"
    },
    "tags": []
@@ -345,10 +345,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.010848,
+     "duration": 0.010344,
-     "end_time": "2020-03-15T16:43:43.059760",
+     "end_time": "2020-03-15T18:37:31.891117",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.048912",
+     "start_time": "2020-03-15T18:37:31.880773",
     "status": "completed"
    },
    "tags": []
@@ -362,10 +362,10 @@
   "execution_count": 8,
   "metadata": {
    "papermill": {
-     "duration": 0.041061,
+     "duration": 0.027744,
-     "end_time": "2020-03-15T16:43:43.110716",
+     "end_time": "2020-03-15T18:37:31.928366",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.069655",
+     "start_time": "2020-03-15T18:37:31.900622",
     "status": "completed"
    },
    "tags": []
@@ -394,10 +394,10 @@
   "execution_count": 9,
   "metadata": {
    "papermill": {
-     "duration": 0.034436,
+     "duration": 0.020322,
-     "end_time": "2020-03-15T16:43:43.163584",
+     "end_time": "2020-03-15T18:37:31.963446",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.129148",
+     "start_time": "2020-03-15T18:37:31.943124",
     "status": "completed"
    },
    "tags": []
@@ -412,10 +412,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.012003,
+     "duration": 0.010478,
-     "end_time": "2020-03-15T16:43:43.199703",
+     "end_time": "2020-03-15T18:37:31.984626",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.187700",
+     "start_time": "2020-03-15T18:37:31.974148",
     "status": "completed"
    },
    "tags": []
@@ -429,10 +429,10 @@
   "execution_count": 10,
   "metadata": {
    "papermill": {
-     "duration": 0.050802,
+     "duration": 0.047267,
-     "end_time": "2020-03-15T16:43:43.260325",
+     "end_time": "2020-03-15T18:37:32.042700",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.209523",
+     "start_time": "2020-03-15T18:37:31.995433",
     "status": "completed"
    },
    "tags": []
@@ -626,10 +626,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.011461,
+     "duration": 0.011403,
-     "end_time": "2020-03-15T16:43:43.292694",
+     "end_time": "2020-03-15T18:37:32.074357",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.281233",
+     "start_time": "2020-03-15T18:37:32.062954",
     "status": "completed"
    },
    "tags": []
@@ -643,10 +643,10 @@
   "execution_count": 11,
   "metadata": {
    "papermill": {
-     "duration": 0.037434,
+     "duration": 0.036638,
-     "end_time": "2020-03-15T16:43:43.340346",
+     "end_time": "2020-03-15T18:37:32.121372",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.302912",
+     "start_time": "2020-03-15T18:37:32.084734",
     "status": "completed"
    },
    "tags": []
@@ -660,10 +660,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.010233,
+     "duration": 0.010241,
-     "end_time": "2020-03-15T16:43:43.366112",
+     "end_time": "2020-03-15T18:37:32.149495",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.355879",
+     "start_time": "2020-03-15T18:37:32.139254",
     "status": "completed"
    },
    "tags": []
@@ -677,10 +677,10 @@
   "execution_count": 12,
   "metadata": {
    "papermill": {
-     "duration": 0.062288,
+     "duration": 0.049287,
-     "end_time": "2020-03-15T16:43:43.438245",
+     "end_time": "2020-03-15T18:37:32.209471",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.375957",
+     "start_time": "2020-03-15T18:37:32.160184",
     "status": "completed"
    },
    "tags": []
@@ -889,10 +889,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.012259,
+     "duration": 0.011342,
-     "end_time": "2020-03-15T16:43:43.474771",
+     "end_time": "2020-03-15T18:37:32.242689",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.462512",
+     "start_time": "2020-03-15T18:37:32.231347",
     "status": "completed"
    },
    "tags": []
@@ -906,10 +906,10 @@
   "execution_count": 13,
   "metadata": {
    "papermill": {
-     "duration": 0.080269,
+     "duration": 0.079225,
-     "end_time": "2020-03-15T16:43:43.565663",
+     "end_time": "2020-03-15T18:37:32.333345",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.485394",
+     "start_time": "2020-03-15T18:37:32.254120",
     "status": "completed"
    },
    "tags": []
@@ -934,10 +934,10 @@
   "execution_count": 14,
   "metadata": {
    "papermill": {
-     "duration": 0.065907,
+     "duration": 0.05118,
-     "end_time": "2020-03-15T16:43:43.656507",
+     "end_time": "2020-03-15T18:37:32.407865",
     "exception": false,
-     "start_time": "2020-03-15T16:43:43.590600",
+     "start_time": "2020-03-15T18:37:32.356685",
     "status": "completed"
    },
    "tags": []
@@ -970,21 +970,21 @@
   "version": "3.7.6"
  },
  "papermill": {
-   "duration": 2.541462,
+   "duration": 2.574464,
-   "end_time": "2020-03-15T16:43:43.994344",
+   "end_time": "2020-03-15T18:37:32.738690",
   "environment_variables": {},
   "exception": null,
-   "input_path": "/tmp/sj0uwmdy/notebooks/ToRates.ipynb",
+   "input_path": "/tmp/fsb4wn_r/notebooks/ToRates.ipynb",
   "output_path": "runs/ToRates.run.ipynb",
   "parameters": {
-    "PAPERMILL_INPUT_PATH": "/tmp/sj0uwmdy/notebooks/ToRates.ipynb",
+    "PAPERMILL_INPUT_PATH": "/tmp/fsb4wn_r/notebooks/ToRates.ipynb",
    "PAPERMILL_OUTPUT_PATH": "runs/ToRates.run.ipynb",
-    "geodata_path": "/tmp/sj0uwmdy/data/geodata/geo_data.csv",
+    "geodata_path": "/tmp/fsb4wn_r/data/geodata/geo_data.csv",
    "out_folder": "data/covid-19_rates",
-    "ts_folder": "/tmp/sj0uwmdy/data/covid-19_jhu-csse",
+    "ts_folder": "/tmp/fsb4wn_r/data/covid-19_jhu-csse",
-    "wb_path": "/tmp/sj0uwmdy/data/worldbank/SP.POP.TOTL.zip"
+    "wb_path": "/tmp/fsb4wn_r/data/worldbank/SP.POP.TOTL.zip"
   },
-   "start_time": "2020-03-15T16:43:41.452882",
+   "start_time": "2020-03-15T18:37:30.164226",
   "version": "1.1.0"
  }
 },

 %% Cell type:markdown id: tags:
 # Convert Series to Rates per 100,000
 %% Cell type:code id: tags:
 ``` python
 import pandas as pd
 import os
 ```
 %% Cell type:code id: tags:parameters
 ``` python
 # Default inputs. The cell is tagged "parameters"; the injected-parameters cell
 # that follows it reassigns these names when the notebook is run through papermill.
 # Folder that holds the time_series_19-covid-*.csv files.
 ts_folder = "../data/covid-19_jhu-csse/"
 # Zip archive with the World Bank total-population CSV.
 wb_path = "../data/worldbank/SP.POP.TOTL.zip"
 # CSV with per-region geodata (indexed by 'name_jhu', provides 'pop_est').
 geodata_path = "../data/geodata/geo_data.csv"
 # Both default to None; the CSV export in the last cell only runs when
 # PAPERMILL_OUTPUT_PATH is truthy and then writes into out_folder.
 out_folder = None
 PAPERMILL_OUTPUT_PATH = None
 ```
 %% Cell type:code id: tags:injected-parameters
 ``` python
 # Parameters
-PAPERMILL_INPUT_PATH = "/tmp/sj0uwmdy/notebooks/ToRates.ipynb"
+PAPERMILL_INPUT_PATH = "/tmp/fsb4wn_r/notebooks/ToRates.ipynb"
 PAPERMILL_OUTPUT_PATH = "runs/ToRates.run.ipynb"
-ts_folder = "/tmp/sj0uwmdy/data/covid-19_jhu-csse"
+ts_folder = "/tmp/fsb4wn_r/data/covid-19_jhu-csse"
-wb_path = "/tmp/sj0uwmdy/data/worldbank/SP.POP.TOTL.zip"
+wb_path = "/tmp/fsb4wn_r/data/worldbank/SP.POP.TOTL.zip"
-geodata_path = "/tmp/sj0uwmdy/data/geodata/geo_data.csv"
+geodata_path = "/tmp/fsb4wn_r/data/geodata/geo_data.csv"
 out_folder = "data/covid-19_rates"
 ```
 %% Cell type:markdown id: tags:parameters
 ## Read in JHU CSSE data
 I plan to switch to [xarray](http://xarray.pydata.org/en/stable/), but for now it is easier to work with plain pandas data frames.
 %% Cell type:code id: tags:
 ``` python
 def read_jhu_covid_region_df(name):
    """Load one JHU CSSE time series and aggregate it per country/region.

    Reads ``time_series_19-covid-{name}.csv`` from ``ts_folder``, sums the
    date columns over all rows of each 'Country/Region' and averages the
    'Long'/'Lat' values of those rows.  The returned frame is indexed by
    ('Country/Region', 'Long', 'Lat') and has one datetime column per date.
    """
    csv_path = os.path.join(ts_folder, f"time_series_19-covid-{name}.csv")
    key_columns = ['Country/Region', 'Province/State', 'Lat', 'Long']
    cases = pd.read_csv(csv_path).set_index(key_columns)
    # The remaining column labels are date strings; convert them to datetimes.
    cases.columns = pd.to_datetime(cases.columns)
    totals = cases.groupby(level='Country/Region').sum()
    # Move 'Lat'/'Long' (index levels 2 and 3) back into columns so they can be averaged.
    with_coords = cases.reset_index([2, 3])
    mean_coords = with_coords.groupby(level='Country/Region').mean()[['Long', 'Lat']]
    combined = totals.join(mean_coords)
    return combined.set_index(['Long', 'Lat'], append=True)
 ```
 %% Cell type:code id: tags:
 ``` python
 # One aggregated frame per JHU CSSE series; the keys are the lower-case series
 # names, the file names use the capitalized form.
 frames_map = {
    series: read_jhu_covid_region_df(series.capitalize())
    for series in ("confirmed", "deaths", "recovered")
 }
 ```
 %% Cell type:markdown id: tags:
 # Read in World Bank data
 %% Cell type:code id: tags:
 ``` python
 import zipfile
 # Open the archive and its CSV member as context managers so that both file
 # handles are closed as soon as the table has been loaded.
 with zipfile.ZipFile(wb_path) as zf, zf.open("API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv") as wb_csv:
    # skiprows=4: the first four lines of the file are skipped before the header is read.
    pop_df = pd.read_csv(wb_csv, skiprows=4)
 ```
 %% Cell type:markdown id: tags:
 There is 2018 population data for all countries/regions except Eritrea; the only other row without a 2018 value is the "Not classified" entry.
 %% Cell type:code id: tags:
 ``` python
 # Show the rows that have no population value in the '2018' column.
 pop_df.loc[pd.isna(pop_df['2018'])]
 ```
 %% Output
           Country Name Country Code     Indicator Name Indicator Code       1960  \
    67          Eritrea          ERI  Population, total    SP.POP.TOTL  1007590.0
    108  Not classified          INX  Population, total    SP.POP.TOTL        NaN
              1961       1962       1963       1964       1965  ...       2011  \
    67   1033328.0  1060486.0  1088854.0  1118159.0  1148189.0  ...  3213972.0
    108        NaN        NaN        NaN        NaN        NaN  ...        NaN
         2012  2013  2014  2015  2016  2017  2018  2019  Unnamed: 64
    67    NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN          NaN
    108   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN          NaN
    [2 rows x 65 columns]
 %% Cell type:markdown id: tags:
 Fix the country/region names that differ between the World Bank population data and the JHU CSSE data.
 %% Cell type:code id: tags:
 ``` python
 # World Bank country name -> name used for the same region in the JHU CSSE data.
 region_wb_jhu_map = {
     'Brunei Darussalam': 'Brunei',
     'Czech Republic': 'Czechia',
     'Egypt, Arab Rep.': 'Egypt',
     'Hong Kong SAR, China': 'Hong Kong SAR',
     'Iran, Islamic Rep.': 'Iran',
     'Korea, Rep.': 'Korea, South',
     'Macao SAR, China': 'Macao SAR',
     'Russian Federation': 'Russia',
     'Slovak Republic': 'Slovakia',
     'St. Martin (French part)': 'Saint Martin',
     'United States': 'US'
 }
 # 2018 population per country, indexed by the renamed 'Country Name'.
 current_pop_ser = (
    pop_df[['Country Name', '2018']]
    .copy()
    .replace(region_wb_jhu_map)
    .set_index('Country Name')['2018']
 )
 # Keep only the countries whose name also occurs in the JHU CSSE frames.
 data_pop_ser = current_pop_ser.loc[
    current_pop_ser.index.isin(frames_map['confirmed'].index.levels[0])
 ]
 ```
 %% Cell type:code id: tags:
 ``` python
 # Use this to find the name in the series
 # current_pop_ser[current_pop_ser.index.str.contains('Czech')]
 ```
 %% Cell type:markdown id: tags:
 There are some regions that we cannot resolve, but we will just ignore these.
 %% Cell type:code id: tags:
 ``` python
 # Last two date columns for the regions that have no entry in data_pop_ser.
 # The mask is built from the per-row level values: index.levels[0] only holds
 # the unique labels and matches the rows just because each region has one row.
 frames_map['confirmed'].loc[
    ~frames_map['confirmed'].index.get_level_values(0).isin(data_pop_ser.index)
 ].iloc[:,-2:]
 ```
 %% Output
                                                         2020-03-13 00:00:00  \
    Country/Region                   Long      Lat
    Congo (Kinshasa)                  21.7587  -4.0383                     2
    Cruise Ship                       139.6380  35.4437                  696
    French Guiana                    -53.1258   3.9339                     5
    Guadeloupe                       -61.5510   16.2650                    1
    Guernsey                         -2.5800    49.4500                    0
    Holy See                          12.4534   41.9029                    1
    Jersey                           -2.1100    49.1900                    0
    Martinique                       -61.0242   14.6415                    3
    Reunion                           55.5364  -21.1151                    5
    Saint Lucia                      -60.9789   13.9094                    0
    Saint Vincent and the Grenadines -61.2872   12.9843                    0
    Taiwan*                           121.0000  23.7000                   50
    Venezuela                        -66.5897   6.4238                     0
    occupied Palestinian territory    35.2332   31.9522                    0
                                                         2020-03-14 00:00:00
    Country/Region                   Long      Lat
    Congo (Kinshasa)                  21.7587  -4.0383                     2
    Cruise Ship                       139.6380  35.4437                  696
    French Guiana                    -53.1258   3.9339                     5
    Guadeloupe                       -61.5510   16.2650                    1
    Guernsey                         -2.5800    49.4500                    1
    Holy See                          12.4534   41.9029                    1
    Jersey                           -2.1100    49.1900                    2
    Martinique                       -61.0242   14.6415                    9
    Reunion                           55.5364  -21.1151                    6
    Saint Lucia                      -60.9789   13.9094                    1
    Saint Vincent and the Grenadines -61.2872   12.9843                    1
    Taiwan*                           121.0000  23.7000                   53
    Venezuela                        -66.5897   6.4238                     2
    occupied Palestinian territory    35.2332   31.9522                    0
 %% Cell type:markdown id: tags:
 # Read in geodata to get additional population numbers
 %% Cell type:code id: tags:
 ``` python
 # Geodata table indexed by 'name_jhu'; the 'Unnamed: 0' column is dropped.
 geodata_df = (
    pd.read_csv(geodata_path)
    .drop(columns='Unnamed: 0')
    .set_index('name_jhu')
 )
 ```
 %% Cell type:markdown id: tags:
 Add in populations for missing countries
 %% Cell type:code id: tags:
 ``` python
 # Regions of the confirmed frame that have no entry in data_pop_ser yet.
 # The mask uses the per-row level values (not index.levels[0], which holds
 # only the unique labels) so that it always has one entry per row.
 missing_countries = frames_map['confirmed'].loc[
    ~frames_map['confirmed'].index.get_level_values(0).isin(data_pop_ser.index)
 ].iloc[:,-2:].reset_index()['Country/Region']
 display(geodata_df.loc[geodata_df.index.isin(missing_countries)])
 # Take the 'pop_est' value from the geodata for those regions.
 # Series.append was removed in pandas 2.0; pd.concat is the supported equivalent.
 data_pop_ser = pd.concat(
    [data_pop_ser, geodata_df.loc[geodata_df.index.isin(missing_countries), 'pop_est']]
 )
 ```
 %% Output
 %% Cell type:markdown id: tags:
 # Compute rates per 100,000 for regions
 %% Cell type:code id: tags:
 ``` python
 def cases_to_rates_df(df):
    """Convert a frame of case counts into rates per 100,000.

    Index levels 1 and 2 of ``df`` are dropped, every row is divided by the
    entry of ``data_pop_ser`` with the same index label and multiplied by
    100000; rows that contain NaN afterwards are removed.  The index of the
    result is named 'Country/Region'.
    """
    counts = df.reset_index([1, 2], drop=True)
    rates = counts.div(data_pop_ser, axis='index')
    rates = rates.mul(100000)
    rates = rates.dropna()
    rates.index.name = 'Country/Region'
    return rates
 def frames_to_rates(frames_map):
    """Return a dict with the keys of ``frames_map`` whose values are the frames passed through ``cases_to_rates_df``."""
    rates_by_series = {}
    for series_name, cases_df in frames_map.items():
        rates_by_series[series_name] = cases_to_rates_df(cases_df)
    return rates_by_series
 # Per-100,000 rate frames, keyed like frames_map.
 rates_map = frames_to_rates(frames_map)
 ```
 %% Cell type:code id: tags:
 ``` python
 # Export one CSV per series, but only when PAPERMILL_OUTPUT_PATH has been set.
 if PAPERMILL_OUTPUT_PATH:
    for series_name, rates_df in rates_map.items():
        csv_name = f"ts_rates_19-covid-{series_name}.csv"
        flat_df = rates_df.reset_index()
        flat_df.to_csv(os.path.join(out_folder, csv_name))
 ```