feat: notebook to convert values to rates

3c144729 · Chandrasekhar Ramakrishnan · f9c316f6 · 3c144729 · 3c144729
Commit 3c144729 authored 5 years ago by Chandrasekhar Ramakrishnan
--- a/notebooks/Play.ipynb
+++ b/notebooks/Play.ipynb
@@ -34,7 +34,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -47,7 +47,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -61,7 +61,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
@@ -260,7 +260,7 @@
       "Australia                         107       3         21"
      ]
     },
-     "execution_count": 23,
+     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -279,7 +279,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -288,9 +288,16 @@
    "pop_df = pd.read_csv(zf.open(\"API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv\"), skiprows=4)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "There is 2018 pop data for all countries/regions except Eritrea"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
@@ -407,13 +414,12 @@
       "[2 rows x 65 columns]"
      ]
     },
-     "execution_count": 53,
+     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "# There is 2018 pop data for all countries/regions except Eritrea\n",
    "pop_df[pd.isna(pop_df['2018'])]"
   ]
  },
@@ -421,12 +427,12 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Fix the country/region names that differ"
+    "Fix the country/region names that differ between the World Bank population data and the JHU CSSE data."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 111,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -445,7 +451,7 @@
    "     'St. Martin (French part)': 'Saint Martin',\n",
    "     'Brunei Darussalam': 'Brunei'\n",
    "}\n",
-    "current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')\n",
+    "current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')['2018']\n",
    "data_pop_ser = current_pop_ser[current_pop_ser.index.isin(current_totals_df.index)]"
   ]
  },
@@ -458,7 +464,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 112,
+   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
@@ -545,7 +551,7 @@
       "Saint Barthelemy                        1       0          0"
      ]
     },
-     "execution_count": 112,
+     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -554,6 +560,218 @@
    "current_totals_df[current_totals_df.index.isin(data_pop_ser.index) == False]"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Compute rates per 100,000 for regions with more than 100 cases"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>confirmed</th>\n",
+       "      <th>deaths</th>\n",
+       "      <th>recovered</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Italy</th>\n",
+       "      <td>16.794282</td>\n",
+       "      <td>1.044161</td>\n",
+       "      <td>1.198055</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Republic of Korea</th>\n",
+       "      <td>14.550136</td>\n",
+       "      <td>0.104580</td>\n",
+       "      <td>0.478355</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Iran (Islamic Republic of)</th>\n",
+       "      <td>9.831264</td>\n",
+       "      <td>0.355745</td>\n",
+       "      <td>3.338620</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Norway</th>\n",
+       "      <td>7.526810</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.018817</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Bahrain</th>\n",
+       "      <td>7.008874</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>1.401775</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Mainland China</th>\n",
+       "      <td>5.798468</td>\n",
+       "      <td>0.225169</td>\n",
+       "      <td>4.315697</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Switzerland</th>\n",
+       "      <td>5.765250</td>\n",
+       "      <td>0.035226</td>\n",
+       "      <td>0.035226</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Denmark</th>\n",
+       "      <td>4.519231</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.017249</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Spain</th>\n",
+       "      <td>3.627705</td>\n",
+       "      <td>0.074908</td>\n",
+       "      <td>0.068488</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Sweden</th>\n",
+       "      <td>3.486143</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.009820</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Singapore</th>\n",
+       "      <td>2.837546</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>1.383303</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>France</th>\n",
+       "      <td>2.663194</td>\n",
+       "      <td>0.049263</td>\n",
+       "      <td>0.017914</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Belgium</th>\n",
+       "      <td>2.337580</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.008755</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Netherlands</th>\n",
+       "      <td>2.216932</td>\n",
+       "      <td>0.023214</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Austria</th>\n",
+       "      <td>2.057186</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.045213</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Germany</th>\n",
+       "      <td>1.756947</td>\n",
+       "      <td>0.002412</td>\n",
+       "      <td>0.021706</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Hong Kong SAR</th>\n",
+       "      <td>1.610522</td>\n",
+       "      <td>0.040263</td>\n",
+       "      <td>0.872366</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>UK</th>\n",
+       "      <td>0.574531</td>\n",
+       "      <td>0.009024</td>\n",
+       "      <td>0.027072</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>US</th>\n",
+       "      <td>0.510442</td>\n",
+       "      <td>0.017117</td>\n",
+       "      <td>0.004585</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Japan</th>\n",
+       "      <td>0.459183</td>\n",
+       "      <td>0.007903</td>\n",
+       "      <td>0.079824</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Australia</th>\n",
+       "      <td>0.428131</td>\n",
+       "      <td>0.012004</td>\n",
+       "      <td>0.084026</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Malaysia</th>\n",
+       "      <td>0.409153</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.076121</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                            confirmed    deaths  recovered\n",
+       "Italy                       16.794282  1.044161   1.198055\n",
+       "Republic of Korea           14.550136  0.104580   0.478355\n",
+       "Iran (Islamic Republic of)   9.831264  0.355745   3.338620\n",
+       "Norway                       7.526810  0.000000   0.018817\n",
+       "Bahrain                      7.008874  0.000000   1.401775\n",
+       "Mainland China               5.798468  0.225169   4.315697\n",
+       "Switzerland                  5.765250  0.035226   0.035226\n",
+       "Denmark                      4.519231  0.000000   0.017249\n",
+       "Spain                        3.627705  0.074908   0.068488\n",
+       "Sweden                       3.486143  0.000000   0.009820\n",
+       "Singapore                    2.837546  0.000000   1.383303\n",
+       "France                       2.663194  0.049263   0.017914\n",
+       "Belgium                      2.337580  0.000000   0.008755\n",
+       "Netherlands                  2.216932  0.023214   0.000000\n",
+       "Austria                      2.057186  0.000000   0.045213\n",
+       "Germany                      1.756947  0.002412   0.021706\n",
+       "Hong Kong SAR                1.610522  0.040263   0.872366\n",
+       "UK                           0.574531  0.009024   0.027072\n",
+       "US                           0.510442  0.017117   0.004585\n",
+       "Japan                        0.459183  0.007903   0.079824\n",
+       "Australia                    0.428131  0.012004   0.084026\n",
+       "Malaysia                     0.409153  0.000000   0.076121"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "current_per_100000_df = current_totals_df[current_totals_df['confirmed'] > 100]\n",
+    "current_per_100000_df = current_per_100000_df.div(data_pop_ser, 'index').mul(100000).dropna()\n",
+    "current_per_100000_df.sort_values('confirmed', ascending=False)"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,

 %% Cell type:code id: tags:

 ``` python
 import pandas as pd
 ```

 %% Cell type:markdown id: tags:

 # Read in JHU CSSE data

 I plan to switch to [xarray](http://xarray.pydata.org/en/stable/), but for now plain pandas DataFrames are easier to work with.

 %% Cell type:code id: tags:

 ``` python
 def read_jhu_covid_df(name):
     """Load one JHU CSSE time-series CSV as a frame keyed by location.

     `name` is the series suffix used in the file name (the notebook passes
     "Confirmed", "Deaths" and "Recovered").  The four location columns become
     the row index and the remaining column labels are parsed as dates.
     """
     csv_path = f"../data/covid-19_jhu-csse/time_series_19-covid-{name}.csv"
     location_cols = ['Province/State', 'Country/Region', 'Lat', 'Long']
     cases = pd.read_csv(csv_path).set_index(location_cols)
     cases.columns = pd.to_datetime(cases.columns)
     return cases
 ```

 %% Cell type:code id: tags:

 ``` python
 frames_map = {
    "confirmed": read_jhu_covid_df("Confirmed"),
    "deaths": read_jhu_covid_df("Deaths"),
    "recovered": read_jhu_covid_df("Recovered")
 }
 ```

 %% Cell type:code id: tags:

 ``` python
 def current_region_totals_df(frames_map):
     """Build a table of the latest per-region totals, one column per series.

     For every frame in `frames_map` the rows are summed per 'Country/Region'
     index level, the last column is taken, and the result is sorted in
     descending order.  The resulting series are placed side by side, each
     column named after its key in `frames_map`.
     """
     latest_totals = []
     for label, cases in frames_map.items():
         by_region = cases.groupby(level='Country/Region').sum()
         newest = by_region.iloc[:, -1].sort_values(ascending=False)
         newest.name = label
         latest_totals.append(newest)
     return pd.concat(latest_totals, axis=1)
 ```

 %% Cell type:code id: tags:

 ``` python
 current_totals_df = current_region_totals_df(frames_map)
 current_totals_df[current_totals_df['confirmed'] > 100]
 ```

 %% Output

                                confirmed  deaths  recovered
    Mainland China                  80757    3136      60106
    Italy                           10149     631        724
    Iran (Islamic Republic of)       8042     291       2731
    Republic of Korea                7513      54        247
    France                           1784      33         12
    Spain                            1695      35         32
    US                               1670      56         15
    Germany                          1457       2         18
    Others                            696       6         40
    Japan                             581      10        101
    Switzerland                       491       3          3
    Norway                            400       0          1
    UK                                382       6         18
    Netherlands                       382       4          0
    Sweden                            355       0          1
    Belgium                           267       0          1
    Denmark                           262       0          1
    Austria                           182       0          4
    Singapore                         160       0         78
    Malaysia                          129       0         24
    Hong Kong SAR                     120       3         65
    Bahrain                           110       0         22
    Australia                         107       3         21

 %% Cell type:markdown id: tags:

 # Read in World Bank data

 %% Cell type:code id: tags:

 ``` python
 import zipfile
 # Context managers close the archive and the member stream once the CSV is parsed.
 with zipfile.ZipFile("../data/worldbank/SP.POP.TOTL.zip") as zf:
     with zf.open("API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv") as pop_csv:
         # skiprows=4: the header row is the fifth line of the file.
         pop_df = pd.read_csv(pop_csv, skiprows=4)
 ```

+%% Cell type:markdown id: tags:
+
+2018 population data is available for every country/region except Eritrea; the only other row without a 2018 value is `Not classified`, which is not a country.
+
 %% Cell type:code id: tags:

 ``` python
-# There is 2018 pop data for all countries/regions except Eritrea
 pop_df[pd.isna(pop_df['2018'])]
 ```

 %% Output

           Country Name Country Code     Indicator Name Indicator Code       1960  \
    67          Eritrea          ERI  Population, total    SP.POP.TOTL  1007590.0
    108  Not classified          INX  Population, total    SP.POP.TOTL        NaN
    
              1961       1962       1963       1964       1965  ...       2011  \
    67   1033328.0  1060486.0  1088854.0  1118159.0  1148189.0  ...  3213972.0
    108        NaN        NaN        NaN        NaN        NaN  ...        NaN
    
         2012  2013  2014  2015  2016  2017  2018  2019  Unnamed: 64
    67    NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN          NaN
    108   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN          NaN
    
    [2 rows x 65 columns]

 %% Cell type:markdown id: tags:

-Fix the country/region names that differ
+Fix the country/region names that differ between the World Bank population data and the JHU CSSE data.

 %% Cell type:code id: tags:

 ``` python
 region_wb_jhu_map = {
    'China': 'Mainland China',
     'Iran, Islamic Rep.': 'Iran (Islamic Republic of)',
     'Korea, Rep.': 'Republic of Korea',
     'United States': 'US',
     'United Kingdom': 'UK',
     'Hong Kong SAR, China': 'Hong Kong SAR',
     'Egypt, Arab Rep.': 'Egypt',
     'Vietnam': 'Viet Nam',
     'Macao SAR, China': 'Macao SAR',
     'Slovak Republic': 'Slovakia',
     'Moldova': 'Republic of Moldova',
     'St. Martin (French part)': 'Saint Martin',
     'Brunei Darussalam': 'Brunei'
 }
-current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')
+current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')['2018']
 data_pop_ser = current_pop_ser[current_pop_ser.index.isin(current_totals_df.index)]
 ```

 %% Cell type:markdown id: tags:

 Some JHU regions have no matching entry in the World Bank population data; we simply ignore them.

 %% Cell type:code id: tags:

 ``` python
 current_totals_df[current_totals_df.index.isin(data_pop_ser.index) == False]
 ```

 %% Output

                                    confirmed  deaths  recovered
    Others                                696       6         40
    Taipei and environs                    47       1         17
    occupied Palestinian territory         25       0          0
    French Guiana                           5       0          0
    Martinique                              2       0          0
    Holy See                                1       0          0
    Saint Barthelemy                        1       0          0

+%% Cell type:markdown id: tags:
+
+# Compute rates per 100,000 for regions with more than 100 cases
+
+%% Cell type:code id: tags:
+
+``` python
+current_per_100000_df = current_totals_df[current_totals_df['confirmed'] > 100]
+current_per_100000_df = current_per_100000_df.div(data_pop_ser, 'index').mul(100000).dropna()
+current_per_100000_df.sort_values('confirmed', ascending=False)
+```
+
+%% Output
+
+                                confirmed    deaths  recovered
+    Italy                       16.794282  1.044161   1.198055
+    Republic of Korea           14.550136  0.104580   0.478355
+    Iran (Islamic Republic of)   9.831264  0.355745   3.338620
+    Norway                       7.526810  0.000000   0.018817
+    Bahrain                      7.008874  0.000000   1.401775
+    Mainland China               5.798468  0.225169   4.315697
+    Switzerland                  5.765250  0.035226   0.035226
+    Denmark                      4.519231  0.000000   0.017249
+    Spain                        3.627705  0.074908   0.068488
+    Sweden                       3.486143  0.000000   0.009820
+    Singapore                    2.837546  0.000000   1.383303
+    France                       2.663194  0.049263   0.017914
+    Belgium                      2.337580  0.000000   0.008755
+    Netherlands                  2.216932  0.023214   0.000000
+    Austria                      2.057186  0.000000   0.045213
+    Germany                      1.756947  0.002412   0.021706
+    Hong Kong SAR                1.610522  0.040263   0.872366
+    UK                           0.574531  0.009024   0.027072
+    US                           0.510442  0.017117   0.004585
+    Japan                        0.459183  0.007903   0.079824
+    Australia                    0.428131  0.012004   0.084026
+    Malaysia                     0.409153  0.000000   0.076121
+
 %% Cell type:code id: tags:

 ``` python
 ```

--- a/notebooks/ToRates.ipynb
+++ b/notebooks/ToRates.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Convert Series to Rates per 100,000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "ts_folder = \"../data/covid-19_jhu-csse/\"\n",
+    "wb_path = \"../data/worldbank/SP.POP.TOTL.zip\"\n",
+    "out_folder = None\n",
+    "PAPERMILL_OUTPUT_PATH = None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "source": [
+    "## Read in JHU CSSE data\n",
+    "\n",
+    "I will switch to [xarray](http://xarray.pydata.org/en/stable/), but ATM, it's easier like this..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_jhu_covid_region_df(name):\n",
+    "    filename = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n",
+    "    df = pd.read_csv(filename)\n",
+    "    df = df.set_index(['Country/Region', 'Province/State', 'Lat', 'Long'])\n",
+    "    df.columns = pd.to_datetime(df.columns)\n",
+    "    region_df = df.groupby(level='Country/Region').sum()\n",
+    "    loc_df = df.reset_index([2,3]).groupby(level='Country/Region').mean()[['Long', 'Lat']]\n",
+    "    return region_df.join(loc_df).set_index(['Long', 'Lat'], append=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "frames_map = {\n",
+    "    \"confirmed\": read_jhu_covid_region_df(\"Confirmed\"),\n",
+    "    \"deaths\": read_jhu_covid_region_df(\"Deaths\"),\n",
+    "    \"recovered\": read_jhu_covid_region_df(\"Recovered\")\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "frames_map['confirmed'].sort_values(frames_map['confirmed'].columns[-1], ascending=False).head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Read in World Bank data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import zipfile\n",
+    "zf = zipfile.ZipFile(wb_path)\n",
+    "pop_df = pd.read_csv(zf.open(\"API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv\"), skiprows=4)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "There is 2018 pop data for all countries/regions except Eritrea"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pop_df[pd.isna(pop_df['2018'])]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Fix the country/region names that differ between the World Bank population data and the JHU CSSE data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "region_wb_jhu_map = {\n",
+    "    'China': 'Mainland China',\n",
+    "     'Iran, Islamic Rep.': 'Iran (Islamic Republic of)',\n",
+    "     'Korea, Rep.': 'Republic of Korea',\n",
+    "     'United States': 'US',\n",
+    "     'United Kingdom': 'UK',\n",
+    "     'Hong Kong SAR, China': 'Hong Kong SAR',\n",
+    "     'Egypt, Arab Rep.': 'Egypt',\n",
+    "     'Vietnam': 'Viet Nam',\n",
+    "     'Macao SAR, China': 'Macao SAR',\n",
+    "     'Slovak Republic': 'Slovakia',\n",
+    "     'Moldova': 'Republic of Moldova',\n",
+    "     'St. Martin (French part)': 'Saint Martin',\n",
+    "     'Brunei Darussalam': 'Brunei'\n",
+    "}\n",
+    "current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')['2018']\n",
+    "data_pop_ser = current_pop_ser[current_pop_ser.index.isin(frames_map['confirmed'].index.levels[0])]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "There are some regions that we cannot resolve, but we will just ignore these."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Compute rates per 100,000 for regions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def cases_to_rates_df(df):\n",
+    "    per_100000_df = df.reset_index([1, 2], drop=True)\n",
+    "    per_100000_df = per_100000_df.div(data_pop_ser, 'index').mul(100000).dropna()\n",
+    "    per_100000_df.index.name = 'Country/Region'\n",
+    "    return per_100000_df\n",
+    "    \n",
+    "def frames_to_rates(frames_map):\n",
+    "    return {k: cases_to_rates_df(v) for k,v in frames_map.items()}\n",
+    "\n",
+    "\n",
+    "rates_map = frames_to_rates(frames_map)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if PAPERMILL_OUTPUT_PATH:\n",
+    "    for k, v in rates_map.items():\n",
+    "        out_path = os.path.join(out_folder, f\"ts_rates_19-covid-{k}.csv\")\n",
+    "        v.reset_index().to_csv(out_path)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
+%% Cell type:markdown id: tags:
+
+# Convert Series to Rates per 100,000
+
+%% Cell type:code id: tags:
+
+``` python
+import pandas as pd
+import os
+```
+
+%% Cell type:code id: tags:parameters
+
+``` python
+ts_folder = "../data/covid-19_jhu-csse/"
+wb_path = "../data/worldbank/SP.POP.TOTL.zip"
+out_folder = None
+PAPERMILL_OUTPUT_PATH = None
+```
+
+%% Cell type:markdown id: tags:parameters
+
+## Read in JHU CSSE data
+
+I plan to switch to [xarray](http://xarray.pydata.org/en/stable/), but for now plain pandas DataFrames are easier to work with.
+
+%% Cell type:code id: tags:
+
+``` python
+def read_jhu_covid_region_df(name):
+    filename = os.path.join(ts_folder, f"time_series_19-covid-{name}.csv")
+    df = pd.read_csv(filename)
+    df = df.set_index(['Country/Region', 'Province/State', 'Lat', 'Long'])
+    df.columns = pd.to_datetime(df.columns)
+    region_df = df.groupby(level='Country/Region').sum()
+    loc_df = df.reset_index([2,3]).groupby(level='Country/Region').mean()[['Long', 'Lat']]
+    return region_df.join(loc_df).set_index(['Long', 'Lat'], append=True)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+frames_map = {
+    "confirmed": read_jhu_covid_region_df("Confirmed"),
+    "deaths": read_jhu_covid_region_df("Deaths"),
+    "recovered": read_jhu_covid_region_df("Recovered")
+}
+```
+
+%% Cell type:code id: tags:
+
+``` python
+frames_map['confirmed'].sort_values(frames_map['confirmed'].columns[-1], ascending=False).head()
+```
+
+%% Cell type:markdown id: tags:
+
+## Read in World Bank data
+
+%% Cell type:code id: tags:
+
+``` python
+import zipfile
+zf = zipfile.ZipFile(wb_path)
+pop_df = pd.read_csv(zf.open("API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv"), skiprows=4)
+```
+
+%% Cell type:markdown id: tags:
+
+2018 population data is available for every country/region except Eritrea; the only other row without a 2018 value is `Not classified`, which is not a country.
+
+%% Cell type:code id: tags:
+
+``` python
+pop_df[pd.isna(pop_df['2018'])]
+```
+
+%% Cell type:markdown id: tags:
+
+Fix the country/region names that differ between the World Bank population data and the JHU CSSE data.
+
+%% Cell type:code id: tags:
+
+``` python
+region_wb_jhu_map = {
+    'China': 'Mainland China',
+     'Iran, Islamic Rep.': 'Iran (Islamic Republic of)',
+     'Korea, Rep.': 'Republic of Korea',
+     'United States': 'US',
+     'United Kingdom': 'UK',
+     'Hong Kong SAR, China': 'Hong Kong SAR',
+     'Egypt, Arab Rep.': 'Egypt',
+     'Vietnam': 'Viet Nam',
+     'Macao SAR, China': 'Macao SAR',
+     'Slovak Republic': 'Slovakia',
+     'Moldova': 'Republic of Moldova',
+     'St. Martin (French part)': 'Saint Martin',
+     'Brunei Darussalam': 'Brunei'
+}
+current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')['2018']
+data_pop_ser = current_pop_ser[current_pop_ser.index.isin(frames_map['confirmed'].index.levels[0])]
+```
+
+%% Cell type:markdown id: tags:
+
+Some JHU regions have no matching entry in the World Bank population data; they are left out of the rate calculations below.
+
+%% Cell type:markdown id: tags:
+
+## Compute rates per 100,000 for regions
+
+%% Cell type:code id: tags:
+
+``` python
+def cases_to_rates_df(df):
+    per_100000_df = df.reset_index([1, 2], drop=True)
+    per_100000_df = per_100000_df.div(data_pop_ser, 'index').mul(100000).dropna()
+    per_100000_df.index.name = 'Country/Region'
+    return per_100000_df
+
+def frames_to_rates(frames_map):
+    return {k: cases_to_rates_df(v) for k,v in frames_map.items()}
+
+
+rates_map = frames_to_rates(frames_map)
+```
+
+%% Cell type:code id: tags:
+
+``` python
+if PAPERMILL_OUTPUT_PATH:
+    for k, v in rates_map.items():
+        out_path = os.path.join(out_folder, f"ts_rates_19-covid-{k}.csv")
+        v.reset_index().to_csv(out_path)
+```