{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Read in JHU CSSE data\n", "\n", "I will switch to [xarray](http://xarray.pydata.org/en/stable/), but ATM, it's easier like this..." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def read_jhu_covid_df(name):\n", " filename = f\"../data/covid-19_jhu-csse/time_series_19-covid-{name}.csv\"\n", " df = pd.read_csv(filename)\n", " df = df.set_index(['Province/State', 'Country/Region', 'Lat', 'Long'])\n", " df.columns = pd.to_datetime(df.columns)\n", " return df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "frames_map = {\n", " \"confirmed\": read_jhu_covid_df(\"Confirmed\"),\n", " \"deaths\": read_jhu_covid_df(\"Deaths\"),\n", " \"recovered\": read_jhu_covid_df(\"Recovered\")\n", "}" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def current_region_totals_df(frames_map):\n", " sers = [df.groupby(level='Country/Region').sum().iloc[:,-1].sort_values(ascending=False)\n", " for name, df in frames_map.items()]\n", " for name, ser in zip(frames_map, sers):\n", " ser.name = name\n", " return pd.concat(sers, axis=1)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>confirmed</th>\n", " <th>deaths</th>\n", " <th>recovered</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>Mainland China</th>\n", " <td>80757</td>\n", " <td>3136</td>\n", " <td>60106</td>\n", " </tr>\n", " <tr>\n", " <th>Italy</th>\n", " <td>10149</td>\n", " <td>631</td>\n", " <td>724</td>\n", " </tr>\n", " <tr>\n", " <th>Iran (Islamic Republic of)</th>\n", " <td>8042</td>\n", " <td>291</td>\n", " <td>2731</td>\n", " </tr>\n", " <tr>\n", " <th>Republic of Korea</th>\n", " <td>7513</td>\n", " <td>54</td>\n", " <td>247</td>\n", " </tr>\n", " <tr>\n", " <th>France</th>\n", " <td>1784</td>\n", " <td>33</td>\n", " <td>12</td>\n", " </tr>\n", " <tr>\n", " <th>Spain</th>\n", " <td>1695</td>\n", " <td>35</td>\n", " <td>32</td>\n", " </tr>\n", " <tr>\n", " <th>US</th>\n", " <td>1670</td>\n", " <td>56</td>\n", " <td>15</td>\n", " </tr>\n", " <tr>\n", " <th>Germany</th>\n", " <td>1457</td>\n", " <td>2</td>\n", " <td>18</td>\n", " </tr>\n", " <tr>\n", " <th>Others</th>\n", " <td>696</td>\n", " <td>6</td>\n", " <td>40</td>\n", " </tr>\n", " <tr>\n", " <th>Japan</th>\n", " <td>581</td>\n", " <td>10</td>\n", " <td>101</td>\n", " </tr>\n", " <tr>\n", " <th>Switzerland</th>\n", " <td>491</td>\n", " <td>3</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>Norway</th>\n", " <td>400</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>UK</th>\n", " <td>382</td>\n", " <td>6</td>\n", " <td>18</td>\n", " </tr>\n", " <tr>\n", " <th>Netherlands</th>\n", " <td>382</td>\n", " <td>4</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>Sweden</th>\n", " <td>355</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>Belgium</th>\n", " <td>267</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>Denmark</th>\n", " <td>262</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>Austria</th>\n", " <td>182</td>\n", " <td>0</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>Singapore</th>\n", " <td>160</td>\n", " <td>0</td>\n", " <td>78</td>\n", " </tr>\n", " <tr>\n", " <th>Malaysia</th>\n", " <td>129</td>\n", " <td>0</td>\n", " <td>24</td>\n", " </tr>\n", " <tr>\n", " <th>Hong Kong SAR</th>\n", " <td>120</td>\n", " <td>3</td>\n", " <td>65</td>\n", " </tr>\n", " <tr>\n", " <th>Bahrain</th>\n", " <td>110</td>\n", " <td>0</td>\n", " <td>22</td>\n", " </tr>\n", " <tr>\n", " <th>Australia</th>\n", " <td>107</td>\n", " <td>3</td>\n", " <td>21</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " confirmed deaths recovered\n", "Mainland China 80757 3136 60106\n", "Italy 10149 631 724\n", "Iran (Islamic Republic of) 8042 291 2731\n", "Republic of Korea 7513 54 247\n", "France 1784 33 12\n", "Spain 1695 35 32\n", "US 1670 56 15\n", "Germany 1457 2 18\n", "Others 696 6 40\n", "Japan 581 10 101\n", "Switzerland 491 3 3\n", "Norway 400 0 1\n", "UK 382 6 18\n", "Netherlands 382 4 0\n", "Sweden 355 0 1\n", "Belgium 267 0 1\n", "Denmark 262 0 1\n", "Austria 182 0 4\n", "Singapore 160 0 78\n", "Malaysia 129 0 24\n", "Hong Kong SAR 120 3 65\n", "Bahrain 110 0 22\n", "Australia 107 3 21" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "current_totals_df = current_region_totals_df(frames_map)\n", "current_totals_df[current_totals_df['confirmed'] > 100]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Read in World Bank data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "import zipfile\n", "zf = zipfile.ZipFile(\"../data/worldbank/SP.POP.TOTL.zip\")\n", "pop_df = pd.read_csv(zf.open(\"API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv\"), skiprows=4)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There is 2018 pop data for all countries/regions except Eritrea" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Country Name</th>\n", " <th>Country Code</th>\n", " <th>Indicator Name</th>\n", " <th>Indicator Code</th>\n", " <th>1960</th>\n", " <th>1961</th>\n", " <th>1962</th>\n", " <th>1963</th>\n", " <th>1964</th>\n", " <th>1965</th>\n", " <th>...</th>\n", " <th>2011</th>\n", " <th>2012</th>\n", " <th>2013</th>\n", " <th>2014</th>\n", " <th>2015</th>\n", " <th>2016</th>\n", " <th>2017</th>\n", " <th>2018</th>\n", " <th>2019</th>\n", " <th>Unnamed: 64</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>67</th>\n", " <td>Eritrea</td>\n", " <td>ERI</td>\n", " <td>Population, total</td>\n", " <td>SP.POP.TOTL</td>\n", " <td>1007590.0</td>\n", " <td>1033328.0</td>\n", " <td>1060486.0</td>\n", " <td>1088854.0</td>\n", " <td>1118159.0</td>\n", " <td>1148189.0</td>\n", " <td>...</td>\n", " <td>3213972.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>108</th>\n", " <td>Not classified</td>\n", " <td>INX</td>\n", " <td>Population, total</td>\n", " <td>SP.POP.TOTL</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>2 rows × 65 columns</p>\n", "</div>" ], "text/plain": [ " Country Name Country Code Indicator Name Indicator Code 1960 \\\n", "67 Eritrea ERI Population, total SP.POP.TOTL 1007590.0 \n", "108 Not classified INX Population, total SP.POP.TOTL NaN \n", "\n", " 1961 1962 1963 1964 1965 ... 2011 \\\n", "67 1033328.0 1060486.0 1088854.0 1118159.0 1148189.0 ... 3213972.0 \n", "108 NaN NaN NaN NaN NaN ... NaN \n", "\n", " 2012 2013 2014 2015 2016 2017 2018 2019 Unnamed: 64 \n", "67 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "108 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "\n", "[2 rows x 65 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop_df[pd.isna(pop_df['2018'])]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Fix the country/region names that differ between the World Bank population data and the JHU CSSE data." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "region_wb_jhu_map = {\n", " 'China': 'Mainland China',\n", " 'Iran, Islamic Rep.': 'Iran (Islamic Republic of)',\n", " 'Korea, Rep.': 'Republic of Korea',\n", " 'United States': 'US',\n", " 'United Kingdom': 'UK',\n", " 'Hong Kong SAR, China': 'Hong Kong SAR',\n", " 'Egypt, Arab Rep.': 'Egypt',\n", " 'Vietnam': 'Viet Nam',\n", " 'Macao SAR, China': 'Macao SAR',\n", " 'Slovak Republic': 'Slovakia',\n", " 'Moldova': 'Republic of Moldova',\n", " 'St. Martin (French part)': 'Saint Martin',\n", " 'Brunei Darussalam': 'Brunei'\n", "}\n", "current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')['2018']\n", "data_pop_ser = current_pop_ser[current_pop_ser.index.isin(current_totals_df.index)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There are some regions that we cannot resolve, but we will just ignore these." ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>confirmed</th>\n", " <th>deaths</th>\n", " <th>recovered</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>Others</th>\n", " <td>696</td>\n", " <td>6</td>\n", " <td>40</td>\n", " </tr>\n", " <tr>\n", " <th>Taipei and environs</th>\n", " <td>47</td>\n", " <td>1</td>\n", " <td>17</td>\n", " </tr>\n", " <tr>\n", " <th>occupied Palestinian territory</th>\n", " <td>25</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>French Guiana</th>\n", " <td>5</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>Martinique</th>\n", " <td>2</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>Holy See</th>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>Saint Barthelemy</th>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " confirmed deaths recovered\n", "Others 696 6 40\n", "Taipei and environs 47 1 17\n", "occupied Palestinian territory 25 0 0\n", "French Guiana 5 0 0\n", "Martinique 2 0 0\n", "Holy See 1 0 0\n", "Saint Barthelemy 1 0 0" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "current_totals_df[current_totals_df.index.isin(data_pop_ser.index) == False]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Compute rates per 100,000 for regions with more than 100 cases" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>confirmed</th>\n", " <th>deaths</th>\n", " <th>recovered</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>Italy</th>\n", " <td>16.794282</td>\n", " <td>1.044161</td>\n", " <td>1.198055</td>\n", " </tr>\n", " <tr>\n", " <th>Republic of Korea</th>\n", " <td>14.550136</td>\n", " <td>0.104580</td>\n", " <td>0.478355</td>\n", " </tr>\n", " <tr>\n", " <th>Iran (Islamic Republic of)</th>\n", " <td>9.831264</td>\n", " <td>0.355745</td>\n", " <td>3.338620</td>\n", " </tr>\n", " <tr>\n", " <th>Norway</th>\n", " <td>7.526810</td>\n", " <td>0.000000</td>\n", " <td>0.018817</td>\n", " </tr>\n", " <tr>\n", " <th>Bahrain</th>\n", " <td>7.008874</td>\n", " <td>0.000000</td>\n", " <td>1.401775</td>\n", " </tr>\n", " <tr>\n", " <th>Mainland China</th>\n", " <td>5.798468</td>\n", " <td>0.225169</td>\n", " <td>4.315697</td>\n", " </tr>\n", " <tr>\n", " <th>Switzerland</th>\n", " <td>5.765250</td>\n", " <td>0.035226</td>\n", " <td>0.035226</td>\n", " </tr>\n", " <tr>\n", " <th>Denmark</th>\n", " <td>4.519231</td>\n", " <td>0.000000</td>\n", " <td>0.017249</td>\n", " </tr>\n", " <tr>\n", " <th>Spain</th>\n", " <td>3.627705</td>\n", " <td>0.074908</td>\n", " <td>0.068488</td>\n", " </tr>\n", " <tr>\n", " <th>Sweden</th>\n", " <td>3.486143</td>\n", " <td>0.000000</td>\n", " <td>0.009820</td>\n", " </tr>\n", " <tr>\n", " <th>Singapore</th>\n", " <td>2.837546</td>\n", " <td>0.000000</td>\n", " <td>1.383303</td>\n", " </tr>\n", " <tr>\n", " <th>France</th>\n", " <td>2.663194</td>\n", " <td>0.049263</td>\n", " <td>0.017914</td>\n", " </tr>\n", " <tr>\n", " <th>Belgium</th>\n", " <td>2.337580</td>\n", " <td>0.000000</td>\n", " <td>0.008755</td>\n", " </tr>\n", " <tr>\n", " <th>Netherlands</th>\n", " <td>2.216932</td>\n", " <td>0.023214</td>\n", " <td>0.000000</td>\n", " </tr>\n", " <tr>\n", " <th>Austria</th>\n", " <td>2.057186</td>\n", " <td>0.000000</td>\n", " <td>0.045213</td>\n", " </tr>\n", " <tr>\n", " <th>Germany</th>\n", " <td>1.756947</td>\n", " <td>0.002412</td>\n", " <td>0.021706</td>\n", " </tr>\n", " <tr>\n", " <th>Hong Kong SAR</th>\n", " <td>1.610522</td>\n", " <td>0.040263</td>\n", " <td>0.872366</td>\n", " </tr>\n", " <tr>\n", " <th>UK</th>\n", " <td>0.574531</td>\n", " <td>0.009024</td>\n", " <td>0.027072</td>\n", " </tr>\n", " <tr>\n", " <th>US</th>\n", " <td>0.510442</td>\n", " <td>0.017117</td>\n", " <td>0.004585</td>\n", " </tr>\n", " <tr>\n", " <th>Japan</th>\n", " <td>0.459183</td>\n", " <td>0.007903</td>\n", " <td>0.079824</td>\n", " </tr>\n", " <tr>\n", " <th>Australia</th>\n", " <td>0.428131</td>\n", " <td>0.012004</td>\n", " <td>0.084026</td>\n", " </tr>\n", " <tr>\n", " <th>Malaysia</th>\n", " <td>0.409153</td>\n", " <td>0.000000</td>\n", " <td>0.076121</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " confirmed deaths recovered\n", "Italy 16.794282 1.044161 1.198055\n", "Republic of Korea 14.550136 0.104580 0.478355\n", "Iran (Islamic Republic of) 9.831264 0.355745 3.338620\n", "Norway 7.526810 0.000000 0.018817\n", "Bahrain 7.008874 0.000000 1.401775\n", "Mainland China 5.798468 0.225169 4.315697\n", "Switzerland 5.765250 0.035226 0.035226\n", "Denmark 4.519231 0.000000 0.017249\n", "Spain 3.627705 0.074908 0.068488\n", "Sweden 3.486143 0.000000 0.009820\n", "Singapore 2.837546 0.000000 1.383303\n", "France 2.663194 0.049263 0.017914\n", "Belgium 2.337580 0.000000 0.008755\n", "Netherlands 2.216932 0.023214 0.000000\n", "Austria 2.057186 0.000000 0.045213\n", "Germany 1.756947 0.002412 0.021706\n", "Hong Kong SAR 1.610522 0.040263 0.872366\n", "UK 0.574531 0.009024 0.027072\n", "US 0.510442 0.017117 0.004585\n", "Japan 0.459183 0.007903 0.079824\n", "Australia 0.428131 0.012004 0.084026\n", "Malaysia 0.409153 0.000000 0.076121" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "current_per_100000_df = current_totals_df[current_totals_df['confirmed'] > 100]\n", "current_per_100000_df = current_per_100000_df.div(data_pop_ser, 'index').mul(100000).dropna()\n", "current_per_100000_df.sort_values('confirmed', ascending=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }