Play.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read in JHU CSSE data\n",
    "\n",
    "I will switch to [xarray](http://xarray.pydata.org/en/stable/), but ATM, it's easier like this..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_jhu_covid_df(name):\n",
    "    filename = f\"../data/covid-19_jhu-csse/time_series_19-covid-{name}.csv\"\n",
    "    df = pd.read_csv(filename)\n",
    "    df = df.set_index(['Province/State', 'Country/Region', 'Lat', 'Long'])\n",
    "    df.columns = pd.to_datetime(df.columns)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "frames_map = {\n",
    "    \"confirmed\": read_jhu_covid_df(\"Confirmed\"),\n",
    "    \"deaths\": read_jhu_covid_df(\"Deaths\"),\n",
    "    \"recovered\": read_jhu_covid_df(\"Recovered\")\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "def current_region_totals_df(frames_map):\n",
    "    sers = [df.groupby(level='Country/Region').sum().iloc[:,-1].sort_values(ascending=False)\n",
    "            for name, df in frames_map.items()]\n",
    "    for name, ser in zip(frames_map, sers):\n",
    "        ser.name = name\n",
    "    return pd.concat(sers, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>confirmed</th>\n",
       "      <th>deaths</th>\n",
       "      <th>recovered</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Mainland China</th>\n",
       "      <td>80757</td>\n",
       "      <td>3136</td>\n",
       "      <td>60106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Italy</th>\n",
       "      <td>10149</td>\n",
       "      <td>631</td>\n",
       "      <td>724</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Iran (Islamic Republic of)</th>\n",
       "      <td>8042</td>\n",
       "      <td>291</td>\n",
       "      <td>2731</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Republic of Korea</th>\n",
       "      <td>7513</td>\n",
       "      <td>54</td>\n",
       "      <td>247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>France</th>\n",
       "      <td>1784</td>\n",
       "      <td>33</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Spain</th>\n",
       "      <td>1695</td>\n",
       "      <td>35</td>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>US</th>\n",
       "      <td>1670</td>\n",
       "      <td>56</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Germany</th>\n",
       "      <td>1457</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Others</th>\n",
       "      <td>696</td>\n",
       "      <td>6</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Japan</th>\n",
       "      <td>581</td>\n",
       "      <td>10</td>\n",
       "      <td>101</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Switzerland</th>\n",
       "      <td>491</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Norway</th>\n",
       "      <td>400</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>UK</th>\n",
       "      <td>382</td>\n",
       "      <td>6</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Netherlands</th>\n",
       "      <td>382</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sweden</th>\n",
       "      <td>355</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Belgium</th>\n",
       "      <td>267</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Denmark</th>\n",
       "      <td>262</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Austria</th>\n",
       "      <td>182</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Singapore</th>\n",
       "      <td>160</td>\n",
       "      <td>0</td>\n",
       "      <td>78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Malaysia</th>\n",
       "      <td>129</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hong Kong SAR</th>\n",
       "      <td>120</td>\n",
       "      <td>3</td>\n",
       "      <td>65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bahrain</th>\n",
       "      <td>110</td>\n",
       "      <td>0</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Australia</th>\n",
       "      <td>107</td>\n",
       "      <td>3</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                            confirmed  deaths  recovered\n",
       "Mainland China                  80757    3136      60106\n",
       "Italy                           10149     631        724\n",
       "Iran (Islamic Republic of)       8042     291       2731\n",
       "Republic of Korea                7513      54        247\n",
       "France                           1784      33         12\n",
       "Spain                            1695      35         32\n",
       "US                               1670      56         15\n",
       "Germany                          1457       2         18\n",
       "Others                            696       6         40\n",
       "Japan                             581      10        101\n",
       "Switzerland                       491       3          3\n",
       "Norway                            400       0          1\n",
       "UK                                382       6         18\n",
       "Netherlands                       382       4          0\n",
       "Sweden                            355       0          1\n",
       "Belgium                           267       0          1\n",
       "Denmark                           262       0          1\n",
       "Austria                           182       0          4\n",
       "Singapore                         160       0         78\n",
       "Malaysia                          129       0         24\n",
       "Hong Kong SAR                     120       3         65\n",
       "Bahrain                           110       0         22\n",
       "Australia                         107       3         21"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "current_totals_df = current_region_totals_df(frames_map)\n",
    "current_totals_df[current_totals_df['confirmed'] > 100]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read in World Bank data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import zipfile\n",
    "zf = zipfile.ZipFile(\"../data/worldbank/SP.POP.TOTL.zip\")\n",
    "pop_df = pd.read_csv(zf.open(\"API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv\"), skiprows=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Country Name</th>\n",
       "      <th>Country Code</th>\n",
       "      <th>Indicator Name</th>\n",
       "      <th>Indicator Code</th>\n",
       "      <th>1960</th>\n",
       "      <th>1961</th>\n",
       "      <th>1962</th>\n",
       "      <th>1963</th>\n",
       "      <th>1964</th>\n",
       "      <th>1965</th>\n",
       "      <th>...</th>\n",
       "      <th>2011</th>\n",
       "      <th>2012</th>\n",
       "      <th>2013</th>\n",
       "      <th>2014</th>\n",
       "      <th>2015</th>\n",
       "      <th>2016</th>\n",
       "      <th>2017</th>\n",
       "      <th>2018</th>\n",
       "      <th>2019</th>\n",
       "      <th>Unnamed: 64</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>Eritrea</td>\n",
       "      <td>ERI</td>\n",
       "      <td>Population, total</td>\n",
       "      <td>SP.POP.TOTL</td>\n",
       "      <td>1007590.0</td>\n",
       "      <td>1033328.0</td>\n",
       "      <td>1060486.0</td>\n",
       "      <td>1088854.0</td>\n",
       "      <td>1118159.0</td>\n",
       "      <td>1148189.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3213972.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>108</th>\n",
       "      <td>Not classified</td>\n",
       "      <td>INX</td>\n",
       "      <td>Population, total</td>\n",
       "      <td>SP.POP.TOTL</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 65 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       Country Name Country Code     Indicator Name Indicator Code       1960  \\\n",
       "67          Eritrea          ERI  Population, total    SP.POP.TOTL  1007590.0   \n",
       "108  Not classified          INX  Population, total    SP.POP.TOTL        NaN   \n",
       "\n",
       "          1961       1962       1963       1964       1965  ...       2011  \\\n",
       "67   1033328.0  1060486.0  1088854.0  1118159.0  1148189.0  ...  3213972.0   \n",
       "108        NaN        NaN        NaN        NaN        NaN  ...        NaN   \n",
       "\n",
       "     2012  2013  2014  2015  2016  2017  2018  2019  Unnamed: 64  \n",
       "67    NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN          NaN  \n",
       "108   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN          NaN  \n",
       "\n",
       "[2 rows x 65 columns]"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# There is 2018 pop data for all countries/regions except Eritrea\n",
    "pop_df[pd.isna(pop_df['2018'])]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fix the country/region names that differ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
    "region_wb_jhu_map = {\n",
    "    'China': 'Mainland China',\n",
    "     'Iran, Islamic Rep.': 'Iran (Islamic Republic of)',\n",
    "     'Korea, Rep.': 'Republic of Korea',\n",
    "     'United States': 'US',\n",
    "     'United Kingdom': 'UK',\n",
    "     'Hong Kong SAR, China': 'Hong Kong SAR',\n",
    "     'Egypt, Arab Rep.': 'Egypt',\n",
    "     'Vietnam': 'Viet Nam',\n",
    "     'Macao SAR, China': 'Macao SAR',\n",
    "     'Slovak Republic': 'Slovakia',\n",
    "     'Moldova': 'Republic of Moldova',\n",
    "     'St. Martin (French part)': 'Saint Martin',\n",
    "     'Brunei Darussalam': 'Brunei'\n",
    "}\n",
    "current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')\n",
    "data_pop_ser = current_pop_ser[current_pop_ser.index.isin(current_totals_df.index)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "There are some regions that we cannot resolve, but we will just ignore these."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>confirmed</th>\n",
       "      <th>deaths</th>\n",
       "      <th>recovered</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Others</th>\n",
       "      <td>696</td>\n",
       "      <td>6</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Taipei and environs</th>\n",
       "      <td>47</td>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>occupied Palestinian territory</th>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>French Guiana</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Martinique</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Holy See</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Saint Barthelemy</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                confirmed  deaths  recovered\n",
       "Others                                696       6         40\n",
       "Taipei and environs                    47       1         17\n",
       "occupied Palestinian territory         25       0          0\n",
       "French Guiana                           5       0          0\n",
       "Martinique                              2       0          0\n",
       "Holy See                                1       0          0\n",
       "Saint Barthelemy                        1       0          0"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "current_totals_df[current_totals_df.index.isin(data_pop_ser.index) == False]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}