diff --git a/notebooks/ToRates.ipynb b/notebooks/ToRates.ipynb
index d0aa13c5b9a25a4ed06014e22e57ddf30e6ff5eb..cfc1c6bcf1a9694cac6c2e0534b0d9fdc749211a 100644
--- a/notebooks/ToRates.ipynb
+++ b/notebooks/ToRates.ipynb
@@ -29,6 +29,7 @@
    "source": [
     "ts_folder = \"../data/covid-19_jhu-csse/\"\n",
     "wb_path = \"../data/worldbank/SP.POP.TOTL.zip\"\n",
+    "geodata_path = \"../data/geodata/geo_data.csv\"\n",
     "out_folder = None\n",
     "PAPERMILL_OUTPUT_PATH = None"
    ]
@@ -75,15 +76,6 @@
     "}"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "frames_map['confirmed'].sort_values(frames_map['confirmed'].columns[-1], ascending=False).head()"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -176,6 +168,48 @@
     "].iloc[:,-2:]"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Read in geodata to get additional population numbers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "geodata_df = pd.read_csv(geodata_path).drop('Unnamed: 0', axis=1).set_index('name_jhu')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Add in populations for missing countries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Row-aligned country labels: `index.levels[0]` holds unique values, not one label per row.\n",
+    "confirmed_countries = frames_map['confirmed'].index.get_level_values(0)\n",
+    "missing_countries = frames_map['confirmed'].loc[\n",
+    "    ~confirmed_countries.isin(data_pop_ser.index)\n",
+    "].iloc[:,-2:].reset_index()['Country/Region']\n",
+    "\n",
+    "missing_geodata = geodata_df.loc[geodata_df.index.isin(missing_countries)]\n",
+    "display(missing_geodata)\n",
+    "\n",
+    "# `Series.append` was removed in pandas 2.0; `pd.concat` is the supported replacement.\n",
+    "data_pop_ser = pd.concat([data_pop_ser, missing_geodata['pop_est']])"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},