From c90274b2e9dfb0168acc904dd1362100618d3a60 Mon Sep 17 00:00:00 2001 From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch> Date: Wed, 8 Apr 2020 12:48:48 +0000 Subject: [PATCH] feat: use Harvard Worldmap for regions without population data from Worldbank --- .../covid_19_dashboard/converters/jhu.py | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/covid-19/covid_19_dashboard/covid_19_dashboard/converters/jhu.py b/src/covid-19/covid_19_dashboard/covid_19_dashboard/converters/jhu.py index 2236705ca..a6b53cdaf 100644 --- a/src/covid-19/covid_19_dashboard/covid_19_dashboard/converters/jhu.py +++ b/src/covid-19/covid_19_dashboard/covid_19_dashboard/converters/jhu.py @@ -28,32 +28,55 @@ class JhuCsseGlobalCaseConverter(CaseConverterImpl): pop_df = pd.read_csv( zf.open("API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv"), skiprows=4 ) - + pop_df = pop_df[["Country Name", "Country Code", "2018"]] # Fix the names of countries region_jhu_wb_map = { + "Bahamas": "Bahamas, The", "Brunei": "Brunei Darussalam", + "Burma": "Myanmar", + "Congo (Brazzaville)": "Congo, Rep.", + "Congo (Kinshasa)": "Congo, Dem. Rep.", "Czechia": "Czech Republic", "Egypt": "Egypt, Arab Rep.", + "Gambia": "Gambia, The", "Hong Kong SAR": "Hong Kong SAR, China", "Iran": "Iran, Islamic Rep.", "Korea, South": "Korea, Rep.", + "Kyrgyzstan": "Kyrgyz Republic", + "Laos": "Lao PDR", "Macao SAR": "Macao SAR, China", "Russia": "Russian Federation", + "Saint Kitts and Nevis": "St. Kitts and Nevis", + "Saint Lucia": "St. Lucia", + "Saint Vincent and the Grenadines": "St. Vincent and the Grenadines", "Slovakia": "Slovak Republic", "Saint Martin": "St. Martin (French part)", + "Syria": "Syrian Arab Republic", + 'Taiwan*': 'Taiwan', + "Venezuela": "Venezuela, RB", "US": "United States", } df = df.replace(region_jhu_wb_map) + + # add in missing data from Harvard worldmap + missing_countries = pd.unique(df.loc[df["region_label"].isin(pop_df["Country Name"]) == False, "region_label"]) + worldmap_df = pd.read_csv(self.atlas_folder / "worldmap" / "country_centroids.csv") + worldmap_df = worldmap_df[['name', 'sov_a3', 'pop_est']] + worldmap_df = worldmap_df.rename({"name": "Country Name", + "sov_a3": "Country Code", + "pop_est": "2018"}, axis=1) + worldmap_df = worldmap_df.loc[worldmap_df["Country Name"].isin(missing_countries)] + pop_df = pop_df.append(worldmap_df) + + pop_ser = pop_df.set_index("Country Code")["2018"] country_code_map = { r["Country Name"]: r["Country Code"] for i, r in pop_df[["Country Name", "Country Code"]].iterrows() } df["country"] = df["region_label"].replace(country_code_map) df['country_label'] = df['region_label'] - pop_ser = pop_df.set_index("Country Code")["2018"] - merged = df.loc[df["country"].isin(pop_ser.index)].copy() - # TODO Also consult for worldmap data for countries like Taiwan + merged = df.loc[df["country"].isin(pop_ser.index)].copy() merged["population"] = merged.apply(lambda r: pop_ser.loc[r["country"]], axis=1) merged['region_iso'] = merged['country'] return self._set_common_columns(merged) -- GitLab