diff --git a/src/covid-19/covid_19_utils/covid_19_utils/converters/italy.py b/src/covid-19/covid_19_utils/covid_19_utils/converters/italy.py index 66b5673f52cc945f491e92bd7c22f952bd31acf3..1b45b301edaa495ba378ad7b54e28613e38e5703 100644 --- a/src/covid-19/covid_19_utils/covid_19_utils/converters/italy.py +++ b/src/covid-19/covid_19_utils/covid_19_utils/converters/italy.py @@ -104,4 +104,4 @@ def _correct_trentino(df): ItalyRegionalCaseConverter._register() -ItalyNationalCaseConverter._register() \ No newline at end of file +ItalyNationalCaseConverter._register() diff --git a/src/covid-19/covid_19_utils/covid_19_utils/converters/jhu.py b/src/covid-19/covid_19_utils/covid_19_utils/converters/jhu.py index dfd6b79535e96132a5fb0a0ab980720c5df0fbd6..0be24407cb8a77cbac537dc70837f71641adb588 100644 --- a/src/covid-19/covid_19_utils/covid_19_utils/converters/jhu.py +++ b/src/covid-19/covid_19_utils/covid_19_utils/converters/jhu.py @@ -52,20 +52,29 @@ class JhuCsseGlobalCaseConverter(CaseConverterImpl): "Slovakia": "Slovak Republic", "Saint Martin": "St. Martin (French part)", "Syria": "Syrian Arab Republic", - 'Taiwan*': 'Taiwan', + "Taiwan*": "Taiwan", "Venezuela": "Venezuela, RB", "US": "United States", } df = df.replace(region_jhu_wb_map) # add in missing data from Harvard worldmap - missing_countries = pd.unique(df.loc[df["region_label"].isin(pop_df["Country Name"]) == False, "region_label"]) - worldmap_df = pd.read_csv(self.atlas_folder / "worldmap" / "country_centroids.csv") - worldmap_df = worldmap_df[['name', 'sov_a3', 'pop_est']] - worldmap_df = worldmap_df.rename({"name": "Country Name", - "sov_a3": "Country Code", - "pop_est": "2018"}, axis=1) - worldmap_df = worldmap_df.loc[worldmap_df["Country Name"].isin(missing_countries)] + missing_countries = pd.unique( + df.loc[ + df["region_label"].isin(pop_df["Country Name"]) == False, "region_label" + ] + ) + worldmap_df = pd.read_csv( + self.atlas_folder / "worldmap" / "country_centroids.csv" + ) + worldmap_df = worldmap_df[["name", "sov_a3", "pop_est"]] + worldmap_df = worldmap_df.rename( + {"name": "Country Name", "sov_a3": "Country Code", "pop_est": "2018"}, + axis=1, + ) + worldmap_df = worldmap_df.loc[ + worldmap_df["Country Name"].isin(missing_countries) + ] pop_df = pop_df.append(worldmap_df) pop_ser = pop_df.set_index("Country Code")["2018"] @@ -74,12 +83,12 @@ class JhuCsseGlobalCaseConverter(CaseConverterImpl): for i, r in pop_df[["Country Name", "Country Code"]].iterrows() } df["country"] = df["region_label"].replace(country_code_map) - df['country_label'] = df['region_label'] + df["country_label"] = df["region_label"] merged = df.loc[df["country"].isin(pop_ser.index)].copy() merged["population"] = merged.apply(lambda r: pop_ser.loc[r["country"]], axis=1) - merged['region_iso'] = merged['country'] - merged['tested'] = np.nan + merged["region_iso"] = merged["country"] + merged["tested"] = np.nan return self._set_common_columns(merged) def read_ser(self, path, name): diff --git a/src/covid-19/covid_19_utils/covid_19_utils/converters/spain.py b/src/covid-19/covid_19_utils/covid_19_utils/converters/spain.py index 3fcbb2259945a5842056b8fce742327cb37db2b1..6fc55482b6bda938d9d05bd9615e072838e74dd9 100644 --- a/src/covid-19/covid_19_utils/covid_19_utils/converters/spain.py +++ b/src/covid-19/covid_19_utils/covid_19_utils/converters/spain.py @@ -31,7 +31,11 @@ region_populations = [ {"region_iso": "ES-IB", "region_label": "Baleares", "population": "1150839"}, {"region_iso": "ES-CN", "region_label": "Canarias", "population": "2127685"}, {"region_iso": "ES-CB", "region_label": "Cantabria", "population": "580229"}, - {"region_iso": "ES-CM", "region_label": "Castilla-La Mancha", "population": "2106331"}, + { + "region_iso": "ES-CM", + "region_label": "Castilla-La Mancha", + "population": "2106331", + }, {"region_iso": "ES-CL", "region_label": "Castilla y León", "population": "2418694"}, {"region_iso": "ES-CT", "region_label": "Cataluña", "population": "7619494"}, {"region_iso": "ES-CE", "region_label": "Ceuta", "population": "84777"}, @@ -67,7 +71,7 @@ class SpainCaseConverter(CaseConverter): # calculate incidence rates merged = df_conv.merge(pd.DataFrame(region_populations)) - merged['country'] = 'ESP' + merged["country"] = "ESP" return self._set_common_columns(merged)