From 5f6b71b421811f9c3b8fc6d1f0bc9361131096fe Mon Sep 17 00:00:00 2001 From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch> Date: Wed, 25 Mar 2020 22:45:56 +0000 Subject: [PATCH] feat: switch to the new JHU dataset format --- notebooks/Dashboard.ipynb | 2 +- notebooks/process/CompileGeoData.ipynb | 10 ++++----- notebooks/process/ToRates.ipynb | 15 ++++++------- .../covid_19_dashboard/helper.py | 22 ++++++++----------- 4 files changed, 22 insertions(+), 27 deletions(-) diff --git a/notebooks/Dashboard.ipynb b/notebooks/Dashboard.ipynb index 588336ca..17e57941 100644 --- a/notebooks/Dashboard.ipynb +++ b/notebooks/Dashboard.ipynb @@ -52,7 +52,7 @@ "# Identify countries with 100 or more cases\n", "countries_over_thresh = helper.countries_with_number_of_cases(jhu_frames_map, 'confirmed', 100)\n", "# Filter out some countries with very high case/population ratio\n", - "countries_over_thresh = [c for c in countries_over_thresh if c not in set(['San Marino', 'Iceland'])]" + "countries_over_thresh = [c for c in countries_over_thresh if c not in set(['Andorra', 'Iceland', 'San Marino'])]" ] }, { diff --git a/notebooks/process/CompileGeoData.ipynb b/notebooks/process/CompileGeoData.ipynb index afc2c2b8..3dda43ed 100644 --- a/notebooks/process/CompileGeoData.ipynb +++ b/notebooks/process/CompileGeoData.ipynb @@ -25,8 +25,8 @@ "metadata": {}, "outputs": [], "source": [ - "ts_folder = \"../data/covid-19_jhu-csse/\"\n", - "worldmap_path = \"../data/worldmap/country_centroids.csv\"\n", + "ts_folder = \"../../data/covid-19_jhu-csse/\"\n", + "worldmap_path = \"../../data/worldmap/country_centroids.csv\"\n", "out_folder = None\n", "PAPERMILL_OUTPUT_PATH = None" ] @@ -49,7 +49,7 @@ "outputs": [], "source": [ "def read_jhu_covid_region_df(name):\n", - " filename = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n", + " filename = os.path.join(ts_folder, f\"time_series_covid19_{name}_global.csv\")\n", " df = pd.read_csv(filename)\n", " df = df.set_index(['Country/Region', 'Province/State', 'Lat', 'Long'])\n", " df.columns = pd.to_datetime(df.columns)\n", @@ -63,7 +63,7 @@ "metadata": {}, "outputs": [], "source": [ - "confirmed_df = read_jhu_covid_region_df(\"Confirmed\")" + "confirmed_df = read_jhu_covid_region_df(\"confirmed\")" ] }, { @@ -183,7 +183,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/notebooks/process/ToRates.ipynb b/notebooks/process/ToRates.ipynb index cfc1c6bc..9aa7f046 100644 --- a/notebooks/process/ToRates.ipynb +++ b/notebooks/process/ToRates.ipynb @@ -27,9 +27,9 @@ }, "outputs": [], "source": [ - "ts_folder = \"../data/covid-19_jhu-csse/\"\n", - "wb_path = \"../data/worldbank/SP.POP.TOTL.zip\"\n", - "geodata_path = \"../data/geodata/geo_data.csv\"\n", + "ts_folder = \"../../data/covid-19_jhu-csse/\"\n", + "wb_path = \"../../data/worldbank/SP.POP.TOTL.zip\"\n", + "geodata_path = \"../../data/geodata/geo_data.csv\"\n", "out_folder = None\n", "PAPERMILL_OUTPUT_PATH = None" ] @@ -54,7 +54,7 @@ "outputs": [], "source": [ "def read_jhu_covid_region_df(name):\n", - " filename = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n", + " filename = os.path.join(ts_folder, f\"time_series_covid19_{name}_global.csv\")\n", " df = pd.read_csv(filename)\n", " df = df.set_index(['Country/Region', 'Province/State', 'Lat', 'Long'])\n", " df.columns = pd.to_datetime(df.columns)\n", @@ -70,9 +70,8 @@ "outputs": [], "source": [ "frames_map = {\n", - " \"confirmed\": read_jhu_covid_region_df(\"Confirmed\"),\n", - " \"deaths\": read_jhu_covid_region_df(\"Deaths\"),\n", - " \"recovered\": read_jhu_covid_region_df(\"Recovered\")\n", + " \"confirmed\": read_jhu_covid_region_df(\"confirmed\"),\n", + " \"deaths\": read_jhu_covid_region_df(\"deaths\"),\n", "}" ] }, @@ -261,7 +260,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/src/covid-19/covid_19_dashboard/covid_19_dashboard/helper.py b/src/covid-19/covid_19_dashboard/covid_19_dashboard/helper.py index fc71c372..879186f7 100644 --- a/src/covid-19/covid_19_dashboard/covid_19_dashboard/helper.py +++ b/src/covid-19/covid_19_dashboard/covid_19_dashboard/helper.py @@ -8,7 +8,7 @@ import os def read_jhu_covid_df(ts_folder, name): - filename = os.path.join(ts_folder, f"time_series_19-covid-{name}.csv") + filename = os.path.join(ts_folder, f"time_series_covid19_{name}_global.csv") df = pd.read_csv(filename) df = df.set_index(['Province/State', 'Country/Region', 'Lat', 'Long']) df.columns = pd.to_datetime(df.columns) @@ -18,9 +18,8 @@ def read_jhu_covid_df(ts_folder, name): def read_jhu_frames_map(ts_folder): jhu_frames_map = { - "confirmed": read_jhu_covid_df(ts_folder, "Confirmed"), - "deaths": read_jhu_covid_df(ts_folder, "Deaths"), - "recovered": read_jhu_covid_df(ts_folder, "Recovered") + "confirmed": read_jhu_covid_df(ts_folder, "confirmed"), + "deaths": read_jhu_covid_df(ts_folder, "deaths"), } return jhu_frames_map @@ -37,7 +36,6 @@ def read_rates_frames_map(rates_folder): rates_frames_map = { "confirmed": read_rates_covid_df(rates_folder, "confirmed"), "deaths": read_rates_covid_df(rates_folder, "deaths"), - "recovered": read_rates_covid_df(rates_folder, "recovered") } return rates_frames_map @@ -67,19 +65,17 @@ def latest_rates_ser(rates_frames_map, name): def compute_map_df(rates_frames_map, jhu_frames_map, geodata_df, countries_over_thresh): map_df = pd.concat([ latest_rates_ser(rates_frames_map, 'confirmed'), - latest_rates_ser(rates_frames_map, 'deaths'), - latest_rates_ser(rates_frames_map, 'recovered')], axis=1) + latest_rates_ser(rates_frames_map, 'deaths')], axis=1) nominal_df = pd.concat([ latest_jhu_country_ser(jhu_frames_map, 'confirmed'), - latest_jhu_country_ser(jhu_frames_map, 'deaths'), - latest_jhu_country_ser(jhu_frames_map, 'recovered')], axis=1) + latest_jhu_country_ser(jhu_frames_map, 'deaths')], axis=1) map_df = pd.concat([map_df, nominal_df, geodata_df[['Longitude', 'Latitude']]], axis=1) # Restrict to countries with 100 or more cases map_df = map_df.loc[countries_over_thresh].dropna() map_df = map_df.reset_index() map_df.columns = ['Country/Region', - 'Confirmed/100k', 'Deaths/100k', 'Recovered/100k', - 'Confirmed', 'Deaths', 'Recovered', + 'Confirmed/100k', 'Deaths/100k', + 'Confirmed', 'Deaths', 'Long', 'Lat'] return map_df @@ -103,8 +99,8 @@ def map_of_variable(map_df, variable, title): size=alt.Size(f'{variable}:Q', title="Cases"), color=alt.value('steelblue'), tooltip=["Country/Region:N", - "Confirmed:Q", "Deaths:Q", "Recovered:Q", - "Confirmed/100k:Q", "Deaths/100k:Q", "Recovered/100k:Q"] + "Confirmed:Q", "Deaths:Q", + "Confirmed/100k:Q", "Deaths/100k:Q"] ) ).project( 'naturalEarth1' -- GitLab