From 5f8235b69d290a3729b66acdf449adb8c1a496aa Mon Sep 17 00:00:00 2001
From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch>
Date: Sun, 15 Mar 2020 23:24:56 +0000
Subject: [PATCH] feat: plot trajectory since the 100th case

---
Review notes (text in this section is ignored when the patch is applied):
 * facetted_growth_plot: the final row of small multiples was never appended
   to the grid, so up to three countries were silently left out of the plot.
   Rows are now built by slicing the chart list in steps of three.
 * facetted_growth_plot: the reference label position is computed from the
   `df` argument rather than from the notebook global `increase_df`.
 * helper.growth_df: `cutoff` defaults to 2, the previously hard-coded
   threshold, so existing four-argument callers keep working.
 * The blob ids on the `index` lines are those of the original patch and do
   not reflect the hand edits above.

 notebooks/Dashboard.ipynb                     | 146 +++++++++++++++++-
 .../covid_19_dashboard/helper.py              |   4 +-
 2 files changed, 145 insertions(+), 5 deletions(-)

diff --git a/notebooks/Dashboard.ipynb b/notebooks/Dashboard.ipynb
index 6fe07cb7..24e40e7f 100644
--- a/notebooks/Dashboard.ipynb
+++ b/notebooks/Dashboard.ipynb
@@ -7,6 +7,7 @@
    "outputs": [],
    "source": [
     "import pandas as pd\n",
+    "import numpy as np\n",
     "import os\n",
     "from IPython.display import display, HTML, Markdown\n",
     "import covid_19_dashboard as helper"
@@ -158,7 +159,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "confirmed_rate_df = helper.growth_df(rates_frames_map, geodata_df, 'confirmed', countries_over_thresh)\n",
+    "confirmed_rate_df = helper.growth_df(rates_frames_map, geodata_df, 'confirmed', countries_over_thresh, 2)\n",
     "latest_confirmed_ser = confirmed_rate_df.set_index(\n",
     "    ['Country/Region', 'Geo Region', 'Date']).drop(\n",
     "    ['Longitude', 'Latitude'], axis=1).unstack().iloc[:,-1]\n",
@@ -171,7 +172,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "base = alt.Chart(confirmed_rate_df).properties(width=300, height=200, title=\"Countries with 2 or more cases per 100k\")\n",
+    "base = alt.Chart(confirmed_rate_df).properties(\n",
+    "    width=300, height=200, title=\"Countries with 2 or more cases per 100k\")\n",
     "line = base.mark_line().encode(\n",
     "    x='Date',\n",
     "    y='Confirmed/100k',\n",
@@ -179,7 +181,145 @@
     "    facet=alt.Facet('Geo Region:N', columns=1, sort=alt.SortArray(sort_order), title='Geographic Region'),\n",
     "    tooltip=[\"Country/Region:N\", \"Date:T\", \"Confirmed/100k:Q\"]\n",
     ")\n",
-    "line"
+    "# display explicitly: the chart is no longer the cell's last expression\n",
+    "display(line)\n",
+    "display(HTML('''\n",
+    "<p style=\"font-size: smaller\">Data Sources: \n",
+    "  <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a>,\n",
+    "  <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>,\n",
+    "  <a href=\"https://worldmap.harvard.edu/data/geonode:country_centroids_az8\">Harvard Worldmap</a>\n",
+    "</p>'''))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def country_increase_df(c, df_nominal, growth_in_rate_df):\n",
+    "    \"\"\"Return country c's trajectory from its first row with >= 100 confirmed cases.\n",
+    "\n",
+    "    'Date' and 'Confirmed' are taken relative to that first row ('Days' is the\n",
+    "    date offset in whole days); 'Confirmed/100k' is copied row-for-row from the\n",
+    "    rows of growth_in_rate_df for c on or after that row's date.\n",
+    "    \"\"\"\n",
+    "    over_100 = df_nominal[df_nominal['Confirmed'] >= 100]\n",
+    "    tdf = (over_100[['Date', 'Confirmed']] - over_100.iloc[0][['Date', 'Confirmed']]).reset_index()\n",
+    "    tdfr = growth_in_rate_df[(growth_in_rate_df['Date'] >= over_100.iloc[0]['Date']) &\n",
+    "                             (growth_in_rate_df['Country/Region'] == c)].reset_index()\n",
+    "    tdf['Confirmed/100k'] = tdfr['Confirmed/100k']\n",
+    "    tdf['Country/Region'] = c\n",
+    "    tdf['Days'] = (tdf['Date'] / np.timedelta64(1, 'D')).astype(int)\n",
+    "    return tdf[['Country/Region', 'Days', 'Confirmed', 'Confirmed/100k']]\n",
+    "\n",
+    "\n",
+    "growth_in_rate_df = helper.growth_df(rates_frames_map, geodata_df, 'confirmed', countries_over_thresh, 0)\n",
+    "frame_map = {'confirmed': jhu_frames_map['confirmed'].groupby(level='Country/Region').sum()}\n",
+    "growth_in_value_df = helper.growth_df(frame_map, geodata_df, 'confirmed', countries_over_thresh, 1000)\n",
+    "growth_in_value_df = growth_in_value_df.rename({'Confirmed/100k':'Confirmed'}, axis=1)\n",
+    "increase_df = pd.concat([country_increase_df(c, df_nominal, growth_in_rate_df) for \n",
+    "                         c, df_nominal in growth_in_value_df.groupby('Country/Region')])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def facetted_growth_plot(df, variable, sort_order, ref_country, title):\n",
+    "    \"\"\"Return a grid of small-multiple line charts, one per country in sort_order.\n",
+    "\n",
+    "    Each chart plots `variable` against 'Days' for rows of df and overlays the\n",
+    "    ref_country line, labelled at its second-to-last 'Days' value. Charts are\n",
+    "    laid out three per row and stacked vertically under `title`.\n",
+    "    \"\"\"\n",
+    "    base = alt.Chart(df).properties(\n",
+    "        width=250, height=150)\n",
+    "    line = base.mark_line().encode(\n",
+    "        x='Days',\n",
+    "        y=variable,\n",
+    "        color='Country/Region',\n",
+    "        tooltip=[\"Country/Region:N\", \"Days:Q\", f\"{variable}:Q\"]\n",
+    "    )\n",
+    "    label_loc = df[df['Country/Region'] == ref_country]['Days'].iloc[-2]\n",
+    "    ref = base.mark_line(opacity=0.3).encode(\n",
+    "        x='Days',\n",
+    "        y=variable,\n",
+    "        color=alt.ColorValue('steelblue'),\n",
+    "    ).transform_filter(f\"datum['Country/Region'] == '{ref_country}'\")\n",
+    "    ref += ref.mark_text().encode(text='Country/Region:N').transform_filter(f\"datum['Days'] == {label_loc}\")\n",
+    "    charts = []\n",
+    "    # make our small multiples\n",
+    "    for country in sort_order:\n",
+    "        smallm = line.transform_filter(f\"datum['Country/Region'] == '{country}'\").properties(\n",
+    "            title=country)\n",
+    "        smallm += ref\n",
+    "        charts.append(smallm)\n",
+    "\n",
+    "    # group the small multiples into rows of up to 3 horizontal charts;\n",
+    "    # slicing keeps the final row even when it holds fewer than 3 charts\n",
+    "    groups = []\n",
+    "    for start in range(0, len(charts), 3):\n",
+    "        row = alt.hconcat()\n",
+    "        for small_chart in charts[start:start + 3]:\n",
+    "            row |= small_chart\n",
+    "        groups.append(row)\n",
+    "    # vertically combine the horizontal charts\n",
+    "    chart = alt.vconcat(title=title)\n",
+    "    for row in groups:\n",
+    "        chart &= row\n",
+    "    return chart"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sort_order = growth_in_value_df.groupby(\n",
+    "    'Country/Region').max().sort_values(\n",
+    "    'Confirmed', ascending=False).index.tolist()\n",
+    "# Exclude China in this plot because its numbers are far greater than everywhere else\n",
+    "sort_order = [o for o in sort_order if o != 'China']\n",
+    "chart = facetted_growth_plot(increase_df[increase_df['Country/Region'] != 'China'], \n",
+    "                             'Confirmed',\n",
+    "                             sort_order,\n",
+    "                             'Italy',\n",
+    "                             \"Growth of cases from case 100, compared to Italy\")\n",
+    "display(chart)\n",
+    "display(HTML('''\n",
+    "<p style=\"font-size: smaller\">Data Sources: \n",
+    "  <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a>,\n",
+    "  <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>,\n",
+    "  <a href=\"https://worldmap.harvard.edu/data/geonode:country_centroids_az8\">Harvard Worldmap</a>\n",
+    "</p>\n",
+    "<p style=\"font-size: smaller\">Inspired by <a href=\"https://covid19dashboards.com/growth-analysis/\">Thomas Wiecki</a>'''))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sort_order = growth_in_value_df.groupby(\n",
+    "    'Country/Region').max().sort_values(\n",
+    "    'Confirmed', ascending=False).index.tolist()\n",
+    "chart = facetted_growth_plot(increase_df, \n",
+    "                             'Confirmed/100k',\n",
+    "                             sort_order,\n",
+    "                             'Italy',\n",
+    "                             \"Growth of cases/100k from case 100, compared to Italy\")\n",
+    "display(chart)\n",
+    "display(HTML('''\n",
+    "<p style=\"font-size: smaller\">Data Sources: \n",
+    "  <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a>,\n",
+    "  <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>,\n",
+    "  <a href=\"https://worldmap.harvard.edu/data/geonode:country_centroids_az8\">Harvard Worldmap</a>\n",
+    "</p>'''))"
    ]
   }
  ],
diff --git a/src/covid-19/covid_19_dashboard/covid_19_dashboard/helper.py b/src/covid-19/covid_19_dashboard/covid_19_dashboard/helper.py
index 6b62468e..fc71c372 100644
--- a/src/covid-19/covid_19_dashboard/covid_19_dashboard/helper.py
+++ b/src/covid-19/covid_19_dashboard/covid_19_dashboard/helper.py
@@ -113,9 +113,9 @@ def map_of_variable(map_df, variable, title):
     return p
 
 
-def growth_df(rates_frames_map, geodata_df, name, countries_over_thresh):
+def growth_df(rates_frames_map, geodata_df, name, countries_over_thresh, cutoff=2):
     latest_confirmed_ser = rates_frames_map['confirmed'].iloc[:,-1]
-    countries_over_1 = latest_confirmed_ser[latest_confirmed_ser >= 2].reset_index()['Country/Region']
+    countries_over_1 = latest_confirmed_ser[latest_confirmed_ser >= cutoff].reset_index()['Country/Region']
 
     confirmed_rate_df = rates_frames_map['confirmed']
     confirmed_rate_df = confirmed_rate_df.loc[
-- 
GitLab