Chandrasekhar Ramakrishnan · 62e2cdb1
--- a/notebooks/Dashboard.ipynb

+ 2

− 2
+++ b/notebooks/Dashboard.ipynb

+ 2

− 2
 %% Cell type:code id: tags:

 ``` python
 %load_ext autoreload
 %autoreload 2
 ```

 %% Cell type:code id: tags:

 ``` python
 import altair as alt
 import numpy as np
 import pandas as pd

 from IPython.display import display, HTML, Markdown
 from covid_19_utils import helper
 ```

 %% Cell type:code id: tags:parameters

 ``` python
 # Notebook parameters (this cell carries the `parameters` tag).
 # NOTE(review): presumably these defaults are overridden by papermill at run time — confirm.
 ts_folder = "../data/covid-19_jhu-csse/"  # handed to CaseConverter.read_convert
 rates_folder = "../data/covid-19_rates/"
 geodata_path = "../data/geodata/geo_data.csv"  # handed to helper.read_geodata
 atlas_path = "../data/atlas"  # handed to the CaseConverter constructor
 out_folder = None
 PAPERMILL_OUTPUT_PATH = None
 ```

 %% Cell type:code id: tags:

 ``` python
 from covid_19_utils.converters import CaseConverter

 # Read in and convert the case data with the project converter.
 converter = CaseConverter(atlas_path)
 jhu_df = converter.read_convert(ts_folder)

 # Read in the geographical data.
 geodata_df = helper.read_geodata(geodata_path)

 # Keep only the geo columns that are joined in, with `name` renamed to
 # `country_label`.
 geo_columns = ['Latitude', 'Longitude', 'country_label', 'region_un']
 geo_subset_df = geodata_df.rename(columns={"name": "country_label"})[geo_columns]

 # Join on the column names the two frames share (merge's default), then
 # rename `region_un` to 'Geo Region'.
 jhu_df = jhu_df.merge(geo_subset_df)
 jhu_df = jhu_df.rename(columns={'region_un': 'Geo Region'})
 ```

 %% Cell type:code id: tags:

 ``` python
 # case threshold
 nthresh = 500

 # Identify countries with {nthresh} or more cases
-country_df = jhu_df.groupby('country')
-countries_over_thresh = country_df.max()[country_df.max()['positive']>nthresh]['region_label']
+country_max_ser = jhu_df.set_index(['region_label', 'date'])['positive'].groupby(level='region_label').max()
+countries_over_thresh = country_max_ser[country_max_ser>nthresh].index

 # Filter out some countries with very high case/population ratio
 countries_over_thresh = [c for c in countries_over_thresh if c not in set(['Andorra', 'Iceland', 'San Marino'])]
 ```

 %% Cell type:markdown id: tags:

 # Questions About COVID-19 and Its Spread

 Understanding the spread, distribution, and deadliness of COVID-19 is difficult, despite the data available about it. Differences in rates of testing, quality of data, demographics, etc. make it difficult to compare data between countries.

 All this needs to be considered when looking at the plots below. But despite those caveats, I found it helpful to plot the raw data, even though direct comparisons between countries might not be accurate.

 %% Cell type:code id: tags:

 ``` python
 # Caption date: take the most recent date present in the data rather than the
 # date of whichever row happens to be last, so the caption (and the
 # "latest" filter that reuses data_ts) does not depend on row order.
 data_ts = jhu_df.date.max().strftime("%b %d %Y")
 display(HTML(f"<em>Data up to {data_ts}; countries with {nthresh} or more confirmed cases.</em>"))
 ```

 %% Cell type:markdown id: tags:

 ## How are cases per 100,000 distributed geographically?

 %% Cell type:code id: tags:

 ``` python
 # Rows whose date equals the caption date computed above.
 is_latest = jhu_df['date'] == data_ts
 latest_df = jhu_df[is_latest]
 ```

 %% Cell type:code id: tags:

 ``` python
 # Latest rows restricted to the countries that passed the case threshold.
 # NOTE(review): countries_over_thresh is built from `region_label` but matched
 # against `country_label` here — confirm the two columns hold the same labels.
 over_thresh_mask = latest_df['country_label'].isin(countries_over_thresh)
 map_df = latest_df[over_thresh_mask]
 ```

 %% Cell type:code id: tags:

 ``` python
 # Render the map of 'positive_100k', followed by the data-source footer.
 case_map = helper.map_of_variable(map_df, 'positive_100k', 'Positive')
 display(case_map)

 sources_html = '''
 <p style="font-size: smaller">Data Sources:
  <a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
  <a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
  <a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
 </p>'''
 display(HTML(sources_html))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Horizontal bar chart of positive cases per 100k, one bar per country,
 # sorted by descending x value.
 bars = alt.Chart(map_df).mark_bar().encode(
     x=alt.X('positive_100k:Q', title='Positive cases/100k'),
     y=alt.Y("country_label:N", title='Country/Region', sort='-x'),
     tooltip=["country_label:N",
              "positive:Q", "deceased:Q",
              "positive_100k:Q", "deceased_100k:Q"]
 )

 # Value labels drawn next to each bar.
 text = bars.mark_text(
     align='left',
     baseline='middle',
     dx=3  # Nudges text to right so it doesn't appear on top of the bar
 ).encode(
     text=alt.Text('positive_100k:Q', format=".3")
 )

 # The title has no placeholders, so it is a plain string rather than an f-string.
 chart = (bars + text).properties(height=900, title="Confirmed cases per 100k inhabitants")
 display(chart)
 display(HTML('''
 <p style="font-size: smaller">Data Sources:
  <a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
  <a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
  <a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
 </p>'''))
 ```

 %% Cell type:markdown id: tags:

 ## How have cases been growing?

 %% Cell type:code id: tags:

 ``` python
 # Select countries at or above a per-capita case threshold; `>=` matches the
 # "or more" wording of the chart title below.
 per_capita_thresh = 50
 # Built once instead of once per list element.
 excluded_countries = {'Andorra', 'Iceland', 'San Marino'}
 countries_over_thresh_per_capita = [
     c for c in latest_df[latest_df.positive_100k >= per_capita_thresh].country_label
     if c not in excluded_countries
 ]

 # build the charts
 # Facets are ordered by the mean 'positive' count per 'Geo Region', largest
 # first. Only the 'positive' column is aggregated, so non-numeric columns
 # cannot make mean() fail on recent pandas versions.
 sort_order = latest_df.groupby('Geo Region')['positive'].mean().sort_values(ascending=False).index.tolist()
 # Legend-bound selection on country_label: selected countries are drawn at
 # full opacity, the others at 0.2.
 selection = alt.selection_multi(fields=['country_label'], bind='legend')
 opacity = alt.condition(selection, alt.value(1), alt.value(0.2))

 # Shared base: the selected countries over time.
 base = alt.Chart(
     jhu_df[jhu_df.country_label.isin(countries_over_thresh_per_capita)]
 ).encode(
     alt.X('date', title='Date')
 ).properties(
     width=300,
     height=200
 )
 cases = base.mark_line().encode(
     alt.Y('positive_100k', scale=alt.Scale(type='symlog'), title='Cases per 100k population'),
     color=alt.Color('country_label', title='Country'),
     facet=alt.Facet('Geo Region:N', columns=1, sort=alt.SortArray(sort_order), title=''),
     tooltip=["country_label:N", "date:T", "positive_100k:Q"],
     opacity=opacity
 ).add_selection(selection)

 deaths = base.mark_line().encode(
     alt.Y('deceased_100k', scale=alt.Scale(type='symlog'), title='Deaths per 100k population'),
     color=alt.Color('country_label', title='Country'),
     facet=alt.Facet('Geo Region:N', columns=1, sort=alt.SortArray(sort_order), title=''),
     tooltip=["country_label:N", "date:T", "deceased_100k:Q"],
     opacity=opacity
 ).add_selection(selection)

 # Cases and deaths side by side under one centred title.
 chart = alt.hconcat(
     cases, deaths, title=f"Countries with {per_capita_thresh} or more cases per 100k"
 ).configure_title(
     anchor='middle'
 )

 display(chart)
 display(HTML('''
 <p style="font-size: smaller">Data Sources:
  <a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
  <a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
  <a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
 </p>'''))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Restrict the case data to the countries picked by the per-capita threshold
 # and hand it to helper.make_since_df, keyed by country_label.
 per_capita_mask = jhu_df['country_label'].isin(countries_over_thresh_per_capita)
 per_capita_df = jhu_df[per_capita_mask]
 since_df = helper.make_since_df(per_capita_df, region_column='country_label')
 ```

 %% Cell type:code id: tags:

 ``` python
 # Order the facets by each country's peak 'positive' count, largest first.
 # Only the 'positive' column is aggregated, so other columns cannot affect
 # (or break) the aggregation.
 sort_order = since_df.groupby(
     'country_label')['positive'].max().sort_values(
     ascending=False).index.tolist()
 # Exclude China in this plot because its numbers are far greater than everywhere else
 sort_order = [o for o in sort_order if o != 'China']
 chart = helper.facetted_growth_plot(
     since_df[since_df['country_label'] != 'China'],
     'sinceDay0',
     'positive_100k',
     sort_order,
     'Italy',
     "Growth of cases per 100k population from case 100, compared to Italy",
     "Cases/100k"
 )
 display(chart)
 # The closing </p> of the attribution paragraph was missing and is added here.
 display(HTML('''
 <p style="font-size: smaller">Data Sources:
  <a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
  <a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
  <a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
 </p>
 <p style="font-size: smaller">Inspired by <a href="https://covid19dashboards.com/growth-analysis/">Thomas Wiecki</a></p>'''))
 ```

 %% Cell type:code id: tags:

 ``` python
 # Same plot again, with 'log' passed as the final argument (presumably the
 # y-axis scale — confirm against helper.facetted_growth_plot).
 # NOTE(review): China's rows are not filtered out of since_df here, while
 # sort_order as built in the previous cell omits China — confirm intended.
 log_title = "Growth of cases per 100k population from case 100, compared to Italy (log scale)"
 chart = helper.facetted_growth_plot(
     since_df, 'sinceDay0', 'positive_100k', sort_order, 'Italy', log_title, "Cases/100k", 'log'
 )
 display(chart)

 sources_html = '''
 <p style="font-size: smaller">Data Sources:
  <a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
  <a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
  <a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
 </p>'''
 display(HTML(sources_html))
 ```