update ch notebook

1ecbcf04 · Rok Roškar · 478549a7 · 1ecbcf04
Commit 1ecbcf04 authored 5 years ago by Rok Roškar
--- a/notebooks/examples/openzh-covid-19-example.ipynb
+++ b/notebooks/examples/openzh-covid-19-example.ipynb
@@ -303,7 +303,7 @@
   "source": [
    "## Plot the available data\n",
    "\n",
-    "Below we make plots of total cases, total cases per 10k population and total deaths. "
+    "Below we make plots of total cases, total cases per 10k population and total deaths. You can click on the canton abbreviations in the legend to highlight individual lines. "
   ]
  },
  {

 %% Cell type:code id: tags:

 ``` python
 from pathlib import Path

 import altair as alt
 import pandas as pd
 from IPython.display import display, HTML
 ```

 %% Cell type:code id: tags:parameters

 ``` python
 # Notebook parameters, grouped in the cell tagged `parameters`.
 # When True, the chart cells also save each figure as HTML under figures_path.
 save_figures = False
 # Directory searched for the COVID19_Fallzahlen_Kanton_*total.csv files.
 data_path = '../../data/openzh-covid-19'
 # Directory that receives the saved chart HTML files.
 figures_path = '../../figures'
 ```

 %% Cell type:code id: tags:

 ``` python
 # Attribution footer (data sources and analysis project) displayed with each chart.
 html_credits=HTML('''
 <p style="font-size: smaller">Data Sources:
  <a href="https://github.com/openZH/covid_19">OpenData Zuerich</a>,
  <a href="https://www.bfs.admin.ch">Federal Statistical Office</a>
 <br>
 Analysis:
  <a href="https://renkulab.io/projects/covid-19/covid-19-public-data">Covid-19 Public Data Collaboration Project</a>
 </p>''')
 ```

 %% Cell type:markdown id: tags:

 ## Read in the data

 We have two datasets for Switzerland: the COVID-19 dataset from https://github.com/openZH/covid_19 and the population statistics by age and canton. We can read both of these into dataframes:

 %% Cell type:code id: tags:

 ``` python
 # load every per-canton totals CSV and stack them into a single dataframe
 df_list = [
    pd.read_csv(csv_file)
    for csv_file in Path(data_path).glob('COVID19_Fallzahlen_Kanton_*total.csv')
 ]
 df = pd.concat(df_list)

 # parse the date column, reading ambiguous strings as day-first
 df['date'] = pd.to_datetime(df['date'], dayfirst=True)
 ```

 %% Cell type:code id: tags:

 ``` python
 # read in population data
 df_pop = pd.read_excel(
    Path(data_path) / '../ch-population-statistics/ch-population-by-age-canton.xls',
    header=1,
    skipfooter=5
 )
 # keep only the rows whose Region label starts with '-' and sort them by
 # Region so they can be paired positionally with the abbreviations below
 df_pop = df_pop.where(
    df_pop.Region.str.startswith('-')
 ).dropna().sort_values('Region').reset_index(drop=True)

 # match the cantons in the two datasets
 # NOTE(review): this assignment is purely positional, so it is only correct if
 # the sorted Region labels come out in exactly this order - verify against the
 # spreadsheet (in particular the AI/AR pair).
 df_pop['abbreviation_canton_and_fl'] = ['AG', 'AI', 'AR', 'BL', 'BS', 'BE', 'FR', 'GE', 'GL', 'GR', 'JU', 'LU', 'NE', 'NW', 'OW', 'SH', 'SZ', 'SO', 'SG', 'TG', 'TI', 'UR', 'VS', 'VD', 'ZG', 'ZH']

 pop_d = df_pop[['abbreviation_canton_and_fl', 'Total']].set_index('abbreviation_canton_and_fl').to_dict()


 # calculate cases and deaths per 10k
 # Look the population up with one vectorised map instead of a row-wise apply;
 # codes that are missing from pop_d yield NaN rather than raising a KeyError.
 population = df['abbreviation_canton_and_fl'].map(pop_d['Total'])

 for x in ['conf', 'deceased']:
    df[f'ncumul_{x}_10k'] = df[f'ncumul_{x}'] / population * 10000
 ```

 %% Cell type:code id: tags:

 ``` python
 # display the first rows of the combined dataframe
 df.head()
 ```

 %% Output

            date time abbreviation_canton_and_fl  ncumul_tested  ncumul_conf  \
    0 2020-02-28  NaN                         VS            NaN          1.0
    1 2020-03-03  NaN                         VS            NaN          2.0
    2 2020-03-05  NaN                         VS            NaN          3.0
    3 2020-03-06  NaN                         VS            NaN          5.0
    4 2020-03-08  NaN                         VS            NaN          7.0
    
       ncumul_hosp  ncumul_ICU  ncumul_vent  ncumul_released  ncumul_deceased  \
    0          NaN         NaN          NaN              NaN              NaN
    1          NaN         NaN          NaN              NaN              NaN
    2          NaN         NaN          NaN              NaN              NaN
    3          NaN         NaN          NaN              NaN              NaN
    4          NaN         NaN          NaN              NaN              NaN
    
                                                  source  ncumul_ICF  \
    0  https://vs.ch/documents/529400/6767345/2020+02...         NaN
    1  https://vs.ch/documents/529400/6789273/2020+03...         NaN
    2  https://www.vs.ch/de/web/coronavirus/info?p_p_...         NaN
    3  https://www.vs.ch/de/web/coronavirus/info?p_p_...         NaN
    4  https://www.vs.ch/de/web/coronavirus/info?p_p_...         NaN
    
       ncumul_ICU_intub  TotalPosTests1  TotalCured  ncumul_conf_per10k  \
    0               NaN             NaN         NaN            0.029074
    1               NaN             NaN         NaN            0.058147
    2               NaN             NaN         NaN            0.087221
    3               NaN             NaN         NaN            0.145368
    4               NaN             NaN         NaN            0.203515
    
       ncumul_conf_10k
    0         0.029074
    1         0.058147
    2         0.087221
    3         0.145368
    4         0.203515

 %% Cell type:markdown id: tags:

 ## Plot the available data

-Below we make plots of total cases, total cases per 10k population and total deaths.
+Below we make plots of total cases, total cases per 10k population and total deaths. You can click on the canton abbreviations in the legend to highlight individual lines.

 %% Cell type:code id: tags:

 ``` python
 def generate_canton_chart(column, title, tooltip_title):
    """Produce an interactive per-canton line chart for one data column.

    The chart is derived from the module-level ``base`` chart, which must be
    defined before this function is called. The selection is bound to the
    legend: selected cantons are drawn at full opacity, the rest at 0.2.

    Args:
        column: Name of the data column plotted on the y axis and shown in
            the tooltip.
        title: Y-axis title.
        tooltip_title: Label shown for the value in the tooltip.

    Returns:
        The line chart with the legend-bound selection attached.
    """
    selection = alt.selection_multi(fields=['abbreviation_canton_and_fl'], bind='legend')
    chart = base.mark_line().encode(
        alt.X('date', title='Date'),
        alt.Y(column,
              title=title, scale=alt.Scale(type='linear')),
        color=alt.Color('abbreviation_canton_and_fl', legend=alt.Legend(title="Canton")),
        tooltip=[alt.Tooltip('abbreviation_canton_and_fl',title='Canton'),
                 # show the plotted column; this was hard-coded to 'ncumul_conf',
                 # which mislabelled the per-10k and deaths tooltips
                 alt.Tooltip(column,title=tooltip_title),
                 alt.Tooltip('date',title='Date')],
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
    ).add_selection(
        selection
    )
    return chart
 ```

 %% Cell type:markdown id: tags:

 ### Total cases

 %% Cell type:code id: tags:

 ``` python
 # base chart over the rows with at least one confirmed case; read as a
 # global by generate_canton_chart
 base = alt.Chart(df.where(df.ncumul_conf>0).dropna(subset=['abbreviation_canton_and_fl']))

 cumul = generate_canton_chart('ncumul_conf', 'Cases', 'Cases')
 cumul_10k = generate_canton_chart('ncumul_conf_10k', 'Cases per 10k population', 'Cases/10k')

 # configure_*() returns a new chart rather than modifying in place, and
 # configuration is only accepted on the top-level chart, so the header/axis
 # settings are applied to the concatenated chart instead of to `base`.
 chart = alt.hconcat(
    cumul, cumul_10k, title='Covid-19 cases in Switzerland by Canton'
 ).configure_title(
    anchor='middle'
 ).configure_header(
    titleFontSize=25
 ).configure_axis(
    labelFontSize=15, titleFontSize=15
 )

 display(chart)
 if save_figures:
    chart.save(str(Path(figures_path) / 'switzerland-cases-by-canton.html'))

 display(html_credits)
 ```

 %% Output



 %% Cell type:markdown id: tags:

 ### Deaths

 %% Cell type:code id: tags:

 ``` python
 # base chart over the rows with at least one recorded death; read as a
 # global by generate_canton_chart
 base = alt.Chart(df.where(df.ncumul_deceased>0).dropna(subset=['abbreviation_canton_and_fl']))

 deaths = generate_canton_chart('ncumul_deceased', 'Deaths', 'Deaths')
 deaths_10k = generate_canton_chart('ncumul_deceased_10k', 'Deaths per 10k population', 'Deaths/10k')

 # configure_*() returns a new chart rather than modifying in place, and
 # configuration is only accepted on the top-level chart, so the header/axis
 # settings are applied to the concatenated chart instead of to `base`.
 chart = alt.hconcat(
    deaths, deaths_10k, title='Covid-19 deaths in Switzerland by Canton'
 ).configure_title(
    anchor='middle'
 ).configure_header(
    titleFontSize=25
 ).configure_axis(
    labelFontSize=15, titleFontSize=15
 )
 display(chart)
 display(html_credits)

 if save_figures:
    chart.save(str(Path(figures_path) / 'switzerland-deaths-by-canton.html'))
 ```

 %% Output



 %% Cell type:code id: tags:

 ``` python
 ```