Skip to content
Snippets Groups Projects
Commit 1ecbcf04 authored by Rok Roškar's avatar Rok Roškar
Browse files

update ch notebook

parent 478549a7
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
from pathlib import Path
import altair as alt
import pandas as pd
from IPython.display import display, HTML
```
%% Cell type:code id: tags:parameters
``` python
# Notebook parameters (this cell carries the `parameters` tag).
# When True, the charts built below are also saved as HTML files.
save_figures = False
# Directory searched for the per-canton case CSV files.
data_path = '../../data/openzh-covid-19'
# Directory the chart HTML files are written to when save_figures is True.
figures_path = '../../figures'
```
%% Cell type:code id: tags:
``` python
# Attribution footer (data sources and analysis project) displayed
# alongside each chart further down.
_credits_markup = '''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/openZH/covid_19">OpenData Zuerich</a>,
<a href="https://www.bfs.admin.ch">Federal Statistical Office</a>
<br>
Analysis:
<a href="https://renkulab.io/projects/covid-19/covid-19-public-data">Covid-19 Public Data Collaboration Project</a>
</p>'''
html_credits = HTML(_credits_markup)
```
%% Cell type:markdown id: tags:
## Read in the data
We have two datasets for Switzerland: the COVID-19 dataset from https://github.com/openZH/covid_19 and the population statistics by age and canton. We can read both of these into dataframes:
%% Cell type:code id: tags:
``` python
# Read every per-canton CSV and stack them into one long dataframe.
# The files are visited in sorted order so that the row order of `df`
# does not depend on the order in which the filesystem lists them.
csv_files = sorted(Path(data_path).glob('COVID19_Fallzahlen_Kanton_*total.csv'))
if not csv_files:
    # pd.concat() on an empty list raises a ValueError that does not say
    # which directory was searched; fail with an actionable message instead.
    raise ValueError(
        f'no COVID19_Fallzahlen_Kanton_*total.csv files found in {data_path!r}'
    )
df_list = [pd.read_csv(csv_file) for csv_file in csv_files]
df = pd.concat(df_list)
# Parse the date strings; ambiguous values are interpreted day-first.
df['date'] = pd.to_datetime(df['date'], dayfirst=True)
```
%% Cell type:code id: tags:
``` python
# read in population data
df_pop = pd.read_excel(
    Path(data_path) / '../ch-population-statistics/ch-population-by-age-canton.xls',
    header=1,
    skipfooter=5
)
# Keep only the rows whose Region label starts with '-' (presumably the
# per-canton rows -- verify against the spreadsheet); rows with any missing
# value are dropped as well. The result is ordered by Region label.
df_pop = df_pop.where(
    df_pop.Region.str.startswith('-')
).dropna().sort_values('Region').reset_index(drop=True)

# match the cantons in the two datasets
# The abbreviations are assigned by position, so this list has to follow the
# sorted order of the Region labels exactly.
# NOTE(review): 'AI' is listed before 'AR'. If the spreadsheet labels sort
# Appenzell Ausserrhoden ahead of Appenzell Innerrhoden, the populations of
# these two cantons are swapped -- confirm against df_pop.Region.
canton_codes = [
    'AG', 'AI', 'AR', 'BL', 'BS', 'BE', 'FR', 'GE', 'GL', 'GR', 'JU', 'LU', 'NE',
    'NW', 'OW', 'SH', 'SZ', 'SO', 'SG', 'TG', 'TI', 'UR', 'VS', 'VD', 'ZG', 'ZH',
]
df_pop['abbreviation_canton_and_fl'] = canton_codes
pop_d = df_pop[['abbreviation_canton_and_fl', 'Total']].set_index('abbreviation_canton_and_fl').to_dict()

# calculate cases and deaths per 10k
# Look up each row's canton population once, vectorized, instead of calling a
# Python lambda per row. Abbreviations that are missing from the population
# table yield NaN rather than raising KeyError.
canton_population = df['abbreviation_canton_and_fl'].map(pop_d['Total']).astype(float)
for x in ['conf', 'deceased']:
    df[f'ncumul_{x}_10k'] = df[f'ncumul_{x}'] / canton_population * 10000
```
%% Cell type:code id: tags:
``` python
# preview the first five rows of the combined dataframe
df.head(n=5)
```
%% Output
date time abbreviation_canton_and_fl ncumul_tested ncumul_conf \
0 2020-02-28 NaN VS NaN 1.0
1 2020-03-03 NaN VS NaN 2.0
2 2020-03-05 NaN VS NaN 3.0
3 2020-03-06 NaN VS NaN 5.0
4 2020-03-08 NaN VS NaN 7.0
ncumul_hosp ncumul_ICU ncumul_vent ncumul_released ncumul_deceased \
0 NaN NaN NaN NaN NaN
1 NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN
source ncumul_ICF \
0 https://vs.ch/documents/529400/6767345/2020+02... NaN
1 https://vs.ch/documents/529400/6789273/2020+03... NaN
2 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN
3 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN
4 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN
ncumul_ICU_intub TotalPosTests1 TotalCured ncumul_conf_per10k \
0 NaN NaN NaN 0.029074
1 NaN NaN NaN 0.058147
2 NaN NaN NaN 0.087221
3 NaN NaN NaN 0.145368
4 NaN NaN NaN 0.203515
ncumul_conf_10k
0 0.029074
1 0.058147
2 0.087221
3 0.145368
4 0.203515
%% Cell type:markdown id: tags:
## Plot the available data
Below we make plots of total cases, total cases per 10k population and total deaths. You can click on the canton abbreviations in the legend to highlight individual lines.
%% Cell type:code id: tags:
``` python
def generate_canton_chart(column, title, tooltip_title):
    """Produce a per-canton line chart of ``column`` over time.

    The chart is derived from the module-level ``base`` chart, which has to
    be defined before this function is called.

    Args:
        column: Name of the data field drawn on the y axis and reported in
            the tooltip.
        title: Title of the y axis.
        tooltip_title: Label shown next to the value in the tooltip.

    Returns:
        The line chart, with a legend-bound canton selection attached.
    """
    canton_field = 'abbreviation_canton_and_fl'
    # The selection is bound to the legend; it drives the opacity below.
    selection = alt.selection_multi(fields=[canton_field], bind='legend')
    chart = base.mark_line().encode(
        alt.X('date', title='Date'),
        alt.Y(column, title=title, scale=alt.Scale(type='linear')),
        color=alt.Color(canton_field, legend=alt.Legend(title="Canton")),
        tooltip=[
            alt.Tooltip(canton_field, title='Canton'),
            # Report the same field that is plotted, so the value matches
            # ``tooltip_title`` for every chart (cases, deaths, per-10k).
            alt.Tooltip(column, title=tooltip_title),
            alt.Tooltip('date', title='Date'),
        ],
        # Selected cantons are fully opaque, the others are faded.
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
    ).add_selection(
        selection
    )
    return chart
```
%% Cell type:markdown id: tags:
### Total cases
%% Cell type:code id: tags:
``` python
# Rows with at least one confirmed case and a known canton.
has_cases = df.ncumul_conf > 0
base = alt.Chart(df[has_cases].dropna(subset=['abbreviation_canton_and_fl']))

cumul = generate_canton_chart('ncumul_conf', 'Cases', 'Cases')
cumul_10k = generate_canton_chart('ncumul_conf_10k', 'Cases per 10k population', 'Cases/10k')

# The configure_* methods return a new chart instead of changing the one they
# are called on, so the font settings are chained onto the final, top-level
# concatenated chart where their result is actually kept.
chart = alt.hconcat(
    cumul, cumul_10k, title='Covid-19 cases in Switzerland by Canton'
).configure_title(
    anchor='middle'
).configure_header(
    titleFontSize=25
).configure_axis(
    labelFontSize=15, titleFontSize=15
)
display(chart)
if save_figures:
    chart.save(str(Path(figures_path) / 'switzerland-cases-by-canton.html'))
display(html_credits)
```
%% Output
%% Cell type:markdown id: tags:
### Deaths
%% Cell type:code id: tags:
``` python
# Rows with at least one recorded death and a known canton.
has_deaths = df.ncumul_deceased > 0
base = alt.Chart(df[has_deaths].dropna(subset=['abbreviation_canton_and_fl']))

deaths = generate_canton_chart('ncumul_deceased', 'Deaths', 'Deaths')
deaths_10k = generate_canton_chart('ncumul_deceased_10k', 'Deaths per 10k population', 'Deaths/10k')

# The configure_* methods return a new chart instead of changing the one they
# are called on, so the font settings are chained onto the final, top-level
# concatenated chart where their result is actually kept.
chart = alt.hconcat(
    deaths, deaths_10k, title='Covid-19 deaths in Switzerland by Canton'
).configure_title(
    anchor='middle'
).configure_header(
    titleFontSize=25
).configure_axis(
    labelFontSize=15, titleFontSize=15
)
display(chart)
display(html_credits)
if save_figures:
    chart.save(str(Path(figures_path) / 'switzerland-deaths-by-canton.html'))
```
%% Output
%% Cell type:code id: tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment