Skip to content
Snippets Groups Projects
Commit 0ce36734 authored by Rok Roškar's avatar Rok Roškar
Browse files

ch-notebook remove png

parent be5ec232
No related branches found
No related tags found
1 merge request!43Swiss papermill pipeline
Pipeline #18554 passed with stage
in 20 seconds
%% Cell type:code id: tags:
``` python
from pathlib import Path
import altair as alt
import pandas as pd
from IPython.display import display, HTML
```
%% Cell type:code id: tags:parameters
``` python
# Notebook parameters (this cell carries the `parameters` tag).
# When True, the chart cells below also write each figure under `figures_path`.
save_figures = False
# Directory searched for the per-canton COVID-19 CSV files; the population
# spreadsheet is located relative to it as well.
data_path = '../../data/openzh-covid-19'
# Directory that saved figures are written to.
figures_path = '../../figures'
```
%% Cell type:code id: tags:
``` python
# Attribution footer (data sources and analysis project) displayed with the figures.
credits_markup = '''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/openZH/covid_19">OpenData Zuerich</a>,
<a href="https://www.bfs.admin.ch">Federal Statistical Office</a>
<br>
Analysis:
<a href="https://renkulab.io/projects/covid-19/covid-19-public-data">Covid-19 Public Data Collaboration Project</a>
</p>'''
html_credits = HTML(credits_markup)
```
%% Cell type:markdown id: tags:
## Read in the data
We have two datasets for Switzerland: the COVID-19 dataset from https://github.com/openZH/covid_19 and the population statistics by age and canton. We can read both of these into dataframes:
%% Cell type:code id: tags:
``` python
# Load every per-canton "total" CSV and stack them into a single dataframe.
csv_files = Path(data_path).glob('COVID19_Fallzahlen_Kanton_*total.csv')
df_list = [pd.read_csv(csv_file) for csv_file in csv_files]
df = pd.concat(df_list)
# Convert the date column to datetimes, reading ambiguous dates as day-first.
df['date'] = pd.to_datetime(df['date'], dayfirst=True)
```
%% Cell type:code id: tags:
``` python
# Load the population spreadsheet that sits next to the COVID-19 data directory.
population_file = Path(data_path) / '../ch-population-statistics/ch-population-by-age-canton.xls'
df_pop = pd.read_excel(population_file, header=1, skipfooter=5)

# Keep only the rows whose Region starts with '-', discard rows with missing
# values, and order them by Region.
is_dash_region = df_pop.Region.str.startswith('-')
df_pop = df_pop.where(is_dash_region).dropna()
df_pop = df_pop.sort_values('Region').reset_index(drop=True)

# Attach the canton abbreviations used by the COVID-19 dataset. The list is
# assigned by position, so it has to follow the Region sort order above.
# NOTE(review): 'AI' is listed before 'AR' here - confirm this matches the
# sorted Region names in the spreadsheet.
df_pop['abbreviation_canton_and_fl'] = [
    'AG', 'AI', 'AR', 'BL', 'BS', 'BE', 'FR', 'GE', 'GL', 'GR', 'JU', 'LU', 'NE',
    'NW', 'OW', 'SH', 'SZ', 'SO', 'SG', 'TG', 'TI', 'UR', 'VS', 'VD', 'ZG', 'ZH',
]

# Nested lookup of the form {'Total': {abbreviation: population}}.
canton_totals = df_pop[['abbreviation_canton_and_fl', 'Total']]
pop_d = canton_totals.set_index('abbreviation_canton_and_fl').to_dict()

# Add cumulative confirmed cases and deaths per 10k inhabitants of each canton.
for metric in ('conf', 'deceased'):
    cumulative = f'ncumul_{metric}'
    df[f'{cumulative}_10k'] = df.apply(
        lambda row: row[cumulative] / pop_d['Total'][row.abbreviation_canton_and_fl] * 10000,
        axis=1,
    )
```
%% Cell type:code id: tags:
``` python
# Preview the first five rows of the combined dataframe.
df.head(n=5)
```
%% Output
date time abbreviation_canton_and_fl ncumul_tested ncumul_conf \
0 2020-02-28 NaN VS NaN 1.0
1 2020-03-03 NaN VS NaN 2.0
2 2020-03-05 NaN VS NaN 3.0
3 2020-03-06 NaN VS NaN 5.0
4 2020-03-08 NaN VS NaN 7.0
ncumul_hosp ncumul_ICU ncumul_vent ncumul_released ncumul_deceased \
0 NaN NaN NaN NaN NaN
1 NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN
source ncumul_ICF \
0 https://vs.ch/documents/529400/6767345/2020+02... NaN
1 https://vs.ch/documents/529400/6789273/2020+03... NaN
2 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN
3 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN
4 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN
ncumul_ICU_intub TotalPosTests1 TotalCured ncumul_conf_per10k \
0 NaN NaN NaN 0.029074
1 NaN NaN NaN 0.058147
2 NaN NaN NaN 0.087221
3 NaN NaN NaN 0.145368
4 NaN NaN NaN 0.203515
ncumul_conf_10k
0 0.029074
1 0.058147
2 0.087221
3 0.145368
4 0.203515
%% Cell type:markdown id: tags:
## Plot the available data
Below we plot total cases and total deaths by canton, each both as absolute counts and per 10k population.
%% Cell type:code id: tags:
``` python
def generate_canton_chart(column, title, tooltip_title):
    """Produce a per-canton line chart of ``column`` over time.

    The chart is derived from the module-level ``base`` chart, so ``base``
    must be assigned before this function is called.

    Args:
        column: Name of the data column plotted on the y axis and reported
            in the tooltip.
        title: Title of the y axis.
        tooltip_title: Label shown next to the ``column`` value in the tooltip.

    Returns:
        The line chart, colored by canton, with a legend-bound selection
        that lowers the opacity of cantons that are not selected.
    """
    selection = alt.selection_multi(fields=['abbreviation_canton_and_fl'], bind='legend')
    chart = base.mark_line().encode(
        alt.X('date', title='Date'),
        alt.Y(column, title=title, scale=alt.Scale(type='linear')),
        color=alt.Color('abbreviation_canton_and_fl', legend=alt.Legend(title="Canton")),
        tooltip=[
            alt.Tooltip('abbreviation_canton_and_fl', title='Canton'),
            # Report the plotted column; a fixed 'ncumul_conf' here would show
            # confirmed-case counts under every label (e.g. "Deaths").
            alt.Tooltip(column, title=tooltip_title),
            alt.Tooltip('date', title='Date'),
        ],
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
    ).add_selection(
        selection
    )
    return chart
```
%% Cell type:markdown id: tags:
### Total cases
%% Cell type:code id: tags:
``` python
# Chart only the rows with a positive confirmed-case count and a known canton.
base = alt.Chart(df.where(df.ncumul_conf>0).dropna(subset=['abbreviation_canton_and_fl']))

cumul = generate_canton_chart('ncumul_conf', 'Cases', 'Cases')
cumul_10k = generate_canton_chart('ncumul_conf_10k', 'Cases per 10k population', 'Cases/10k')

# The configure_*() methods return a new chart instead of modifying the chart
# in place, and configuration is only accepted on the top-level chart, so all
# styling is chained onto the concatenated chart.
chart = alt.hconcat(
    cumul, cumul_10k, title='Covid-19 cases in Switzerland by Canton'
).configure_title(
    anchor='middle'
).configure_header(
    titleFontSize=25
).configure_axis(
    labelFontSize=15, titleFontSize=15
)
display(chart)
if save_figures:
    chart.save(str(Path(figures_path) / 'switzerland-cases-by-canton.png'), webdriver='firefox')
    chart.save(str(Path(figures_path) / 'switzerland-cases-by-canton.html'))
# The credits are shown whether or not the figures are saved.
display(html_credits)
```
%% Output
%% Cell type:markdown id: tags:
### Deaths
%% Cell type:code id: tags:
``` python
# Chart only the rows with a positive death count and a known canton.
base = alt.Chart(df.where(df.ncumul_deceased>0).dropna(subset=['abbreviation_canton_and_fl']))

deaths = generate_canton_chart('ncumul_deceased', 'Deaths', 'Deaths')
deaths_10k = generate_canton_chart('ncumul_deceased_10k', 'Deaths per 10k population', 'Deaths/10k')

# The configure_*() methods return a new chart instead of modifying the chart
# in place, and configuration is only accepted on the top-level chart, so all
# styling is chained onto the concatenated chart.
chart = alt.hconcat(
    deaths, deaths_10k, title='Covid-19 deaths in Switzerland by Canton'
).configure_title(
    anchor='middle'
).configure_header(
    titleFontSize=25
).configure_axis(
    labelFontSize=15, titleFontSize=15
)
display(chart)
display(html_credits)
if save_figures:
    chart.save(str(Path(figures_path) / 'switzerland-deaths-by-canton.png'), webdriver='firefox')
    chart.save(str(Path(figures_path) / 'switzerland-deaths-by-canton.html'))
```
%% Output
%% Cell type:code id: tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment