Skip to content
Snippets Groups Projects
Commit 0ce36734 authored by Rok Roškar's avatar Rok Roškar
Browse files

ch-notebook remove png

parent be5ec232
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from pathlib import Path from pathlib import Path
import altair as alt import altair as alt
import pandas as pd import pandas as pd
from IPython.display import display, HTML from IPython.display import display, HTML
``` ```
%% Cell type:code id: tags:parameters %% Cell type:code id: tags:parameters
``` python ``` python
# Notebook parameters. The cell is tagged "parameters"; presumably a notebook
# runner (e.g. papermill) overrides these values at execution time - confirm.
save_figures = False  # when True, the charts are also written below figures_path
data_path = '../../data/openzh-covid-19'  # directory holding the per-canton CSV files
figures_path = '../../figures'  # output directory for saved charts
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Attribution footer (data sources and analysis project) displayed under each chart.
html_credits = HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/openZH/covid_19">OpenData Zuerich</a>,
<a href="https://www.bfs.admin.ch">Federal Statistical Office</a>
<br>
Analysis:
<a href="https://renkulab.io/projects/covid-19/covid-19-public-data">Covid-19 Public Data Collaboration Project</a>
</p>''')
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Read in the data ## Read in the data
We have two datasets for Switzerland: the COVID-19 dataset from https://github.com/openZH/covid_19 and the population statistics by age and canton. We read both of them into dataframes:
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Read every per-canton CSV and stack them into one dataframe.
# Sorting the matches makes the row order independent of the file system's
# directory ordering.
csv_files = sorted(Path(data_path).glob('COVID19_Fallzahlen_Kanton_*total.csv'))
if not csv_files:
    # pd.concat([]) would otherwise fail with "No objects to concatenate",
    # which does not say where the data was expected.
    raise FileNotFoundError(
        f"no 'COVID19_Fallzahlen_Kanton_*total.csv' files found in {data_path!r}"
    )
df_list = [pd.read_csv(csv_file) for csv_file in csv_files]
df = pd.concat(df_list)
# Convert the date column from strings to datetimes.
df['date'] = pd.to_datetime(df['date'], dayfirst=True)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Read the population-by-age-and-canton spreadsheet.
df_pop = pd.read_excel(
    Path(data_path) / '../ch-population-statistics/ch-population-by-age-canton.xls',
    header=1,
    skipfooter=5
)
# Keep only the rows whose Region starts with '-' (presumably the per-canton
# rows - confirm against the spreadsheet), sorted by Region.
df_pop = df_pop.where(
    df_pop.Region.str.startswith('-')
).dropna().sort_values('Region').reset_index(drop=True)

# Match the cantons in the two datasets. The abbreviations are assigned by
# position, so this list must follow the sorted order of Region exactly.
# NOTE(review): 'AI' precedes 'AR' here although "Ausserrhoden" sorts before
# "Innerrhoden" - confirm against the Region names in the spreadsheet.
df_pop['abbreviation_canton_and_fl'] = [
    'AG', 'AI', 'AR', 'BL', 'BS', 'BE', 'FR', 'GE', 'GL', 'GR', 'JU', 'LU', 'NE',
    'NW', 'OW', 'SH', 'SZ', 'SO', 'SG', 'TG', 'TI', 'UR', 'VS', 'VD', 'ZG', 'ZH',
]
# pop_d has the shape {'Total': {abbreviation: population}}.
pop_d = df_pop[['abbreviation_canton_and_fl', 'Total']].set_index('abbreviation_canton_and_fl').to_dict()

# Look up each row's canton population once, vectorized, instead of doing a
# dict lookup per row inside df.apply.
canton_population = df['abbreviation_canton_and_fl'].map(pop_d['Total'])
unmatched = df.loc[canton_population.isna(), 'abbreviation_canton_and_fl'].unique()
if len(unmatched):
    # Stay as loud as the original per-row dict lookup, which raised KeyError.
    raise KeyError(f'no population figure for canton(s): {list(unmatched)}')

# calculate cases and deaths per 10k
for x in ['conf', 'deceased']:
    df[f'ncumul_{x}_10k'] = df[f'ncumul_{x}'] / canton_population * 10000
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Show the first rows of the combined dataframe as the cell output.
df.head()
``` ```
%% Output %% Output
date time abbreviation_canton_and_fl ncumul_tested ncumul_conf \ date time abbreviation_canton_and_fl ncumul_tested ncumul_conf \
0 2020-02-28 NaN VS NaN 1.0 0 2020-02-28 NaN VS NaN 1.0
1 2020-03-03 NaN VS NaN 2.0 1 2020-03-03 NaN VS NaN 2.0
2 2020-03-05 NaN VS NaN 3.0 2 2020-03-05 NaN VS NaN 3.0
3 2020-03-06 NaN VS NaN 5.0 3 2020-03-06 NaN VS NaN 5.0
4 2020-03-08 NaN VS NaN 7.0 4 2020-03-08 NaN VS NaN 7.0
ncumul_hosp ncumul_ICU ncumul_vent ncumul_released ncumul_deceased \ ncumul_hosp ncumul_ICU ncumul_vent ncumul_released ncumul_deceased \
0 NaN NaN NaN NaN NaN 0 NaN NaN NaN NaN NaN
1 NaN NaN NaN NaN NaN 1 NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN 2 NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN 3 NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN 4 NaN NaN NaN NaN NaN
source ncumul_ICF \ source ncumul_ICF \
0 https://vs.ch/documents/529400/6767345/2020+02... NaN 0 https://vs.ch/documents/529400/6767345/2020+02... NaN
1 https://vs.ch/documents/529400/6789273/2020+03... NaN 1 https://vs.ch/documents/529400/6789273/2020+03... NaN
2 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN 2 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN
3 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN 3 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN
4 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN 4 https://www.vs.ch/de/web/coronavirus/info?p_p_... NaN
ncumul_ICU_intub TotalPosTests1 TotalCured ncumul_conf_per10k \ ncumul_ICU_intub TotalPosTests1 TotalCured ncumul_conf_per10k \
0 NaN NaN NaN 0.029074 0 NaN NaN NaN 0.029074
1 NaN NaN NaN 0.058147 1 NaN NaN NaN 0.058147
2 NaN NaN NaN 0.087221 2 NaN NaN NaN 0.087221
3 NaN NaN NaN 0.145368 3 NaN NaN NaN 0.145368
4 NaN NaN NaN 0.203515 4 NaN NaN NaN 0.203515
ncumul_conf_10k ncumul_conf_10k
0 0.029074 0 0.029074
1 0.058147 1 0.058147
2 0.087221 2 0.087221
3 0.145368 3 0.145368
4 0.203515 4 0.203515
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Plot the available data ## Plot the available data
Below we plot the cumulative cases and deaths for each canton, both as totals and per 10k population.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
def generate_canton_chart(column, title, tooltip_title, chart_base=None):
    """Produce an interactive per-canton line chart for one dataframe column.

    Args:
        column: Name of the column plotted on the y axis.
        title: Title of the y axis.
        tooltip_title: Label shown next to the plotted value in the tooltip.
        chart_base: Chart to derive the line chart from. Defaults to the
            module-level ``base`` chart, which callers set before calling.

    Returns:
        A line chart with one line per canton. Clicking legend entries
        selects cantons; unselected cantons are drawn at opacity 0.2.
    """
    if chart_base is None:
        chart_base = base  # fall back to the notebook-global chart

    selection = alt.selection_multi(fields=['abbreviation_canton_and_fl'], bind='legend')
    chart = chart_base.mark_line().encode(
        alt.X('date', title='Date'),
        alt.Y(column, title=title, scale=alt.Scale(type='linear')),
        color=alt.Color('abbreviation_canton_and_fl', legend=alt.Legend(title="Canton")),
        tooltip=[
            alt.Tooltip('abbreviation_canton_and_fl', title='Canton'),
            # Show the plotted column. This was hard-coded to 'ncumul_conf',
            # so the deaths and per-10k charts displayed the absolute case
            # count under their own tooltip label.
            alt.Tooltip(column, title=tooltip_title),
            alt.Tooltip('date', title='Date'),
        ],
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
    ).add_selection(
        selection
    )
    return chart
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Total cases ### Total cases
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Base chart over the rows with at least one confirmed case.
base = alt.Chart(df.where(df.ncumul_conf>0).dropna(subset=['abbreviation_canton_and_fl']))

cumul = generate_canton_chart('ncumul_conf', 'Cases', 'Cases')
cumul_10k = generate_canton_chart('ncumul_conf_10k', 'Cases per 10k population', 'Cases/10k')

# The configure_* methods return a modified copy rather than changing the
# chart in place, so calling them on `base` and discarding the result had no
# effect. Apply them to the final concatenated chart instead.
chart = alt.hconcat(
    cumul, cumul_10k, title='Covid-19 cases in Switzerland by Canton'
).configure_title(
    anchor='middle'
).configure_header(
    titleFontSize=25
).configure_axis(
    labelFontSize=15, titleFontSize=15
)

display(chart)
display(html_credits)
if save_figures:
    # Only the HTML export remains; the PNG export (which used a Firefox
    # webdriver) is removed in this revision.
    chart.save(str(Path(figures_path) / 'switzerland-cases-by-canton.html'))
``` ```
%% Output %% Output
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Deaths ### Deaths
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Base chart over the rows with at least one reported death.
base = alt.Chart(df.where(df.ncumul_deceased>0).dropna(subset=['abbreviation_canton_and_fl']))

deaths = generate_canton_chart('ncumul_deceased', 'Deaths', 'Deaths')
deaths_10k = generate_canton_chart('ncumul_deceased_10k', 'Deaths per 10k population', 'Deaths/10k')

# The configure_* methods return a modified copy rather than changing the
# chart in place, so calling them on `base` and discarding the result had no
# effect. Apply them to the final concatenated chart instead.
chart = alt.hconcat(
    deaths, deaths_10k, title='Covid-19 deaths in Switzerland by Canton'
).configure_title(
    anchor='middle'
).configure_header(
    titleFontSize=25
).configure_axis(
    labelFontSize=15, titleFontSize=15
)

display(chart)
display(html_credits)
if save_figures:
    # Only the HTML export remains; the PNG export (which used a Firefox
    # webdriver) is removed in this revision.
    chart.save(str(Path(figures_path) / 'switzerland-deaths-by-canton.html'))
``` ```
%% Output %% Output
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment