Skip to content
Snippets Groups Projects
Commit 26e8a499 authored by Andreas Bleuler's avatar Andreas Bleuler
Browse files

Add examples for Italian data

squashed commits:

renku dataset: updated 3 files and deleted 0 files

renku dataset update covid-19-italy

Install voila and its dependencies

Install matplotlib

Add example notebook and example dashboard for Italian data
parent 1fbe12e0
No related branches found
No related tags found
No related merge requests found
......@@ -33,6 +33,12 @@ RUN conda env update -q -f /tmp/environment.yml && \
conda clean -y --all && \
conda env export -n "root" && \
jupyter lab build
# Install the widget and dashboard packages with the conda-provided pip.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN /opt/conda/bin/pip install --no-cache-dir ipywidgets voila
# Install both JupyterLab extensions with one command instead of two
# separate `labextension install` invocations.
RUN jupyter labextension install \
    @jupyter-voila/jupyterlab-preview \
    @jupyter-widgets/jupyterlab-manager
USER ${NB_USER}
# install the R dependencies
......
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
import ipywidgets as widgets
import matplotlib.pyplot as plt
import pandas as pd
from italy_utils import *
```
%% Cell type:code id: tags:
``` python
# Folder containing the Italian COVID-19 CSV files and their JSON column
# descriptions; the path is relative, so it resolves against the current
# working directory.
data_folder = "../../../data/covid-19-italy/"
```
%% Cell type:code id: tags:
``` python
# Load the per-province figures with translated column names and a date index.
df_provinces = prepare_dataframe(
    data_folder,
    df_filename="dpc-covid19-ita-province.csv",
    description_filename="dati-province-description.json",
    use_time_index=True,
)
# Mapping of region name -> list of province names, used to fill the widgets.
province_dict = get_province_structure(df_provinces)
```
%% Cell type:code id: tags:
``` python
def get_growth_factor_series(province, df, N_min=1000):
    """Return the growth factor of the smoothed total cases of one province.

    Rows are kept when their ``province`` equals ``province`` and their
    ``total_cases`` is at least ``N_min``. The remaining ``total_cases``
    values are smoothed with a trailing ``'3d'`` rolling mean, and each value
    is divided by its predecessor (``pct_change() + 1``).

    Args:
        province: Value to match in the ``province`` column.
        df: Frame with ``province`` and ``total_cases`` columns and an index
            that supports time-based rolling windows.
        N_min: Minimum ``total_cases`` for a row to be considered.

    Returns:
        The growth-factor series without its first entry, which is always
        undefined because it has no predecessor.
    """
    keep = (df['province'] == province) & (df['total_cases'] >= N_min)
    cases = df.loc[keep, 'total_cases']
    smoothed = cases.rolling('3d').mean()
    growth_factor = smoothed.pct_change() + 1.0
    return growth_factor.iloc[1:]
```
%% Cell type:code id: tags:
``` python
def plot_growth_factors(provinces, df, N_min=1000):
    """Plot the growth-factor curve of every province in ``provinces``.

    Does nothing when ``provinces`` is empty. Otherwise a new 9x6 figure is
    created, one line per province with at least one growth-factor value is
    drawn, and a faint horizontal reference line at 1 is added.

    Args:
        provinces: Sequence of province names to plot.
        df: Province frame passed on to ``get_growth_factor_series``.
        N_min: Case threshold passed on to ``get_growth_factor_series``.
    """
    if len(provinces) == 0:
        return

    plt.figure(figsize=(9, 6))
    # The reference line ends at the last index entry of the full frame and
    # starts at the earliest first date among the plotted series.
    last_date = df.index[-1]
    first_date = last_date
    for name in provinces:
        series = get_growth_factor_series(name, df, N_min=N_min)
        if len(series) < 1:
            continue
        series.plot(label=name)
        first_date = min(series.index[0], first_date)

    axes = plt.gca()
    plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1.0), frameon=False)
    plt.plot([first_date, last_date], [1, 1], color='gray', alpha=0.2)
    # Keep the automatically chosen upper limit, but fix the lower one.
    upper_limit = axes.get_ylim()[1]
    plt.ylim(0.95, upper_limit)
    plt.title('Daily growth rate of total cases per province')
    plt.xlabel('')
def plot_total_cases(provinces, df, N_min=500):
    """Plot the total cases of every province in ``provinces`` on a log axis.

    Does nothing when ``provinces`` is empty. Otherwise a new 9x6 figure is
    created and one line is drawn per province that has at least one row with
    ``total_cases >= N_min``.

    Args:
        provinces: Sequence of province names to plot.
        df: Frame with ``province`` and ``total_cases`` columns and an index
            that supports time-based rolling windows.
        N_min: Minimum ``total_cases`` for a row to be plotted.
    """
    if len(provinces) == 0:
        return
    plt.figure(figsize=(9, 6))
    for province in provinces:
        selected = (df['province'] == province) & (df['total_cases'] >= N_min)
        # The plotted value is the '1d' rolling mean of the total cases,
        # shifted up by 1.0.
        data = df.loc[selected, 'total_cases'].rolling('1d').mean().add(1.0)
        if len(data) >= 1:
            data.plot(label=province, logy=True)
    plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1.0), frameon=False)
    plt.title('Total cases per province')
    plt.xlabel('')
def make_plots(provinces, df):
    """Draw the growth-factor plot and then the total-cases plot."""
    for draw in (plot_growth_factors, plot_total_cases):
        draw(provinces, df)
```
%% Cell type:code id: tags:
``` python
def get_province_selector(region):
    """Build a multi-select widget listing the provinces of ``region``.

    Args:
        region: Key of ``province_dict`` whose provinces are offered.

    Returns:
        A ``widgets.SelectMultiple`` with no province preselected.
    """
    return widgets.SelectMultiple(
        options=province_dict[region],
        value=[],
        description='Provinces:',
        # Decide from the argument itself instead of reading the global
        # region dropdown, so the function does not depend on a widget that
        # is created in another cell.
        disabled=region is None,
    )
def get_interactive_widgets(region):
    """Show a province selector for ``region`` that redraws the plots.

    Args:
        region: Region whose provinces are offered for selection. The
            argument is used directly rather than re-reading the global
            region dropdown.
    """
    province_selector = get_province_selector(region)
    widgets.interact(
        # The selection arrives as a tuple; make_plots receives it as a list.
        lambda prov: make_plots(list(prov), df_provinces),
        prov=province_selector,
    )
```
%% Cell type:code id: tags:
``` python
# Dropdown offering every region found in the province data.
region_selector = widgets.Dropdown(
    description='Region:',
    options=list(province_dict),
    value='Lombardia',
    disabled=False,
)
# Rebuild the province selector and plots whenever another region is chosen.
widgets.interact(get_interactive_widgets, region=region_selector);
```
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
```
%% Output
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
%% Cell type:code id: tags:
``` python
import ipywidgets as widgets
import matplotlib.pyplot as plt
import pandas as pd
from italy_utils import *
```
%% Cell type:code id: tags:
``` python
# Folder containing the Italian COVID-19 CSV files and their JSON column
# descriptions; the path is relative, so it resolves against the current
# working directory.
data_folder = "../../../data/covid-19-italy/"
```
%% Cell type:code id: tags:
``` python
# Load the national figures with translated column names and a date index.
df_national_trend = prepare_dataframe(
    data_folder,
    df_filename="dpc-covid19-ita-andamento-nazionale.csv",
    description_filename="dati-andamento-nazionale-description.json",
    use_time_index=True,
)
df_national_trend.tail()
```
%% Output
country_code hospitalised_with_symptoms intensive_care \
date
2020-03-18 ITA 14363 2257
2020-03-19 ITA 15757 2498
2020-03-20 ITA 16020 2655
2020-03-21 ITA 17708 2857
2020-03-22 ITA 19846 3009
total_hospitalised home_confinement total_current_positive \
date
2020-03-18 16620 12090 28710
2020-03-19 18255 14935 33190
2020-03-20 18675 19185 37860
2020-03-21 20565 22116 42681
2020-03-22 22855 23783 46638
new_current_positive recovered dead total_cases tests
date
2020-03-18 2648 4025 2978 35713 165541
2020-03-19 4480 4440 3405 41035 182777
2020-03-20 4670 5129 4032 47021 206886
2020-03-21 4821 6072 4825 53578 233222
2020-03-22 3957 7024 5476 59138 258402
%% Cell type:code id: tags:
``` python
# Day-over-day difference of the cumulative case count, smoothed with a
# trailing '3d' rolling mean.
df_national_trend["New cases per day"] = (
    df_national_trend["total_cases"]
    .diff()
    .rolling('3d')
    .mean()
)
df_national_trend.plot(
    y="New cases per day",
    kind="bar",
    logy=True,
    legend=False,
);
plt.title('New cases per day in Italy, 3 day backwards mean');
```
%% Output
%% Cell type:code id: tags:
``` python
# Load the per-province figures with translated column names and a date index.
df_provinces = prepare_dataframe(
    data_folder,
    df_filename="dpc-covid19-ita-province.csv",
    description_filename="dati-province-description.json",
    use_time_index=True,
)
df_provinces.head()
```
%% Output
country_code region_code region province_code \
date
2020-02-24 ITA 13 Abruzzo 69
2020-02-24 ITA 13 Abruzzo 66
2020-02-24 ITA 13 Abruzzo 68
2020-02-24 ITA 13 Abruzzo 67
2020-02-24 ITA 13 Abruzzo 979
province province_short latitude \
date
2020-02-24 Chieti CH 42.351032
2020-02-24 L'Aquila AQ 42.351222
2020-02-24 Pescara PE 42.464584
2020-02-24 Teramo TE 42.658918
2020-02-24 In fase di definizione/aggiornamento NaN 0.000000
longitude total_cases
date
2020-02-24 14.167546 0
2020-02-24 13.398438 0
2020-02-24 14.213648 0
2020-02-24 13.704400 0
2020-02-24 0.000000 0
%% Cell type:code id: tags:
``` python
# Load the per-region figures with translated column names and a date index.
df_regions = prepare_dataframe(
    data_folder,
    "dpc-covid19-ita-regioni.csv",
    "dati-regioni-description.json",
    use_time_index=True
)
# Preview the regional frame that was just loaded.
df_regions.head()
```
%% Output
country_code region_code region province_code \
date
2020-02-24 ITA 13 Abruzzo 69
2020-02-24 ITA 13 Abruzzo 66
2020-02-24 ITA 13 Abruzzo 68
2020-02-24 ITA 13 Abruzzo 67
2020-02-24 ITA 13 Abruzzo 979
province province_short latitude \
date
2020-02-24 Chieti CH 42.351032
2020-02-24 L'Aquila AQ 42.351222
2020-02-24 Pescara PE 42.464584
2020-02-24 Teramo TE 42.658918
2020-02-24 In fase di definizione/aggiornamento NaN 0.000000
longitude total_cases
date
2020-02-24 14.167546 0
2020-02-24 13.398438 0
2020-02-24 14.213648 0
2020-02-24 13.704400 0
2020-02-24 0.000000 0
import json
import os
import pandas as pd
def translate_columns(data_folder, df, description_filename):
    """Rename the columns of ``df`` using a JSON description file.

    The description file must contain a JSON list of objects. Each object
    provides the current column name under ``'Nome campo'`` and the new
    column name under ``'Field name'``.

    Args:
        data_folder: Folder that contains the description file.
        df: Frame whose columns are renamed in place.
        description_filename: File name of the JSON description.

    Returns:
        The same ``df`` object, with renamed columns.

    Raises:
        KeyError: If a column of ``df`` has no entry in the description file.
    """
    description_file_path = os.path.join(data_folder, description_filename)
    # 'utf-8-sig' decodes UTF-8 and drops a leading byte-order mark, without
    # depending on the platform's default text encoding.
    with open(description_file_path, 'r', encoding='utf-8-sig') as description_file:
        entries = json.load(description_file)
    descriptions = {entry['Nome campo']: entry for entry in entries}
    df.rename(columns=lambda col: descriptions[col]['Field name'], inplace=True)
    return df
def set_time_index(df, drop_hour=True):
    """Replace the index of ``df`` with timestamps parsed from its ``date`` column.

    Args:
        df: Frame with a ``date`` column of timestamp strings. It is modified
            in place: the index is replaced and the ``date`` column removed.
        drop_hour: When true, only the part of each string before the first
            space is parsed, so the time of day is discarded. When false,
            the full string is parsed.

    Returns:
        The same ``df`` object, indexed by a ``DatetimeIndex``.
    """
    dates = df['date']
    if drop_hour:
        # Keep only the text before the first space (the calendar date).
        dates = dates.apply(lambda stamp: stamp.split(' ')[0])
    # With drop_hour=False the strings are parsed unchanged; previously this
    # branch assigned to a misspelled name and the call failed.
    df.set_index(pd.DatetimeIndex(dates), inplace=True)
    del df['date']
    return df
def prepare_dataframe(data_folder, df_filename, description_filename, use_time_index=False):
    """Read a CSV file, translate its column names and optionally index it by date.

    Args:
        data_folder: Folder that contains both the CSV and the description file.
        df_filename: File name of the CSV to read.
        description_filename: File name of the JSON column description.
        use_time_index: When true, the frame is passed through
            ``set_time_index`` before being returned.

    Returns:
        The loaded frame with translated column names.
    """
    raw = pd.read_csv(os.path.join(data_folder, df_filename))
    translated = translate_columns(data_folder, raw, description_filename)
    return set_time_index(translated) if use_time_index else translated
def get_province_structure(df_provinces):
    """Extract the province/region structure from the province dataframe.

    Args:
        df_provinces: Frame with ``region`` and ``province`` columns.

    Returns:
        A dict mapping each region (in order of first appearance) to the
        alphabetically sorted list of its distinct provinces. The placeholder
        entry ``'In fase di definizione/aggiornamento'`` is left out.
    """
    placeholder = 'In fase di definizione/aggiornamento'
    structure = {}
    for region in df_provinces['region'].unique():
        in_region = df_provinces['region'] == region
        provinces = set(df_provinces.loc[in_region, 'province'])
        provinces.discard(placeholder)
        # Sort so the result does not depend on set iteration order, which
        # can differ between interpreter runs.
        structure[region] = sorted(provinces)
    return structure
\ No newline at end of file
aiohttp==3.6.2
alembic==1.4.0
altair==4.0.1
ansiwrap==0.8.4
appnope==0.1.0
argcomplete==1.11.1
async-generator==1.10
async-timeout==3.0.1
attrs==19.3.0
backcall==0.1.0
bleach==3.1.1
......@@ -15,6 +18,7 @@ Click==7.0
colorama==0.4.3
-e src/covid-19/covid_19_dashboard
cryptography==2.8
cycler==0.10.0
decorator==4.4.1
defusedxml==0.6.0
distro==1.4.0
......@@ -34,14 +38,18 @@ jsonschema==3.2.0
jupyter-client==6.0.0
jupyter-core==4.6.3
jupyter-rsession-proxy==1.1
jupyter-server-proxy==1.2.0
jupyter-telemetry==0.0.5
jupyterhub==0.9.6
jupyterlab==1.2.5
jupyterlab-git==0.9.0
jupyterlab-server==1.0.6
kiwisolver==1.1.0
Mako==1.1.0
MarkupSafe==1.1.1
matplotlib==3.2.1
mistune==0.8.4
multidict==4.7.5
nbconvert==5.6.1
nbdime==1.1.0
nbformat==5.0.4
......@@ -66,6 +74,7 @@ pycurl==7.43.0.5
Pygments==2.5.2
PyJWT==1.7.1
pyOpenSSL==19.1.0
pyparsing==2.4.6
pyrsistent==0.15.7
PySocks==1.7.1
python-dateutil==2.8.1
......@@ -76,9 +85,10 @@ pytz==2019.3
PyYAML==5.3
pyzmq==19.0.0
requests==2.23.0
ruamel-yaml==0.15.80
ruamel.yaml==0.15.80
ruamel.yaml.clib==0.2.0
Send2Trash==1.5.0
simpervisor==0.3
six==1.14.0
smmap==3.0.1
SQLAlchemy==1.3.13
......@@ -95,4 +105,5 @@ userpath==1.3.0
vega-datasets==0.8.0
wcwidth==0.1.8
webencodings==0.5.1
yarl==1.4.2
zipp==3.0.0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment