Skip to content
Snippets Groups Projects
Commit cfddccd6 authored by Rok Roškar
Browse files

using altair for italy notebooks

parent 449652dc
No related branches found
No related tags found
1 merge request: !76 "using altair for italy notebooks"
Pipeline #20594 passed with stages
in 10 minutes and 50 seconds
......@@ -58,8 +58,8 @@ The environment image allows you to work in Python or R in JupyterLab or RStudio
<td><a href="https://github.com/pcm-dpc/COVID-19">Covid-19 data for Italy</a></td>
<td><a href="https://renkulab.io/projects/covid-19/covid-19-public-data/datasets/286c58b1-dbbc-4caa-a23a-fcb001d5ac51/">covid-19-italy</a></td>
<td><code>data/covid-19-italy</code></td>
<td><a href="https://renkulab.io/projects/covid-19/covid-19-public-data/files/blob/notebooks/examples/italy-examples/italy-notebook-example.ipynb">notebook</a>,
<a href="https://renkulab.io/projects/covid-19/covid-19-public-data/files/blob/notebooks/examples/italy-examples/italy-dashboard-example.ipynb">dashboard</a></td>
<td><a href="https://renkulab.io/projects/covid-19/covid-19-public-data/files/blob/notebooks/examples/italy-examples/italy-notebook-example.ipynb">notebook</a>
</td>
</tr>
<tr>
<td><a href="https://github.com/itoledor/coronavirus.git">Covid-19 data for Chile</a></td>
......
%% Cell type:code id: tags:
``` python
# Enable IPython's autoreload extension (mode 2) so that edits to imported
# modules are picked up without restarting the kernel.
# NOTE(review): mode semantics are per the IPython autoreload docs -- confirm.
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
import ipywidgets as widgets
import matplotlib.pyplot as plt
import pandas as pd
from italy_utils import *
```
%% Cell type:code id: tags:
``` python
# Relative path to the folder containing the Covid-19 Italy data files
# (the CSV and its JSON column-description file are loaded from here).
data_folder = "../../../data/covid-19-italy/"
```
%% Cell type:code id: tags:
``` python
# Load the per-province CSV, translate its column names using the JSON
# description file, and (use_time_index=True) index the rows by time.
df_provinces = prepare_dataframe(
data_folder,
"dpc-covid19-ita-province.csv",
"dati-province-description.json",
use_time_index=True
)
# Lookup keyed by region name; each entry supplies the province options for
# that region in the widgets defined further down.
province_dict = get_province_structure(df_provinces)
```
%% Output
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-4-98cd8ebec933> in <module>
3 "dpc-covid19-ita-province.csv",
4 "dati-province-description.json",
----> 5 use_time_index=True
6 )
7
/work/covid-19-public-data/notebooks/examples/italy-examples/italy_utils.py in prepare_dataframe(data_folder, df_filename, description_filename, use_time_index)
29 data_file_path = os.path.join(data_folder, df_filename)
30 df = pd.read_csv(data_file_path)
---> 31 df = translate_columns(data_folder, df, description_filename)
32 if use_time_index:
33 df = set_time_index(df)
/work/covid-19-public-data/notebooks/examples/italy-examples/italy_utils.py in translate_columns(data_folder, df, description_filename)
12 descriptions = { column_dict['Nome campo']: column_dict for column_dict in descriptions}
13
---> 14 df.rename(columns=lambda col: descriptions[col]['Field name'], inplace=True)
15 return df
16
/opt/conda/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
225 @wraps(func)
226 def wrapper(*args, **kwargs) -> Callable[..., Any]:
--> 227 return func(*args, **kwargs)
228
229 kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in rename(self, mapper, index, columns, axis, copy, inplace, level, errors)
4131 inplace=inplace,
4132 level=level,
-> 4133 errors=errors,
4134 )
4135
/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in rename(self, mapper, index, columns, axis, copy, inplace, level, errors)
1098
1099 result._data = result._data.rename_axis(
-> 1100 f, axis=baxis, copy=copy, level=level
1101 )
1102 result._clear_item_cache()
/opt/conda/lib/python3.7/site-packages/pandas/core/internals/managers.py in rename_axis(self, mapper, axis, copy, level)
194 """
195 obj = self.copy(deep=copy)
--> 196 obj.set_axis(axis, _transform_index(self.axes[axis], mapper, level))
197 return obj
198
/opt/conda/lib/python3.7/site-packages/pandas/core/internals/managers.py in _transform_index(index, func, level)
1957 return MultiIndex.from_tuples(items, names=index.names)
1958 else:
-> 1959 items = [func(x) for x in index]
1960 return Index(items, name=index.name, tupleize_cols=False)
1961
/opt/conda/lib/python3.7/site-packages/pandas/core/internals/managers.py in <listcomp>(.0)
1957 return MultiIndex.from_tuples(items, names=index.names)
1958 else:
-> 1959 items = [func(x) for x in index]
1960 return Index(items, name=index.name, tupleize_cols=False)
1961
/work/covid-19-public-data/notebooks/examples/italy-examples/italy_utils.py in <lambda>(col)
12 descriptions = { column_dict['Nome campo']: column_dict for column_dict in descriptions}
13
---> 14 df.rename(columns=lambda col: descriptions[col]['Field name'], inplace=True)
15 return df
16
KeyError: 'note_it'
%% Cell type:code id: tags:
``` python
def get_growth_factor_series(province, df, N_min=1000):
    """Return the smoothed day-over-day growth factor for one province.

    Rows of ``df`` are restricted to ``province`` and to observations with
    at least ``N_min`` total cases.  The ``total_cases`` column is averaged
    over a trailing 3-day window, converted to a relative change between
    consecutive rows, and shifted by one so that 1.0 means "no growth".

    Args:
        province: Value matched against the ``province`` column.
        df: Frame with ``province`` and ``total_cases`` columns and a
            datetime-like index (required by the time-based rolling window).
        N_min: Minimum ``total_cases`` for a row to be included.

    Returns:
        The growth-factor series without its first entry, which is always
        NaN because it has no predecessor to compare against.
    """
    selected = (df['province'] == province) & (df['total_cases'] >= N_min)
    cases = df.loc[selected, 'total_cases']
    smoothed = cases.rolling('3d').mean()
    growth_factor = smoothed.pct_change() + 1.0
    return growth_factor.iloc[1:]
```
%% Cell type:code id: tags:
``` python
def plot_growth_factors(provinces, df, N_min=1000):
    """Plot the growth-factor curve of every province in ``provinces``.

    Does nothing when ``provinces`` is empty.  Otherwise opens a new 9x6
    figure, draws one line per province that has at least one data point,
    and adds a faint horizontal reference line at a growth factor of 1.

    Args:
        provinces: Sized iterable of province names.
        df: Frame passed on to ``get_growth_factor_series``; its last index
            entry is used as the right end of the reference line.
        N_min: Minimum total cases, forwarded to
            ``get_growth_factor_series``.
    """
    if len(provinces) == 0:
        return

    plt.figure(figsize=(9, 6))

    # The reference line spans from the earliest plotted date to the last
    # date present in the frame.
    last_date = df.index[-1]
    first_date = last_date
    for name in provinces:
        series = get_growth_factor_series(name, df, N_min=N_min)
        if len(series) < 1:
            continue
        series.plot(label=name)
        first_date = min(series.index[0], first_date)

    axes = plt.gca()
    plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1.0), frameon=False)
    plt.plot([first_date, last_date], [1, 1], color='gray', alpha=0.2)
    # Read the upper limit only after everything is drawn, then pin the
    # lower limit just below the reference line.
    upper_limit = axes.get_ylim()[1]
    plt.ylim(0.95, upper_limit)
    plt.title('Daily growth rate of total cases per province')
    plt.xlabel('')
def plot_total_cases(provinces, df, N_min=500):
    """Plot total cases per province on a logarithmic y-axis.

    Does nothing when ``provinces`` is empty.  Otherwise opens a new 9x6
    figure and draws one line per province that has at least one row with
    ``total_cases >= N_min``.

    Args:
        provinces: Sized iterable of province names.
        df: Frame with ``province`` and ``total_cases`` columns and a
            datetime-like index (required by the time-based rolling window).
        N_min: Minimum ``total_cases`` for a row to be plotted.
    """
    if len(provinces) == 0:
        return

    plt.figure(figsize=(9, 6))
    for name in provinces:
        selected = (df['province'] == name) & (df['total_cases'] >= N_min)
        cases = df.loc[selected, 'total_cases']
        # 1-day trailing mean, then offset every value by one.
        series = cases.rolling('1d').mean() + 1.0
        if len(series) < 1:
            continue
        series.plot(label=name, logy=True)

    # Kept from the original: touches the current axes before the legend
    # and labels are applied.
    plt.gca()
    plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1.0), frameon=False)
    plt.title('Total cases per province')
    plt.xlabel('')
def make_plots(provinces, df):
    """Draw the growth-factor chart followed by the total-cases chart.

    Args:
        provinces: Province names to plot; forwarded unchanged.
        df: Data frame forwarded unchanged to both plotting functions.
    """
    for draw in (plot_growth_factors, plot_total_cases):
        draw(provinces, df)
```
%% Cell type:code id: tags:
``` python
def get_province_selector(region):
    """Build a multi-select widget listing the provinces of ``region``.

    Args:
        region: Key into the module-level ``province_dict``.

    Returns:
        A ``widgets.SelectMultiple`` with nothing pre-selected.  It is
        disabled when the module-level ``region_selector`` has no value.
    """
    province_options = province_dict[region]
    no_region_chosen = region_selector.value is None
    return widgets.SelectMultiple(
        options=province_options,
        value=[],
        description='Provinces:',
        disabled=no_region_chosen,
    )
def get_interactive_widgets(region):
    """Show the province selector for ``region`` and plot on every selection.

    The original ignored its ``region`` argument and read the global
    ``region_selector.value`` instead, so it only behaved correctly when
    invoked from that widget's own callback.  The argument is now used, which
    yields the same result for that caller (``interact`` passes the selector's
    current value) and makes the function usable on its own.

    Args:
        region: Region whose provinces should be offered for selection.
    """
    widgets.interact(
        # The selection arrives as a tuple; make_plots is given a list.
        lambda prov: make_plots(list(prov), df_provinces),
        prov=get_province_selector(region),
    )
```
%% Cell type:code id: tags:
``` python
# Drop-down listing every region known to province_dict, starting on Lombardia.
region_names = list(province_dict)
region_selector = widgets.Dropdown(
    options=region_names,
    value='Lombardia',
    description='Region:',
    disabled=False,
)
# Rebuild the province selector and plots whenever another region is chosen.
# The lambda is kept because interact binds the widget by the keyword `reg`.
widgets.interact(lambda reg: get_interactive_widgets(reg), reg=region_selector);
```
......
......@@ -7,10 +7,8 @@ def translate_columns(data_folder, df, description_filename):
description_file_path = os.path.join(data_folder, description_filename)
with open(description_file_path, 'r') as description_file:
decoded_data = description_file.read().encode().decode('utf-8-sig')
descriptions = json.loads(decoded_data)
descriptions = { column_dict['Nome campo']: column_dict for column_dict in descriptions}
descriptions = json.loads(description_file.read().encode().decode('utf-8-sig'))
descriptions = { column_dict['Nome campo']: column_dict for column_dict in descriptions}
df.rename(columns=lambda col: descriptions[col]['Field name'], inplace=True)
return df
......@@ -28,6 +26,8 @@ def set_time_index(df, drop_hour=True):
def prepare_dataframe(data_folder, df_filename, description_filename, use_time_index=False):
data_file_path = os.path.join(data_folder, df_filename)
df = pd.read_csv(data_file_path)
del df['note_it']
del df['note_en']
df = translate_columns(data_folder, df, description_filename)
if use_time_index:
df = set_time_index(df)
......
......@@ -91,6 +91,7 @@ Send2Trash==1.5.0
simpervisor==0.3
six==1.14.0
smmap==3.0.1
SPARQLWrapper==1.8.5
SQLAlchemy==1.3.13
tenacity==6.1.0
terminado==0.8.3
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment