Skip to content
Snippets Groups Projects
Commit 0c105a2f authored by Rok Roškar's avatar Rok Roškar
Browse files

chore: update italy notebook

parent c98aca3f
No related branches found
No related tags found
1 merge request!378000-update-notebooks
%% Cell type:markdown id: tags:
# COVID-19 Case data for Italy
Data from [Civil Protection of Italy](https://github.com/pcm-dpc/COVID-19).
%% Cell type:code id: tags:
``` python
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell is executed, so edits to local packages take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
import pandas as pd
import altair as alt
from pathlib import Path
from IPython.display import display, HTML
from covid_19_utils import helper, plotting
from covid_19_utils.converters import CaseConverter
from covid_19_utils.italy_utils import get_region_populations, prepare_dataframe
```
%% Cell type:code id: tags:
``` python
# Attribution footer (data sources and project link); the chart cells below
# pass it to display() right after their chart.
html_credits=HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/pcm-dpc/COVID-19">Italian Civil Protection</a>, <a href="https://wikidata.org">Wikidata</a>
<br>
Analysis and Visualization:
<a href="https://renkulab.io/projects/covid-19/covid-19-public-data">Covid-19 Public Data Collaboration Project</a>
</p>''')
```
%% Cell type:code id: tags:parameters
``` python
# Notebook parameters. This cell carries the "parameters" tag, so a notebook
# runner can presumably override these values — confirm against the pipeline.
# Folder containing the Italian case CSV files that are read below.
data_folder = "../data/covid-19-italy/"
# Folder handed to CaseConverter when it is constructed.
atlas_folder = "../data/atlas"
```
%% Cell type:code id: tags:
``` python
# Converter shared by every read_convert() call in this notebook; it is
# initialised with the atlas folder.
converter = CaseConverter(atlas_folder)
```
%% Cell type:code id: tags:
``` python
# Read the nationwide CSV from the data folder through the converter.
df_national = converter.read_convert(Path(data_folder) / 'dpc-covid19-ita-andamento-nazionale.csv')
```
%% Cell type:code id: tags:
``` python
# compute daily new cases
# Move index level 0 back into a regular column, in place. The charts below
# read 'date' as a column — presumably that is the level being reset; confirm.
df_national.reset_index(level=0, inplace=True)
# Row-to-row difference of 'positive'; the first row has no predecessor and is NaN.
# NOTE(review): assumes the rows are already in chronological order — confirm.
df_national["New cases per day"] = df_national["positive"].diff()
```
%% Cell type:markdown id: tags:
## National summary of total cases and tests
%% Cell type:code id: tags:
``` python
# Build the national summary: totals on the left, daily new cases on the right.

# Shared foundation: bar marks over the national frame with the date on x.
base = alt.Chart(df_national).mark_bar(size=10).encode(alt.X('date'))

# Left panel: tests as bars, with the case count drawn on top as an orange line.
tests = base.encode(alt.Y('tested', title="Tests (blue)"))
total = base.mark_line(color='orange', size=2).encode(
    alt.Y('positive', title="Cases (orange)")
)
totals = tests + total

# Right panel: the per-day column computed earlier, as bars.
daily = base.encode(alt.Y('New cases per day'))

# Place both panels side by side under one centred title.
side_by_side = alt.hconcat(
    totals,
    daily,
    title='Total and daily tests and cases nationwide',
)
chart = side_by_side.configure_title(anchor='middle')

# Render the chart first, then the attribution footer.
for output in (chart, html_credits):
    display(output)
```
%% Cell type:code id: tags:
``` python
# Read the per-region CSV from the data folder through the converter.
df_regions = converter.read_convert(Path(data_folder) / 'dpc-covid19-ita-regioni.csv')
```
%% Cell type:code id: tags:
``` python
# Highest 'positive' value recorded for each region, as a two-column frame
# (region_label, positive).
# DataFrame.max(level=...) was deprecated in pandas 1.3 and removed in 2.0, so
# the reduction is written as an explicit groupby. sort=False keeps regions in
# order of first appearance, which is what the level-based form produced.
max_cases = (
    df_regions
    .groupby('region_label', sort=False)['positive']
    .max()
    .reset_index()
)
# NOTE(review): despite its name, this holds the 'positive' column (peak counts
# in descending order), not the region labels — confirm what consumers expect.
sorted_regions = max_cases.sort_values(by='positive', ascending=False)['positive']
```
%% Cell type:markdown id: tags:
## Cases per region
%% Cell type:code id: tags:
``` python
# Total cases and deaths by region (absolute counts)
from datetime import datetime

# Only rows dated after start_date are plotted. The name stays at notebook
# scope because the following regional cells filter on it as well.
start_date = datetime.fromisoformat('2020-08-01')

# Base chart over the date-filtered regional frame, date on the x axis.
base = alt.Chart(df_regions.loc[(df_regions.date > start_date)]).encode(alt.X('date'))
death = base.mark_line(color='black').encode(alt.Y('deceased', title='Deaths'))
total = base.mark_bar(color='orange', size=5).encode(alt.Y('positive', title="Total cases")).properties(
    width=150,
    height=150)
totals = alt.layer(total, death)

# One small panel per region, five panels per row, under a centred title.
chart = totals.facet(
    facet='region_label',
    columns=5,
    align='each',
    title='Total cases (orange) and deaths (black) by region',
).configure_title(
    anchor='middle'
)
display(chart)
display(html_credits)
```
%% Cell type:code id: tags:
``` python
# Total cases and deaths per 100k population
# Base chart over the regional rows dated after start_date (set in the
# preceding chart cell), date on the x axis.
base = alt.Chart(df_regions.loc[(df_regions.date > start_date)]).encode(alt.X('date'))
death = base.mark_line(color='black').encode(alt.Y('deceased_100k', title='Deaths'))
total = base.mark_bar(color='orange', size=5).encode(alt.Y('positive_100k', title="Total cases")).properties(
    width=150,
    height=150)
totals = alt.layer(total, death)

# One small panel per region, five panels per row, under a centred title.
chart = totals.facet(
    facet='region_label',
    columns=5,
    align='each',
    title='Total cases (orange) and deaths (black) by region per 100k population',
).configure_title(
    anchor='middle'
)
display(chart)
display(html_credits)
```
%% Cell type:code id: tags:
``` python
# compute daily differences
# Keep only rows after start_date, then order each region newest-first so that
# diff(periods=-1) subtracts the following row (the chronologically previous
# record of the same region) from the current one.
tdf = (
    df_regions.loc[(df_regions.date > start_date)]
    .sort_values(['region_label', 'date'], ascending=[True, False])
    .set_index(['region_label', 'date'])
)
# Per-region change between consecutive records; rows where both differences
# are NaN (e.g. the earliest record of a region) are dropped.
diffs_df = tdf[['positive', 'deceased']].groupby(level='region_label').diff(periods=-1).dropna(how='all')
# Attach the differences as 'positive_diff' / 'deceased_diff' and turn the
# index back into columns for charting.
tdf_diff = tdf.join(diffs_df, rsuffix='_diff').reset_index()
```
%% Cell type:code id: tags:
``` python
# Daily charts: per-region differences as bars (cases) and a line (deaths).

# Common base: the difference frame, date on x, fixed 150x150 panel size.
base = alt.Chart(tdf_diff).encode(alt.X('date')).properties(width=150, height=150)

daily_new = base.mark_bar(color='orange').encode(
    alt.Y('positive_diff', title='N')
)
daily_dead = base.mark_line(color='black').encode(
    alt.Y('deceased_diff')
)

# Overlay both marks, then split into one panel per region (five per row).
overlay = alt.layer(daily_new, daily_dead)
faceted = overlay.facet(
    facet='region_label',
    columns=5,
    align='each',
    title='Daily new cases (orange) and deaths (black) by region',
)
chart = faceted.configure_title(anchor='middle')

# Render the chart first, then the attribution footer.
for output in (chart, html_credits):
    display(output)
```
%% Cell type:markdown id: tags:
## Rates of growth
How quickly is the infection spreading? The charts below show data for all of the regions of Italy framed by some illustrative guiding lines to give an idea of how the rate of infection is evolving.
%% Cell type:code id: tags:
``` python
# Regional case curves on a "days since" axis. No start_case is passed here, so
# the helper's default threshold applies (the chart titles say 100 — confirm).
since_100th_case = helper.make_since_df(
    df_regions,
    column='positive',
    region_column='region_label',
)
base = alt.Chart(
    since_100th_case,
    title="Italy: total cases since 100th case",
).mark_line(interpolate='basis')
region_lines = plotting.make_region_since_chart(
    base,
    'positive',
    'sinceDay0',
    'region_label',
    'Days since 100th case',
    'Cumulative cases',
    'Cases',
    'Region',
)
line_chart = region_lines.properties(width=450, height=450)

# Guide lines; the horizontal extent follows the largest sinceDay0 in the data.
longest_span = int(since_100th_case['sinceDay0'].max())
rule_chart = plotting.make_rule_chart(
    max_case=110000,
    max_days=longest_span,
    pos_day=(12, 50000),
    pos_3days=(35, 300000),
    pos_week=(40, 2000),
)

# add the total cases for italy to the chart
since_100th_case_all = helper.make_since_df(
    df_national,
    start_case=100,
    column='positive',
    region_column='country',
)
since_100th_case_all['country_label'] = 'Italy'
italy_line = alt.Chart(since_100th_case_all).mark_line(color='steelblue').encode(
    x='sinceDay0',
    y='positive',
)
# Text label shown only on the row whose sinceDay0 equals the row count minus one.
italy_label = italy_line.mark_text(dy=-10).encode(text='country_label').transform_filter(
    f"datum['sinceDay0'] == {len(since_100th_case_all)-1}"
)
italy = italy_line + italy_label

# Last expression of the cell: the combined chart is the cell's output.
line_chart + rule_chart + italy
```
%% Cell type:code id: tags:
``` python
# Regional death curves on a "days since" axis, using a threshold of 10.
since_10th_death = helper.make_since_df(
    df_regions,
    start_case=10,
    column='deceased',
    region_column='region_label',
)
base = alt.Chart(since_10th_death, title="Italy: total deaths since 10th death")
region_lines = plotting.make_region_since_chart(
    base,
    'deceased',
    'sinceDay0',
    'region_label',
    'Days since 10th death',
    'Cumulative deaths',
    'Deaths',
    'Region',
)
line_chart = region_lines.properties(width=450, height=450)

# Guide lines; the horizontal extent follows the largest sinceDay0 in the data.
longest_span = int(since_10th_death['sinceDay0'].max())
rule_chart = plotting.make_rule_chart(
    start_case=10,
    max_case=50000,
    max_days=longest_span,
    pos_day=(12, 50000),
    pos_3days=(35, 20000),
    pos_week=(40, 100),
)

# add the total deaths for italy to the chart
since_10th_death_all = helper.make_since_df(
    df_national,
    start_case=10,
    column='deceased',
    region_column='country',
)
since_10th_death_all['country_label'] = 'Italy'
italy_line = alt.Chart(since_10th_death_all).mark_line(color='steelblue').encode(
    x='sinceDay0',
    y='deceased',
)
# Text label shown only on the row whose sinceDay0 equals the row count minus one.
italy_label = italy_line.mark_text(dy=-10).encode(text='country_label').transform_filter(
    f"datum['sinceDay0'] == {len(since_10th_death_all)-1}"
)
italy = italy_line + italy_label

# Last expression of the cell: the combined chart is the cell's output.
line_chart + rule_chart + italy
```
%% Cell type:code id: tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment