Skip to content
Snippets Groups Projects
Commit df69a338 authored by Rok Roškar's avatar Rok Roškar
Browse files

chore: update italy notebook

parent 656ce2c0
No related branches found
No related tags found
1 merge request: !135 "chore: update italy notebook"
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# COVID-19 Case data for Italy
Data from [Civil Protection of Italy](https://github.com/pcm-dpc/COVID-19).
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
%load_ext autoreload %load_ext autoreload
%autoreload 2 %autoreload 2
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import pandas as pd import pandas as pd
import altair as alt import altair as alt
from IPython.display import display, HTML from IPython.display import display, HTML
from covid_19_utils import helper, plotting from covid_19_utils import helper, plotting
from covid_19_utils.italy_utils import get_region_populations, prepare_dataframe from covid_19_utils.italy_utils import get_region_populations, prepare_dataframe
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Attribution footer shown underneath every chart in this notebook.
html_credits = HTML(
    '''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/pcm-dpc/COVID-19">Italian Civil Protection</a>, <a href="https://wikidata.org">Wikidata</a>
<br>
Analysis and Visualization:
<a href="https://renkulab.io/projects/covid-19/covid-19-public-data">Covid-19 Public Data Collaboration Project</a>
</p>'''
)
``` ```
%% Cell type:code id: tags:parameters %% Cell type:code id: tags:parameters
``` python ``` python
# Folder holding the Italian Civil Protection files, relative to this notebook.
# This cell carries the `parameters` tag, so the value is meant to be overridable.
data_folder = "../data/covid-19-italy/"
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Load the nationwide series, indexed by time.
national_csv = "dpc-covid19-ita-andamento-nazionale.csv"
national_description = "dati-andamento-nazionale-description.json"
df_national = prepare_dataframe(
    data_folder,
    national_csv,
    national_description,
    use_time_index=True,
)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Turn the index back into an ordinary column (presumably the date index
# requested via use_time_index=True) so the charts can address it by name.
df_national = df_national.reset_index(level=0)

# Daily new cases = row-to-row change of the cumulative count.
# The first row has no predecessor and is therefore NaN.
cumulative_cases = df_national["total_cases"]
df_national["New cases per day"] = cumulative_cases.diff()
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## National summary of total cases and tests
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# National overview: tests (bars) with cumulative cases (orange points) on the
# left, daily new cases (bars) on the right.
base = alt.Chart(df_national).mark_bar(size=10).encode(x=alt.X('date'))

daily = base.encode(y=alt.Y('New cases per day'))
tests = base.encode(y=alt.Y('tests', title="Tests (blue)"))
total = base.mark_point(
    color='orange',
    filled=True,
    size=100,
    opacity=1,
).encode(
    y=alt.Y('total_cases', title="Cases (orange)"),
)

# Tests are drawn first so the case markers sit on top of the bars.
totals = tests + total
chart = (totals | daily).properties(
    title='Total and daily tests and cases nationwide',
).configure_title(anchor='middle')

display(chart)
display(html_credits)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Set up the regional data frame.
df_regions = prepare_dataframe(
    data_folder,
    "dpc-covid19-ita-regioni.csv",
    "dati-regioni-description.json",
    use_time_index=True,
)
df_regions.reset_index(level=0, inplace=True)

# The regional data lists Bolzano and Trento separately, although both are
# provinces of the Trentino Alto Adige region (region_code 4). Collapse them
# into one row per date by summing, then restore the identifying columns that
# the sum distorted.
is_trentino = df_regions['region_code'] == 4
df_trentino_alto_adige = (
    df_regions.loc[is_trentino]
    .groupby('date')
    .sum()
    .reset_index()
    .assign(
        region_code=4,
        latitude=46.4337,
        longitude=11.1693,
        region='Trentino Alto Adige',
        country_code='ITA',
    )
)

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the drop-in replacement and keeps the same row order and index.
df_regions = pd.concat([df_regions[~is_trentino], df_trentino_alto_adige])
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Fetch the population lookup (indexed by region_code) and derive
# per-100,000-inhabitant versions of the headline columns.
populations = get_region_populations()

for field in ('total_cases', 'tests', 'dead'):

    def per_100k(row, column=field):
        """Scale one row's value of `column` to a rate per 100k residents."""
        return row[column] / populations[row['region_code']] * 100000

    value_and_region = df_regions[[field, 'region_code']]
    df_regions[f'{field}_100k'] = value_and_region.apply(per_100k, axis=1)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Highest cumulative case count reached by each region.
# The `level=` argument of DataFrame.max was deprecated in pandas 1.3 and
# removed in 2.0; an explicit groupby gives the same per-region maximum.
max_cases = df_regions.groupby('region')['total_cases'].max().reset_index()

# Region names ordered from most to fewest cases.
sorted_regions = max_cases.sort_values(by='total_cases', ascending=False)['region']
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Cases per region
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Absolute cumulative cases (orange bars) and deaths (black line), one small
# panel per region.
base = alt.Chart(df_regions).encode(x=alt.X('date'))

death = base.mark_line(color='black').encode(
    y=alt.Y('dead', title='Deaths'),
)
total = base.mark_bar(color='orange', size=5).encode(
    y=alt.Y('total_cases', title="Total cases"),
).properties(width=150, height=150)

# Bars first, so the death line is drawn on top of them.
totals = total + death
chart = totals.facet(
    facet='region',
    columns=5,
    align='each',
    title='Total cases (orange) and deaths (black) by region',
).configure_title(anchor='middle')

display(chart)
display(html_credits)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Cumulative cases (orange bars) and deaths (black line) per 100k population,
# one small panel per region.
base = alt.Chart(df_regions).encode(x=alt.X('date'))

death = base.mark_line(color='black').encode(
    y=alt.Y('dead_100k', title='Deaths'),
)
total = base.mark_bar(color='orange', size=5).encode(
    y=alt.Y('total_cases_100k', title="Total cases"),
).properties(width=150, height=150)

# Bars first, so the death line is drawn on top of them.
totals = total + death
chart = totals.facet(
    facet='region',
    columns=5,
    align='each',
    title='Total cases (orange) and deaths (black) by region per 100k population',
).configure_title(anchor='middle')

display(chart)
display(html_credits)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Compute per-region daily differences.
# Rows are ordered newest-first within each region, so diff(periods=-1)
# subtracts the following row (the previous day) from each row.
newest_first = df_regions.sort_values(['region', 'date'], ascending=[True, False])
tdf = newest_first.set_index(['region', 'date'])

cumulative = tdf[['total_cases', 'dead']]
diffs_df = cumulative.groupby(level='region').diff(periods=-1)
# Drop rows where neither difference could be computed.
diffs_df = diffs_df.dropna(how='all')

# Attach the differences as `total_cases_diff` / `dead_diff` columns.
tdf_diff = tdf.join(diffs_df, rsuffix='_diff')
tdf_diff = tdf_diff.reset_index()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Daily new cases (orange bars) and daily deaths (black line), one small
# panel per region.
base = alt.Chart(tdf_diff).encode(x=alt.X('date')).properties(
    width=150,
    height=150,
)

daily_new = base.mark_bar(color='orange').encode(
    y=alt.Y('total_cases_diff', title='N'),
)
daily_dead = base.mark_line(color='black').encode(
    y=alt.Y('dead_diff'),
)

layered_daily = alt.layer(daily_new, daily_dead)
chart = layered_daily.facet(
    facet='region',
    columns=5,
    align='each',
    title='Daily new cases (orange) and deaths (black) by region',
).configure_title(anchor='middle')

display(chart)
display(html_credits)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## Rates of growth
How quickly is the infection spreading? The charts below show data for all of the regions of Italy framed by some illustrative guiding lines to give an idea of how the rate of infection is evolving.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Regional curves of cumulative cases, aligned on days since the 100th case.
since_100th_case = helper.make_since_df(
    df_regions,
    column='total_cases',
    region_column='region',
)
base = alt.Chart(
    since_100th_case,
    title="Italy: total cases since 100th case",
).mark_line(interpolate='basis')

line_chart = plotting.make_region_since_chart(
    base,
    'total_cases',
    'sinceDay0',
    'region',
    'Days since 100th case',
    'Cumulative cases',
    'Cases',
    'Region',
).properties(width=450, height=450)

# Guiding lines; their horizontal extent follows the longest regional series.
longest_series_days = int(since_100th_case['sinceDay0'].max())
rule_chart = plotting.make_rule_chart(
    max_case=110000,
    max_days=longest_series_days,
    pos_day=(12, 50000),
    pos_3days=(35, 300000),
    pos_week=(40, 2000),
)

# Add the total cases for Italy to the chart.
since_100th_case_all = helper.make_since_df(
    df_national,
    start_case=100,
    column='total_cases',
    region_column='country_code',
)
since_100th_case_all['country_label'] = 'Italy'

italy = alt.Chart(since_100th_case_all).mark_line(color='steelblue').encode(
    x='sinceDay0',
    y='total_cases',
)
# Label only the last point of the national line.
# NOTE(review): this assumes sinceDay0 runs 0..len-1 without gaps - confirm
# against helper.make_since_df.
last_day = len(since_100th_case_all) - 1
italy_label = italy.mark_text(dy=-10).encode(text='country_label').transform_filter(
    f"datum['sinceDay0'] == {last_day}"
)
italy = italy + italy_label

line_chart + rule_chart + italy
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# Regional curves of cumulative deaths, aligned on days since the 10th death.
since_10th_death = helper.make_since_df(
    df_regions,
    start_case=10,
    column='dead',
    region_column='region',
)
base = alt.Chart(since_10th_death, title="Italy: total deaths since 10th death")

line_chart = plotting.make_region_since_chart(
    base,
    'dead',
    'sinceDay0',
    'region',
    'Days since 10th death',
    'Cumulative deaths',
    'Deaths',
    'Region',
).properties(width=450, height=450)

# Guiding lines; their horizontal extent follows the longest regional series.
longest_series_days = int(since_10th_death['sinceDay0'].max())
rule_chart = plotting.make_rule_chart(
    start_case=10,
    max_case=50000,
    max_days=longest_series_days,
    pos_day=(12, 50000),
    pos_3days=(35, 20000),
    pos_week=(40, 100),
)

# Add the total deaths for Italy to the chart.
since_10th_death_all = helper.make_since_df(
    df_national,
    start_case=10,
    column='dead',
    region_column='country_code',
)
since_10th_death_all['country_label'] = 'Italy'

italy = alt.Chart(since_10th_death_all).mark_line(color='steelblue').encode(
    x='sinceDay0',
    y='dead',
)
# Label only the last point of the national line.
# NOTE(review): this assumes sinceDay0 runs 0..len-1 without gaps - confirm
# against helper.make_since_df.
last_day = len(since_10th_death_all) - 1
italy_label = italy.mark_text(dy=-10).encode(text='country_label').transform_filter(
    f"datum['sinceDay0'] == {last_day}"
)
italy = italy + italy_label

line_chart + rule_chart + italy
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment