Skip to content
Snippets Groups Projects
Commit c98aca3f authored by Rok Roškar's avatar Rok Roškar
Browse files

chore: update ecdc notebook

parent 873e396b
No related branches found
No related tags found
1 merge request!378000-update-notebooks
%% Cell type:code id: tags:
``` python
# Enable IPython's autoreload extension; mode 2 reloads all modules before each cell runs,
# so edits to local helper packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
from datetime import datetime
import altair as alt
import pandas as pd
from IPython.display import display, HTML
from covid_19_utils import helper, plotting
from covid_19_utils.converters import CaseConverter
```
%% Cell type:markdown id: tags:
# Covid-19 Global Data from ECDC
This dataset is collected by the European Centre for Disease Prevention and Control (ECDC) and can be found [here](https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide).
%% Cell type:code id: tags:
``` python
# Attribution footer (data source and project link) shown via display() next to the charts.
html_credits = HTML(
    '''
<p style="font-size: smaller">Data Sources:
<a href="https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide">ECDC</a>
<br>
Analysis and Visualization:
<a href="https://renkulab.io/projects/covid-19/covid-19-public-data">Covid-19 Public Data Collaboration Project @ renkulab.io</a>
</p>'''
)
```
%% Cell type:code id: tags:parameters
``` python
# Input locations, relative to the notebook. This cell carries the `parameters` tag, so the
# values act as defaults that can be overridden (presumably by papermill — confirm).
atlas_path = "../data/atlas"
data_path = "../data/covid-19-ecdc"
```
%% Cell type:code id: tags:
``` python
# Build the project's CaseConverter from the atlas path and use it to load the ECDC data.
# The result is used as a pandas DataFrame throughout the rest of the notebook.
# NOTE(review): the exact columns produced by read_convert are defined in
# covid_19_utils.converters, not here.
converter = CaseConverter(atlas_path)
df = converter.read_convert(data_path)
```
%% Cell type:code id: tags:
``` python
# Exploratory look at the highest `positive` value recorded per country.
# NOTE: both `nthresh` and `country_max_ser` are reassigned in the following cell.
nthresh = 500000
country_max_ser = df.groupby('country_label')['positive'].max()
# Last expression of the cell: shows the available column names as cell output.
df.columns
```
%% Cell type:code id: tags:
``` python
# Select the countries to plot: those whose highest recorded `positive_100k` value
# exceeds `nthresh`, minus a fixed exclusion list.
nthresh = 3000
country_max_ser = df.set_index(['country_label', 'date'])['positive_100k'].groupby(level='country_label').max()

# Built once up front rather than once per element of the comprehension below.
# NOTE(review): presumably excluded because their small populations distort
# per-100k figures — confirm.
excluded_countries = {'Andorra', 'Iceland', 'San Marino'}
countries_over_thresh = [
    c for c in country_max_ser[country_max_ser > nthresh].index
    if c not in excluded_countries
]

# Only rows dated strictly after start_date are kept. An earlier 2020-02-01 assignment
# that was immediately overwritten by this one has been dropped as dead code.
start_date = datetime.fromisoformat('2020-08-01')
thresh_df = df.loc[(df.date > start_date) & (df.country_label.isin(countries_over_thresh))]
```
%% Cell type:code id: tags:
``` python
# Show the DataFrame's column names as the cell output.
df.columns
```
%% Cell type:markdown id: tags:
## Daily deaths globally
%% Cell type:markdown id: tags:
The chart below shows the daily number of covid-19 related deaths reported worldwide since August 1st, 2020.
%% Cell type:code id: tags:
``` python
# Sum `deceased_daily` across all rows for each date after start_date.
daily_totals = (
    df.loc[df.date > start_date]
    .groupby('date')['deceased_daily']
    .sum()
    .reset_index()
)

# Single line chart of the summed daily deaths over time.
chart = (
    alt.Chart(daily_totals)
    .mark_line()
    .encode(
        x=alt.X('date', title='Date'),
        y=alt.Y('deceased_daily', title='Daily deaths'),
    )
)

display(chart)
display(html_credits)
```
%% Cell type:markdown id: tags:
Daily deaths in countries that have recorded more than 3,000 cases per 100,000 inhabitants (excluding Andorra, Iceland and San Marino), ordered by total number of deaths.
%% Cell type:code id: tags:
``` python
# Facet order: countries sorted by their maximum `deceased` value, largest first.
order = (
    thresh_df.groupby(['country_label'])['deceased']
    .max()
    .sort_values(ascending=False)
    .index.tolist()
)

# One small line chart of daily deaths per selected country, laid out five per row.
base = alt.Chart(thresh_df)
chart = base.mark_line().encode(
    x=alt.X('date', title='Date'),
    y=alt.Y('deceased_daily', title='Daily deaths'),
    facet=alt.Facet('country_label', sort=alt.SortArray(order), columns=5, title=''),
).properties(
    height=150,
    width=150,
)

# resolve_scale returns a new chart instead of modifying the existing one, so the result
# must be assigned; otherwise the facets keep a shared y-axis and the call has no effect.
chart = chart.resolve_scale(y='independent')

display(chart)
display(html_credits)
```
%% Cell type:code id: tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment