Skip to content
Snippets Groups Projects
Unverified Commit 56f4efd2 authored by Rok Roškar's avatar Rok Roškar
Browse files

chore: update dashboard notebook

parent 36b538d4
No related branches found
No related tags found
4 merge requests!402chore: migrate to renku-python 1.0,!401Automatic update - auto-update_2021-12-14_10-38,!400Automatic update - auto-update_2021-12-14_09-39,!399Automatic update - auto-update_2021-12-14_00-25
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
import altair as alt
import numpy as np
import pandas as pd
from IPython.display import display, HTML, Markdown
from covid_19_utils import helper
```
%% Cell type:code id: tags:parameters
``` python
# Notebook inputs. This cell carries the `parameters` tag (presumably so that
# papermill can override these values at execution time -- confirm against the
# pipeline definition).
# Path handed to CaseConverter.read_convert() to load the case time series.
ts_folder = "../data/covid-19_jhu-csse/"
# NOTE(review): not referenced in the visible part of the notebook.
rates_folder = "../data/covid-19_rates/"
# CSV path handed to helper.read_geodata().
geodata_path = "../data/geodata/geo_data.csv"
# Path handed to the CaseConverter constructor.
atlas_path = "../data/atlas"
# NOTE(review): the two values below are not referenced in the visible part of
# the notebook.
out_folder = None
PAPERMILL_OUTPUT_PATH = None
```
%% Cell type:code id: tags:
``` python
from covid_19_utils.converters import CaseConverter

# Load the case time series and let the converter transform it.
converter = CaseConverter(atlas_path)
jhu_df = converter.read_convert(ts_folder)

# Load the geographical reference table.
geodata_df = helper.read_geodata(geodata_path)

# Attach coordinates and the UN region to every case row. The geo table's
# `name` column is renamed to `country_label` before merging; the merge itself
# uses pandas' default of joining on all columns the two frames share.
_geo_columns = ['Latitude', 'Longitude', 'country_label', 'region_un']
_geo_subset = geodata_df.rename(columns={"name": "country_label"})[_geo_columns]
jhu_df = jhu_df.merge(_geo_subset)
jhu_df = jhu_df.rename(columns={'region_un': 'Geo Region'})
```
%% Cell type:code id: tags:
``` python
# Minimum number of confirmed cases a region must have reached to be included.
nthresh = 500

# Highest `positive` value per region_label across all dates.
country_max_ser = (
    jhu_df.set_index(['region_label', 'date'])['positive']
    .groupby(level='region_label')
    .max()
)

# Keep regions with {nthresh} or more cases. The comparison is inclusive so
# that it agrees with the "or more" wording displayed to the reader.
# NOTE(review): the index holds `region_label` values, while later cells match
# this list against `country_label` -- confirm the two columns use the same
# labels.
countries_over_thresh = country_max_ser[country_max_ser >= nthresh].index

# Filter out some countries with very high case/population ratio. The
# exclusion set is built once instead of once per candidate.
excluded_countries = {'Andorra', 'Iceland', 'San Marino'}
countries_over_thresh = [c for c in countries_over_thresh if c not in excluded_countries]
```
%% Cell type:markdown id: tags:
# Questions About COVID-19 and Its Spread
Understanding the spread, distribution, and deadliness of COVID-19 is difficult, despite the data available about it. Differences in rates of testing, quality of data, demographics, etc. make it difficult to compare data between countries.
All this needs to be considered when looking at the plots below. But despite those caveats, I found it helpful to plot the raw data, even though direct comparisons between countries might be inaccurate.
%% Cell type:code id: tags:
``` python
# Most recent date present in the data, formatted for display. The maximum is
# used rather than the last row so the caption does not depend on how the
# frame happens to be ordered.
data_ts = jhu_df.date.max().strftime("%b %d %Y")
display(HTML(f"<em>Data up to {data_ts}; countries with {nthresh} or more confirmed cases.</em>"))
```
%% Cell type:markdown id: tags:
## How are cases per 100,000 distributed geographically?
%% Cell type:code id: tags:
``` python
# Rows for the most recent date. `data_ts` is a display string produced with
# "%b %d %Y", so it is parsed back with that exact format instead of relying on
# pandas' implicit string-to-datetime coercion inside the comparison.
latest_date = pd.to_datetime(data_ts, format="%b %d %Y")
latest_df = jhu_df[jhu_df.date == latest_date]
```
%% Cell type:code id: tags:
``` python
# Restrict the latest snapshot to the countries that passed the case threshold.
over_thresh_mask = latest_df.country_label.isin(countries_over_thresh)
map_df = latest_df[over_thresh_mask]
```
%% Cell type:code id: tags:
``` python
# Attribution footer rendered underneath the map.
data_sources_html = '''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>'''

# Map built by the project helper from the `positive_100k` column of map_df.
positive_map = helper.map_of_variable(map_df, 'positive_100k', 'Positive')
display(positive_map)
display(HTML(data_sources_html))
```
%% Cell type:code id: tags:
``` python
# Bar chart of confirmed cases per 100k inhabitants: one bar per country in
# map_df, with the y axis sorted by descending x value.
bars = alt.Chart(map_df).mark_bar().encode(
    x=alt.X('positive_100k:Q', title='Positive cases/100k'),
    y=alt.Y("country_label:N", title='Country/Region', sort='-x'),
    tooltip=[
        "country_label:N",
        "positive:Q", "deceased:Q",
        "positive_100k:Q", "deceased_100k:Q",
    ],
)

# Numeric label next to each bar.
text = bars.mark_text(
    align='left',
    baseline='middle',
    dx=3,  # Nudges text to right so it doesn't appear on top of the bar
).encode(
    text=alt.Text('positive_100k:Q', format=".3")
)

# The title has no placeholders, so it is a plain string rather than an f-string.
chart = (bars + text).properties(height=1400, title="Confirmed cases per 100k inhabitants")
display(chart)
display(HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>'''))
```
%% Cell type:markdown id: tags:
## How have cases been growing?
%% Cell type:code id: tags:
``` python
# Select countries at or above a per-capita case threshold (cases per 100k).
# The comparison is inclusive so that it matches the "or more" chart title.
per_capita_thresh = 13000
# Countries left out of the comparison; the set is built once, not per element.
excluded_countries = {'Andorra', 'Iceland', 'San Marino'}
countries_over_thresh_per_capita = [
    c
    for c in latest_df[latest_df.positive_100k >= per_capita_thresh].country_label
    if c not in excluded_countries
]

# Only plot the time series after this date.
filtered_df = jhu_df[jhu_df["date"] > "2021-06-01"]

# build the charts
# Facet order: geo regions by descending mean of `positive` in the latest
# snapshot. Only the `positive` column is aggregated, so non-numeric columns
# never reach mean() (which raises on them in pandas >= 2.0).
sort_order = (
    latest_df.groupby('Geo Region')['positive']
    .mean()
    .sort_values(ascending=False)
    .index.tolist()
)

# Legend-bound selection: selected countries are drawn fully opaque, the
# others are faded.
# NOTE(review): `selection_multi` / `add_selection` are the Altair 4 API; they
# are deprecated in Altair 5 in favour of `selection_point` / `add_params` --
# revisit when the Altair dependency is upgraded.
selection = alt.selection_multi(fields=['country_label'], bind='legend')
opacity = alt.condition(selection, alt.value(1), alt.value(0.2))

# Shared base chart: date on the x axis, restricted to the selected countries.
base = alt.Chart(
    filtered_df[filtered_df.country_label.isin(countries_over_thresh_per_capita)]
).encode(
    alt.X('date', title='Date')
).properties(
    width=300,
    height=200
)

# Cases per 100k, one facet row per geo region.
cases = base.mark_line().encode(
    alt.Y('positive_100k', scale=alt.Scale(type='linear'), title='Cases per 100k population'),
    color=alt.Color('country_label', title='Country'),
    facet=alt.Facet('Geo Region:N', columns=1, sort=alt.SortArray(sort_order), title=''),
    tooltip=["country_label:N", "date:T", "positive_100k:Q"],
    opacity=opacity
).add_selection(selection)

# Deaths per 100k, laid out the same way as the cases chart.
deaths = base.mark_line().encode(
    alt.Y('deceased_100k', scale=alt.Scale(type='linear'), title='Deaths per 100k population'),
    color=alt.Color('country_label', title='Country'),
    facet=alt.Facet('Geo Region:N', columns=1, sort=alt.SortArray(sort_order), title=''),
    tooltip=["country_label:N", "date:T", "deceased_100k:Q"],
    opacity=opacity
).add_selection(selection)

# Place both charts side by side under a common, centred title.
chart = alt.hconcat(
    cases, deaths, title=f"Countries with {per_capita_thresh} or more cases per 100k"
).configure_title(
    anchor='middle'
)
display(chart)
display(HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>'''))
```
%% Cell type:code id: tags:
``` python
# since_df = helper.make_since_df(
# jhu_df[jhu_df.country_label.isin(countries_over_thresh_per_capita)],
# region_column='country_label'
# )
```
%% Cell type:code id: tags:
``` python
# sort_order = since_df.groupby(
# 'country_label').max().sort_values(
# 'positive', ascending=False).index.tolist()
# # Exclude China in this plot because its numbers are far greater than everywhere else
# sort_order = [o for o in sort_order if o != 'China']
# chart = helper.facetted_growth_plot(
# since_df[since_df['country_label'] != 'China'],
# 'sinceDay0',
# 'positive_100k',
# sort_order,
# 'Italy',
# "Growth of cases per 100k population from case 100, compared to Italy",
# "Cases/100k"
# )
# display(chart)
# display(HTML('''
# <p style="font-size: smaller">Data Sources:
# <a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
# <a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
# <a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
# </p>
# <p style="font-size: smaller">Inspired by <a href="https://covid19dashboards.com/growth-analysis/">Thomas Wiecki</a>'''))
```
%% Cell type:code id: tags:
``` python
# Same with log scale
# chart = helper.facetted_growth_plot(
# since_df,
# 'sinceDay0',
# 'positive_100k',
# sort_order,
# 'Italy',
# "Growth of cases per 100k population from case 100, compared to Italy (log scale)",
# "Cases/100k",
# 'log'
# )
# display(chart)
# display(HTML('''
# <p style="font-size: smaller">Data Sources:
# <a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
# <a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
# <a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
# </p>'''))
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment