Skip to content
Snippets Groups Projects

bugfix

Merged Chandrasekhar Ramakrishnan requested to merge bugfix into master
3 files
+ 9
5
Compare changes
  • Side-by-side
  • Inline
Files
3
+ 2
2
%% Cell type:code id: tags:
``` python
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
import altair as alt
import numpy as np
import pandas as pd
from IPython.display import display, HTML, Markdown
from covid_19_utils import helper
```
%% Cell type:code id: tags:parameters
``` python
# Notebook parameters (this cell carries the `parameters` tag).
# NOTE(review): presumably overridden by papermill at execution time — confirm.

# Folder with the JHU CSSE time series; handed to the case converter's read_convert()
ts_folder = "../data/covid-19_jhu-csse/"
# Folder with the rates data (not referenced in the cells visible here)
rates_folder = "../data/covid-19_rates/"
# CSV file read by helper.read_geodata()
geodata_path = "../data/geodata/geo_data.csv"
# Atlas location passed to the CaseConverter constructor
atlas_path = "../data/atlas"
# Output settings; left unset by default (not referenced in the cells visible here)
out_folder = None
PAPERMILL_OUTPUT_PATH = None
```
%% Cell type:code id: tags:
``` python
# Load the case time series and convert it with the project converter
from covid_19_utils.converters import CaseConverter
converter = CaseConverter(atlas_path)
jhu_df = converter.read_convert(ts_folder)

# Load the geographical reference data
geodata_df = helper.read_geodata(geodata_path)

# Attach coordinates and UN region to the case data. The geo table's "name"
# column is exposed as "country_label"; the merge itself uses the pandas
# default (join on the columns both frames share).
geo_columns = ['Latitude', 'Longitude', 'country_label', 'region_un']
geo_subset = geodata_df.rename(columns={"name": "country_label"})[geo_columns]
jhu_df = jhu_df.merge(geo_subset)
jhu_df = jhu_df.rename(columns={'region_un': 'Geo Region'})
```
%% Cell type:code id: tags:
``` python
# Minimum number of confirmed cases a country needs to be included
nthresh = 500
# Countries left out because of their very high case/population ratio
excluded_countries = {'Andorra', 'Iceland', 'San Marino'}

# Highest confirmed-case count reached by each region label
country_max_ser = jhu_df.groupby('region_label')['positive'].max()

# Identify countries with {nthresh} or more cases. The comparison is inclusive
# so that it matches the "or more" wording shown to the reader.
countries_over_thresh = [
    c for c in country_max_ser[country_max_ser >= nthresh].index
    if c not in excluded_countries
]
```
%% Cell type:markdown id: tags:
# Questions About COVID-19 and Its Spread
Understanding the spread, distribution, and deadliness of COVID-19 is difficult, despite the data available about it. Differences in rates of testing, quality of data, demographics, etc. make it difficult to compare data between countries.
All this needs to be considered when looking at the plots below. But despite those caveats, I found it helpful to plot the raw data, even though direct comparisons between countries might not be accurate.
%% Cell type:code id: tags:
``` python
# Format the date of the last row and show it in the caption.
# NOTE(review): assumes jhu_df is ordered so that its last row carries the
# most recent date — confirm against the converter output.
last_row_date = jhu_df.date.iloc[-1]
data_ts = last_row_date.strftime("%b %d %Y")
caption_html = f"<em>Data up to {data_ts}; countries with {nthresh} or more confirmed cases.</em>"
display(HTML(caption_html))
```
%% Cell type:markdown id: tags:
## How are cases per 100,000 distributed geographically?
%% Cell type:code id: tags:
``` python
# Keep only the rows whose date matches the timestamp shown in the caption
is_latest_date = jhu_df['date'] == data_ts
latest_df = jhu_df[is_latest_date]
```
%% Cell type:code id: tags:
``` python
# Restrict the latest snapshot to the countries that passed the case threshold
passed_threshold = latest_df['country_label'].isin(countries_over_thresh)
map_df = latest_df[passed_threshold]
```
%% Cell type:code id: tags:
``` python
# Show the map of positive cases per 100k, then the data-source footer
positive_map = helper.map_of_variable(map_df, 'positive_100k', 'Positive')
display(positive_map)

sources_footer = HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>''')
display(sources_footer)
```
%% Cell type:code id: tags:
``` python
# Bar chart of positive cases per 100k with one bar per country; the y axis
# is sorted by descending x value.
bars = alt.Chart(map_df).mark_bar().encode(
    x=alt.X('positive_100k:Q', title='Positive cases/100k'),
    y=alt.Y("country_label:N", title='Country/Region', sort='-x'),
    tooltip=["country_label:N",
             "positive:Q", "deceased:Q",
             "positive_100k:Q", "deceased_100k:Q"]
)
# Value labels placed next to each bar
text = bars.mark_text(
    align='left',
    baseline='middle',
    dx=3  # Nudges text to right so it doesn't appear on top of the bar
).encode(
    text=alt.Text('positive_100k:Q', format=".3")
)
# The title has no placeholders, so a plain string is used instead of an f-string
chart = (bars + text).properties(height=900, title="Confirmed cases per 100k inhabitants")
display(chart)
display(HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>'''))
```
%% Cell type:markdown id: tags:
## How have cases been growing?
%% Cell type:code id: tags:
``` python
# Select countries at or above a per capita case threshold. The comparison is
# inclusive so that it matches the "or more" wording of the chart title.
per_capita_thresh = 50
# Countries left out because of their very high case/population ratio
excluded_per_capita = {'Andorra', 'Iceland', 'San Marino'}
countries_over_thresh_per_capita = [
    c for c in latest_df[latest_df.positive_100k >= per_capita_thresh].country_label
    if c not in excluded_per_capita
]

# build the charts
# Facets are ordered by the mean of 'positive' per geo region, largest first.
# Only the 'positive' column is aggregated: averaging the whole frame would
# also hit the non-numeric columns, which recent pandas versions reject.
sort_order = (
    latest_df.groupby('Geo Region')['positive']
    .mean()
    .sort_values(ascending=False)
    .index.tolist()
)
# Clicking legend entries highlights those countries; the others are dimmed
selection = alt.selection_multi(fields=['country_label'], bind='legend')
opacity = alt.condition(selection, alt.value(1), alt.value(0.2))
# Shared base: time series of the selected countries with date on the x axis
base = alt.Chart(
    jhu_df[jhu_df.country_label.isin(
        countries_over_thresh_per_capita)]
).encode(
    alt.X('date', title='Date')
).properties(
    width=300,
    height=200
)
# Cases per 100k, one facet per geo region
cases = base.mark_line().encode(
    alt.Y('positive_100k', scale=alt.Scale(type='symlog'), title='Cases per 100k population'),
    color=alt.Color('country_label', title='Country'),
    facet=alt.Facet('Geo Region:N', columns=1, sort=alt.SortArray(sort_order), title=''),
    tooltip=["country_label:N", "date:T", "positive_100k:Q"],
    opacity=opacity
).add_selection(selection)
# Deaths per 100k, laid out the same way
deaths = base.mark_line().encode(
    alt.Y('deceased_100k', scale=alt.Scale(type='symlog'), title='Deaths per 100k population'),
    color=alt.Color('country_label', title='Country'),
    facet=alt.Facet('Geo Region:N', columns=1, sort=alt.SortArray(sort_order), title=''),
    tooltip=["country_label:N", "date:T", "deceased_100k:Q"],
    opacity=opacity
).add_selection(selection)
# Cases and deaths side by side under a common, centred title
chart = alt.hconcat(
    cases, deaths, title=f"Countries with {per_capita_thresh} or more cases per 100k"
).configure_title(
    anchor='middle'
)
display(chart)
display(HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>'''))
```
%% Cell type:code id: tags:
``` python
# Build the "since" frame from the rows of the countries over the per capita threshold
per_capita_rows = jhu_df[jhu_df['country_label'].isin(countries_over_thresh_per_capita)]
since_df = helper.make_since_df(per_capita_rows, region_column='country_label')
```
%% Cell type:code id: tags:
``` python
# Order countries by their maximum 'positive' value, largest first
sort_order = since_df.groupby(
    'country_label').max().sort_values(
    'positive', ascending=False).index.tolist()
# Exclude China in this plot because its numbers are far greater than everywhere else
sort_order = [o for o in sort_order if o != 'China']
chart = helper.facetted_growth_plot(
    since_df[since_df['country_label'] != 'China'],
    'sinceDay0',
    'positive_100k',
    sort_order,
    'Italy',
    "Growth of cases per 100k population from case 100, compared to Italy",
    "Cases/100k"
)
display(chart)
# Footer: data sources plus attribution; both paragraphs are properly closed
display(HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>
<p style="font-size: smaller">Inspired by <a href="https://covid19dashboards.com/growth-analysis/">Thomas Wiecki</a></p>'''))
```
%% Cell type:code id: tags:
``` python
# Same with log scale
chart = helper.facetted_growth_plot(
since_df,
'sinceDay0',
'positive_100k',
sort_order,
'Italy',
"Growth of cases per 100k population from case 100, compared to Italy (log scale)",
"Cases/100k",
'log'
)
display(chart)
display(HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>'''))
```
Loading