Skip to content
Snippets Groups Projects
Commit 7c83554c authored by Chandrasekhar Ramakrishnan's avatar Chandrasekhar Ramakrishnan
Browse files

style: fixed a comment to be clearer

parent 9435b564
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import os
from IPython.display import display, HTML, Markdown
import covid_19_dashboard as helper
```
%% Cell type:code id: tags:parameters
``` python
# Notebook parameters. The cell carries the "parameters" tag, so these are
# presumably defaults that papermill overrides at execution time -- TODO confirm.

# Folder handed to helper.read_jhu_frames_map below.
ts_folder = "../data/covid-19_jhu-csse/"
# Folder handed to helper.read_rates_frames_map below.
rates_folder = "../data/covid-19_rates/"
# CSV file handed to helper.read_geodata below.
geodata_path = "../data/geodata/geo_data.csv"
# Not read by any cell visible in this part of the notebook.
out_folder = None
# Not read by any cell visible in this part of the notebook.
PAPERMILL_OUTPUT_PATH = None
```
%% Cell type:code id: tags:
``` python
# Read in the data
```
%% Cell type:code id: tags:
``` python
# Load the three inputs the rest of the notebook works from.
jhu_frames_map = helper.read_jhu_frames_map(ts_folder)
rates_frames_map = helper.read_rates_frames_map(rates_folder)
geodata_df = helper.read_geodata(geodata_path)

# Identify countries with 100 or more cases
countries_over_thresh = helper.countries_with_number_of_cases(jhu_frames_map, 'confirmed', 100)

# Filter out some very small countries with a very high case/population ratio.
# The exclusion set is built once instead of once per element of the list.
excluded_countries = {'San Marino', 'Iceland'}
countries_over_thresh = [c for c in countries_over_thresh if c not in excluded_countries]
```
%% Cell type:markdown id: tags:
# Questions About COVID-19 and Its Spread
Understanding the spread, distribution, and deadliness of COVID-19 is difficult, despite the data available about it. Differences in rates of testing, quality of data, demographics, etc. make it difficult to compare data between countries.
All this needs to be considered when looking at the plots below. But despite those caveats, I found it helpful to plot the raw data, even though direct comparisons between countries might not be accurate.
%% Cell type:code id: tags:
``` python
# The label of the last column of the confirmed-cases frame is the most
# recent date in the data; format it for the caption.
latest_date = jhu_frames_map['confirmed'].columns[-1]
data_ts = latest_date.strftime("%b %d %Y")
caption = f"<em>Data up to {data_ts}; countries with 100 or more confirmed cases.</em>"
display(HTML(caption))
```
%% Cell type:markdown id: tags:
## How are cases per 100,000 distributed geographically?
%% Cell type:code id: tags:
``` python
import altair as alt
```
%% Cell type:code id: tags:
``` python
# Frame consumed by the map and the bar chart in the following cells.
map_df = helper.compute_map_df(
    rates_frames_map,
    jhu_frames_map,
    geodata_df,
    countries_over_thresh,
)
```
%% Cell type:code id: tags:
``` python
# Render the map first, then the attribution line underneath it.
confirmed_map = helper.map_of_variable(map_df, 'Confirmed/100k', 'Confirmed')
display(confirmed_map)

map_sources = HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>''')
display(map_sources)
```
%% Cell type:code id: tags:
``` python
# One horizontal bar per country; sort='-x' orders the countries by
# descending confirmed cases per 100k.
bars = alt.Chart(map_df).mark_bar().encode(
    x='Confirmed/100k:Q',
    y=alt.Y("Country/Region:N", sort='-x'),
    tooltip=["Country/Region:N",
             "Confirmed:Q", "Deaths:Q", "Recovered:Q",
             "Confirmed/100k:Q", "Deaths/100k:Q", "Recovered/100k:Q"],
)

# Value labels drawn just past the end of each bar.
text = bars.mark_text(
    align='left',
    baseline='middle',
    dx=3,  # Nudges text to right so it doesn't appear on top of the bar
).encode(
    text=alt.Text('Confirmed/100k:Q', format=".3"),
)

# The title has no placeholders, so it is a plain string rather than an f-string.
chart = (bars + text).properties(height=900, title="Confirmed cases per 100k inhabitants")
display(chart)
display(HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>'''))
```
%% Cell type:markdown id: tags:
## How have cases been growing?
%% Cell type:code id: tags:
``` python
confirmed_rate_df = helper.growth_df(
    rates_frames_map, geodata_df, 'confirmed', countries_over_thresh, 2)

# Pivot to one row per (country, region) and one column per date, then keep
# the last column, i.e. the latest value for every country.
by_country_and_date = confirmed_rate_df.set_index(['Country/Region', 'Geo Region', 'Date'])
wide_rates = by_country_and_date.drop(columns=['Longitude', 'Latitude']).unstack()
latest_confirmed_ser = wide_rates.iloc[:, -1]

# Regions ordered by their mean latest value, highest first.
region_means = latest_confirmed_ser.groupby('Geo Region').mean()
sort_order = region_means.sort_values(ascending=False).index.tolist()
```
%% Cell type:code id: tags:
``` python
# One facet per geographic region, stacked in a single column and ordered by
# sort_order; each facet has one line per country.
base = alt.Chart(confirmed_rate_df).properties(
    width=300, height=200, title="Countries with 2 or more cases per 100k")
line = base.mark_line().encode(
    x='Date',
    y='Confirmed/100k',
    color='Country/Region',
    facet=alt.Facet('Geo Region:N', columns=1, sort=alt.SortArray(sort_order), title='Geographic Region'),
    tooltip=["Country/Region:N", "Date:T", "Confirmed/100k:Q"],
)
# The chart is rendered explicitly; a bare `line` expression in the middle of
# the cell had no effect and has been dropped.
display(line)
display(HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>'''))
```
%% Cell type:code id: tags:
``` python
def country_increase_df(c, df_nominal, growth_in_rate_df):
    """Return one country's growth since its first row with 100+ confirmed cases.

    Parameters
    ----------
    c : str
        Value of 'Country/Region' to select in ``growth_in_rate_df`` and to
        write into the result.
    df_nominal : pandas.DataFrame
        Rows for this country with a 'Date' column (datetime) and a
        'Confirmed' column (absolute counts).
    growth_in_rate_df : pandas.DataFrame
        Rows for all countries with 'Country/Region', 'Date' and
        'Confirmed/100k' columns.

    Returns
    -------
    pandas.DataFrame
        Columns 'Country/Region', 'Days', 'Confirmed', 'Confirmed/100k' with a
        fresh 0..n-1 index. 'Days' and 'Confirmed' are measured relative to
        the first row where 'Confirmed' >= 100. The frame is empty when no
        row reaches 100.
    """
    columns = ['Country/Region', 'Days', 'Confirmed', 'Confirmed/100k']
    over_100 = df_nominal[df_nominal['Confirmed'] >= 100]
    if over_100.empty:
        # No starting point exists; return an empty frame instead of failing
        # on iloc[0].
        return pd.DataFrame(columns=columns)

    start = over_100.iloc[0]

    # Look the per-100k value up by date rather than by row position, so a
    # missing or extra date in either frame cannot shift values onto the
    # wrong day. Dates without a rate become NaN.
    country_rates = growth_in_rate_df[growth_in_rate_df['Country/Region'] == c]
    rate_by_date = country_rates.drop_duplicates(subset='Date').set_index('Date')['Confirmed/100k']

    elapsed = over_100['Date'] - start['Date']
    return pd.DataFrame({
        'Country/Region': c,
        'Days': (elapsed / np.timedelta64(1, 'D')).astype(int).to_numpy(),
        'Confirmed': (over_100['Confirmed'] - start['Confirmed']).to_numpy(),
        'Confirmed/100k': over_100['Date'].map(rate_by_date).to_numpy(),
    }, columns=columns)
# Per-100k series for the selected countries (cutoff argument 0).
growth_in_rate_df = helper.growth_df(
    rates_frames_map, geodata_df, 'confirmed', countries_over_thresh, 0)

# Absolute counts summed per country, run through the same helper with a
# cutoff argument of 1000.
country_totals = jhu_frames_map['confirmed'].groupby(level='Country/Region').sum()
frame_map = {'confirmed': country_totals}
growth_in_value_df = helper.growth_df(
    frame_map, geodata_df, 'confirmed', countries_over_thresh, 1000)
# The helper labels its value column 'Confirmed/100k'; here it holds absolute counts.
growth_in_value_df = growth_in_value_df.rename(columns={'Confirmed/100k': 'Confirmed'})

# One growth frame per country, stacked into a single long frame.
increase_df = pd.concat([
    country_increase_df(country, nominal_rows, growth_in_rate_df)
    for country, nominal_rows in growth_in_value_df.groupby('Country/Region')
])
```
%% Cell type:code id: tags:
``` python
def facetted_growth_plot(df, variable, sort_order, ref_country, title):
    """Build a grid of per-country line charts, each overlaid with a reference country.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; must have 'Country/Region', 'Days' and ``variable`` columns.
    variable : str
        Name of the column drawn on the y axis.
    sort_order : iterable of str
        Country names; one panel is drawn per name, in this order.
    ref_country : str
        Country whose curve is drawn faintly in every panel. It needs at
        least two rows in ``df``, because its label is placed at its
        second-to-last 'Days' value.
    title : str
        Title of the combined chart.

    Returns
    -------
    The vertically concatenated Altair chart, three panels per row.
    """
    import json  # local import keeps the cell independent of the notebook's import cell

    def country_filter(name):
        # json.dumps emits a correctly escaped, double-quoted string literal,
        # so names containing an apostrophe (e.g. "Cote d'Ivoire") no longer
        # break the filter expression.
        return f"datum['Country/Region'] == {json.dumps(name, ensure_ascii=False)}"

    base = alt.Chart(df).properties(
        width=250, height=150)
    line = base.mark_line().encode(
        x='Days',
        y=variable,
        color='Country/Region',
        tooltip=["Country/Region:N", "Days:Q", f"{variable}:Q"]
    )

    # Place the reference label at the second-to-last day of the reference
    # curve. This reads the frame passed in, not a notebook-level global.
    label_loc = df[df['Country/Region'] == ref_country]['Days'].iloc[-2]
    ref = base.mark_line(opacity=0.3).encode(
        x='Days',
        y=variable,
        color=alt.ColorValue('steelblue'),
    ).transform_filter(country_filter(ref_country))
    ref += ref.mark_text().encode(text='Country/Region:N').transform_filter(f"datum['Days'] == {label_loc}")

    # make our small multiples
    charts = []
    for country in sort_order:
        smallm = line.transform_filter(country_filter(country)).properties(
            title=country)
        smallm += ref
        charts.append(smallm)

    # group the small multiples into rows of 3 horizontal charts; slicing
    # keeps the final, possibly shorter, row that was previously dropped
    groups = []
    for start in range(0, len(charts), 3):
        row = alt.hconcat()
        for panel in charts[start:start + 3]:
            row |= panel
        groups.append(row)

    # vertically combine the horizontal charts
    chart = alt.vconcat(title=title)
    for row in groups:
        chart &= row
    return chart
```
%% Cell type:code id: tags:
``` python
# Countries ordered by their maximum confirmed count, largest first.
peak_values = growth_in_value_df.groupby('Country/Region').max()
ranked_countries = peak_values.sort_values('Confirmed', ascending=False).index.tolist()

# Exclude China in this plot because its numbers are far greater than everywhere else
sort_order = [name for name in ranked_countries if name != 'China']
increase_without_china = increase_df[increase_df['Country/Region'] != 'China']

chart = facetted_growth_plot(
    increase_without_china,
    'Confirmed',
    sort_order,
    'Italy',
    "Growth of cases from case 100, compared to Italy",
)
display(chart)

sources_and_credit = HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>
<p style="font-size: smaller">Inspired by <a href="https://covid19dashboards.com/growth-analysis/">Thomas Wiecki</a>''')
display(sources_and_credit)
```
%% Cell type:code id: tags:
``` python
# Recompute the full ordering (largest maximum confirmed count first); the
# previous cell's sort_order had China removed.
max_per_country = growth_in_value_df.groupby('Country/Region').max()
sort_order = max_per_country.sort_values('Confirmed', ascending=False).index.tolist()

chart = facetted_growth_plot(
    increase_df,
    'Confirmed/100k',
    sort_order,
    'Italy',
    "Growth of cases/100k from case 100, compared to Italy",
)
display(chart)

rate_plot_sources = HTML('''
<p style="font-size: smaller">Data Sources:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
<a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>''')
display(rate_plot_sources)
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment