In [1]:
import pandas as pd
import os
from IPython.display import display, HTML, Markdown
import covid_19_dashboard as helper

In [2]:
# Default input locations, given as relative paths. These are plain
# module-level names so that a later cell can reassign them.
geodata_path = "../data/geodata/geo_data.csv"
rates_folder = "../data/covid-19_rates/"
ts_folder = "../data/covid-19_jhu-csse/"

# Output-related settings start out unset.
PAPERMILL_OUTPUT_PATH = None
out_folder = None

In [3]:
# Parameters
# Run-specific values. Each name that was already assigned earlier in the
# notebook (input folders, geodata path, output path) is overridden here.
# NOTE(review): the PAPERMILL_* names suggest this cell is injected by
# papermill for one particular run -- confirm before editing by hand.
geodata_path = "/tmp/fer34kve/data/geodata/geo_data.csv"
rates_folder = "/tmp/fer34kve/data/covid-19_rates"
ts_folder = "/tmp/fer34kve/data/covid-19_jhu-csse"
PAPERMILL_OUTPUT_PATH = "runs/Dashboard.run.ipynb"
PAPERMILL_INPUT_PATH = "/tmp/fer34kve/notebooks/Dashboard.ipynb"


In [4]:
# Read in the data

In [5]:
# Load the three inputs through the project helper module, each from the
# location configured in the parameter cells.
jhu_frames_map = helper.read_jhu_frames_map(ts_folder)
rates_frames_map = helper.read_rates_frames_map(rates_folder)
geodata_df = helper.read_geodata(geodata_path)

# Select the countries with 100 or more cases. The metric key and the cutoff
# are named so the selection criteria are visible in one place.
threshold_metric = 'confirmed'
threshold_cases = 100
countries_over_thresh = helper.countries_with_number_of_cases(
    jhu_frames_map, threshold_metric, threshold_cases
)

# Questions About COVID-19 and Its Spread

Understanding the spread, distribution, and deadliness of COVID-19 is difficult, despite the data available about it. Differences in rates of testing, quality of data, demographics, etc. make it difficult to compare data between countries. 

All this needs to be considered when looking at the plots below. But despite those caveats, I found it helpful to plot the raw data, even though direct comparisons between countries might not be accurate.

In [6]:
# The label of the last column of the 'confirmed' frame is formatted as
# "<abbreviated month> <day> <year>" and shown in an italic caption.
last_column_label = jhu_frames_map['confirmed'].columns[-1]
data_ts = last_column_label.strftime("%b %d %Y")
caption_html = f"<em>Data up to {data_ts}; countries with 100 or more confirmed cases.</em>"
display(HTML(caption_html))

## How are cases per 100,000 distributed geographically?

In [7]:
import altair as alt

In [8]:
# Build the frame used by the map and the bar chart in the following cells,
# passing along the country selection computed when the data was loaded.
map_df = helper.compute_map_df(
    rates_frames_map,
    jhu_frames_map,
    geodata_df,
    countries_over_thresh,
)

In [9]:
# Render the helper-built map for the 'Confirmed/100k' variable, followed by
# a small-print footer linking to the data sources.
confirmed_map = helper.map_of_variable(map_df, 'Confirmed/100k', 'Confirmed')
display(confirmed_map)

map_sources_html = '''
<p style="font-size: smaller">Data Sources: 
  <a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
  <a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
  <a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>'''
display(HTML(map_sources_html))

In [10]:
# Horizontal bar chart of confirmed cases per 100k: one bar per country,
# with the y axis ordered by descending x value (sort='-x').
bars = alt.Chart(map_df).mark_bar().encode(
    x='Confirmed/100k:Q',
    y=alt.Y("Country/Region:N", sort='-x'),
    tooltip=[
        "Country/Region:N",
        "Confirmed:Q", "Deaths:Q", "Recovered:Q",
        "Confirmed/100k:Q", "Deaths/100k:Q", "Recovered/100k:Q",
    ],
)

# Text layer derived from the bars so it shares their x/y encoding; it prints
# the per-100k value next to each bar.
text = bars.mark_text(
    align='left',
    baseline='middle',
    dx=3,  # Nudges text to right so it doesn't appear on top of the bar
).encode(
    text=alt.Text('Confirmed/100k:Q', format=".3")
)

# The title is a plain string literal: the previous f-string prefix had no
# placeholders to interpolate, so it was dropped (same resulting text).
chart = (bars + text).properties(height=900, title="Confirmed cases per 100k inhabitants")
display(chart)

# Small-print footer linking to the data sources.
display(HTML('''
<p style="font-size: smaller">Data Sources: 
  <a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a>,
  <a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>,
  <a href="https://worldmap.harvard.edu/data/geonode:country_centroids_az8">Harvard Worldmap</a>
</p>'''))

## How have cases been growing?

In [11]:
# Long-format growth frame for the 'confirmed' metric, built by the helper.
confirmed_rate_df = helper.growth_df(rates_frames_map, geodata_df, 'confirmed', countries_over_thresh)

# Index by country / region / date, drop the coordinate columns, pivot the
# innermost index level (Date) into columns, and keep only the last column.
# NOTE(review): presumably the last column is the most recent date -- this
# relies on the column order produced by unstack(); confirm.
latest_confirmed_ser = (
    confirmed_rate_df
    .set_index(['Country/Region', 'Geo Region', 'Date'])
    .drop(columns=['Longitude', 'Latitude'])
    .unstack()
    .iloc[:, -1]
)

# Regions ordered by the mean of that last-column value, largest first.
region_means = latest_confirmed_ser.groupby('Geo Region').mean()
sort_order = region_means.sort_values(ascending=False).index.tolist()

In [12]:
# Facet specification: one panel per geographic region, stacked in a single
# column and ordered by sort_order.
region_facet = alt.Facet(
    'Geo Region:N',
    columns=1,
    sort=alt.SortArray(sort_order),
    title='Geographic Region',
)

# NOTE(review): the title mentions "2 or more cases per 100k", while the only
# filter visible in this notebook is countries_over_thresh (100 or more
# confirmed cases) -- confirm that helper.growth_df applies the per-100k cutoff.
base = alt.Chart(confirmed_rate_df).properties(
    width=300,
    height=200,
    title="Countries with 2 or more cases per 100k",
)

# One line per country, coloured by country, within each region panel.
line = base.mark_line().encode(
    x='Date',
    y='Confirmed/100k',
    color='Country/Region',
    facet=region_facet,
    tooltip=["Country/Region:N", "Date:T", "Confirmed/100k:Q"],
)
line