Skip to content
Snippets Groups Projects
Commit f78b249f authored by Chandrasekhar Ramakrishnan's avatar Chandrasekhar Ramakrishnan
Browse files

feat: added Dashboard notebook

parent b8c0b9b5
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
import pandas as pd
import os
from IPython.display import display, HTML, Markdown
```
%% Cell type:code id: tags:parameters
``` python
# Notebook parameters. This cell carries the "parameters" tag.
# NOTE(review): presumably these values are overridden by papermill at
# execution time — confirm against the pipeline that runs this notebook.

# Folder holding the JHU CSSE time-series CSVs (read by read_jhu_covid_df).
ts_folder = "../data/covid-19_jhu-csse/"
# Folder holding the per-country rate CSVs (read by read_rates_covid_df).
rates_folder = "../data/covid-19_rates/"
# Not referenced in the visible part of this notebook.
out_folder = None
# Not referenced in the visible part of this notebook.
# NOTE(review): presumably injected by papermill — confirm.
PAPERMILL_OUTPUT_PATH = None
```
%% Cell type:code id: tags:
``` python
# Read in the data
```
%% Cell type:code id: tags:
``` python
def read_jhu_covid_df(name):
    """Load one JHU CSSE time-series CSV from ``ts_folder``.

    Args:
        name: Suffix of the file name; the file read is
            ``time_series_19-covid-{name}.csv``.

    Returns:
        A DataFrame indexed by (Province/State, Country/Region, Lat, Long)
        whose remaining column labels have been parsed with
        ``pd.to_datetime``.
    """
    csv_path = os.path.join(ts_folder, f"time_series_19-covid-{name}.csv")
    frame = pd.read_csv(csv_path).set_index(
        ['Province/State', 'Country/Region', 'Lat', 'Long']
    )
    # Every column left after indexing is expected to be a date label.
    frame.columns = pd.to_datetime(frame.columns)
    return frame
# One JHU frame per series; the file-name suffix is the capitalised key
# ("confirmed" -> "Confirmed", and so on).
jhu_frames_map = {
    series: read_jhu_covid_df(series.capitalize())
    for series in ("confirmed", "deaths", "recovered")
}
```
%% Cell type:code id: tags:
``` python
def read_rates_covid_df(name):
    """Load one rates time-series CSV from ``rates_folder``.

    Args:
        name: Suffix of the file name; the file read is
            ``ts_rates_19-covid-{name}.csv``.

    Returns:
        A DataFrame indexed by Country/Region whose column labels have been
        parsed with ``pd.to_datetime``.
    """
    csv_path = os.path.join(rates_folder, f"ts_rates_19-covid-{name}.csv")
    raw = pd.read_csv(csv_path)
    # "Unnamed: 0" is presumably a leftover index column from when the CSV
    # was written — confirm against the script that produces these files.
    frame = raw.drop("Unnamed: 0", axis=1).set_index(['Country/Region'])
    frame.columns = pd.to_datetime(frame.columns)
    return frame
# One rates frame per series; here the key is also the file-name suffix.
rates_frames_map = {
    series: read_rates_covid_df(series)
    for series in ("confirmed", "deaths", "recovered")
}
```
%% Cell type:code id: tags:
``` python
# Compile data needed for the visualizations
```
%% Cell type:code id: tags:
``` python
# Per-country coordinates: move the Lat/Long index levels (positions 2 and 3)
# back into columns, keep only those two columns, then average them over all
# rows that share a Country/Region.
country_coords_df = (
    jhu_frames_map['confirmed']
    .reset_index(level=[2, 3])[['Lat', 'Long']]
    .groupby(level='Country/Region')
    .mean()
)
```
%% Cell type:code id: tags:
``` python
# Countries with 100 or more confirmed cases: sum the most recent column
# per Country/Region, then keep the labels whose total exceeds 99.
case_count_ser = (
    jhu_frames_map['confirmed']
    .iloc[:, -1]
    .groupby(level='Country/Region')
    .sum()
)
countries_over_thresh = case_count_ser.loc[case_count_ser > 99].index
```
%% Cell type:markdown id: tags:
# Questions About COVID-19 and Its Spread
These plots should be taken with a large grain of salt. I am not an epidemiologist, so the analyses shown here are completely naive. There are large discrepancies in the data from different countries for a variety of reasons (rates of testing, demographics, etc.) that make direct comparisons inaccurate. Nonetheless, I think there is a lot of interesting information in this data.
%% Cell type:code id: tags:
``` python
# The label of the last column of the confirmed frame is the most recent
# date in the data; format it for the "data up to" banner.
data_ts = jhu_frames_map['confirmed'].columns[-1].strftime("%b %d %Y")
display(
    HTML(f"<em>Data up to {data_ts}</em>")
)
```
%% Cell type:markdown id: tags:
## How are cases per 100,000 distributed geographically?
%% Cell type:code id: tags:
``` python
import altair as alt
from vega_datasets import data
```
%% Cell type:code id: tags:
``` python
# Build the map table: the latest value of each rate series, side by side
# with the per-country coordinates.
map_df = pd.concat(
    [
        *(
            rates_frames_map[series].iloc[:, -1]
            for series in ('confirmed', 'deaths', 'recovered')
        ),
        country_coords_df,
    ],
    axis=1,
)
# Keep only countries with 100 or more cases, drop rows with any missing
# value, and turn the country index back into a regular column.
map_df = (
    map_df.loc[countries_over_thresh]
    .dropna()
    .reset_index()
)
map_df.columns = ['Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Lat', 'Long']
```
%% Cell type:code id: tags:
``` python
def map_of_variable(map_df, variable):
    """Build a world map with one circle per row of ``map_df``.

    Args:
        map_df: Table providing ``Long`` and ``Lat`` for positioning, the
            ``variable`` column for circle size, and ``Country/Region``,
            ``Confirmed``, ``Deaths`` and ``Recovered`` for the tooltip.
        variable: Name of the column that drives circle size; it also
            appears in the chart title.

    Returns:
        A layered Altair chart (sphere, graticule, land, circles) in the
        ``naturalEarth1`` projection, 600x400, with no view border.
    """
    # Background layers generated by Altair itself.
    ocean = alt.Chart(alt.sphere()).mark_geoshape(fill='#cae6ef')
    grid_lines = alt.Chart(alt.graticule()).mark_geoshape(
        stroke='white', strokeWidth=0.5
    )

    # Country shapes from the vega_datasets world_110m topology.
    countries = alt.topo_feature(data.world_110m.url, 'countries')
    land = alt.Chart(countries).mark_geoshape(fill='#dddddd', stroke='#aaaaaa')

    # Data layer: one semi-transparent circle per row.
    bubbles = alt.Chart(map_df).mark_circle(opacity=0.6).encode(
        longitude='Long:Q',
        latitude='Lat:Q',
        size=alt.Size(f'{variable}:Q', title="Cases"),
        color=alt.value('steelblue'),
        tooltip=["Country/Region:N", "Confirmed:Q", "Deaths:Q", "Recovered:Q"],
    )

    # Stack the layers bottom-to-top, then apply projection and sizing.
    world_map = alt.layer(ocean, grid_lines, land, bubbles)
    world_map = world_map.project('naturalEarth1')
    world_map = world_map.properties(
        width=600, height=400, title=f"{variable} cases per 100,000"
    )
    return world_map.configure_view(stroke=None)
```
%% Cell type:code id: tags:
``` python
# Show the confirmed-cases map, followed by the data-source attribution.
display(
    map_of_variable(map_df, 'Confirmed'),
    HTML('''
<p style="font-size: smaller">Data Source:
<a href="https://github.com/CSSEGISandData/COVID-19">JHU CSSE</a> and
<a href="https://data.worldbank.org/indicator/SP.POP.TOTL">World Bank</a>
</p>'''),
)
```
%% Cell type:code id: tags:
``` python
# Horizontal bar chart of the Confirmed column, one bar per country,
# sorted by descending x value.
bars = (
    alt.Chart(map_df)
    .mark_bar()
    .encode(
        x='Confirmed:Q',
        y=alt.Y("Country/Region:N", sort='-x'),
    )
)

# Value labels reuse the bar encodings; dx=3 nudges the text to the right
# so it does not sit on top of the bar.
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text('Confirmed:Q', format=".3")
)

# Last expression of the cell, so the notebook renders the layered chart.
alt.layer(bars, text).properties(height=900)
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment