In [1]:
import pandas as pd
import altair as alt
from IPython.display import display, HTML

In [2]:
# Attribution footer (data source and analysis credits); it is displayed
# after each chart in the cells below.
_credits_markup = '''
<p style="font-size: smaller">Data Sources: 
  <a href="https://covidtracking.com">The COVID Tracking Project</a>
<br>
Analysis and Visualization:
  <a href="https://renkulab.io/projects/covid-19/covid-19-public-data">Covid-19 Public Data Collaboration Project</a>
</p>'''
html_credits = HTML(_credits_markup)

In [3]:
# Read population data, indexed by the 'ST' column so it can be looked up by state code
pop_df = pd.read_csv('../data/geodata/us_pop_fung_2019.csv').set_index('ST')

# Read state-level data
data_df = pd.read_json('../data/covidtracking/states-daily.json')
data_df['date'] = pd.to_datetime(data_df['date'], format="%Y%m%d")
# Share of reported tests that came back positive
data_df['ratio'] = data_df['positive']/data_df['total']

# Compute daily differences
# Rows are ordered newest-first within each state, so diff(periods=-1) subtracts
# the following row (the previous date) from each row, giving the day-over-day change.
tdf = data_df.sort_values(['state', 'date'], ascending=[True, False]).set_index(['state', 'date'])
# Rows where none of the three differences could be computed are dropped.
diffs_df = tdf[['positive', 'negative', 'death']].groupby(level='state').diff(periods=-1).dropna(how='all')
tdf_diff = tdf.join(diffs_df, rsuffix='_diff').reset_index()

# incidence rates
# Look the population up explicitly per row instead of relying on index alignment:
# 'state' repeats once per date, and assigning the result of an aligned division
# back to a frame with duplicate index labels fails whenever the two state sets differ.
# States missing from pop_df simply get NaN rates.
# NOTE(review): assumes the 'ST' values in pop_df are unique -- confirm.
state_population = tdf_diff['state'].map(pop_df['Population'])
tdf_diff['positive_diff_100k'] = (tdf_diff['positive_diff'] / state_population) * 100000
tdf_diff['death_diff_100k'] = (tdf_diff['death_diff'] / state_population) * 100000

# "Normalizing" the totals (total tests scaled down by 10 for plotting next to positives)
tdf_diff['total_10'] = tdf_diff['total']/10.

# Daily totals
# numeric_only=True keeps the sum restricted to numeric columns; without it, newer
# pandas versions also try to "sum" string columns such as 'state'.
daily_totals = tdf_diff.groupby('date').sum(numeric_only=True).reset_index()


# Covid-19 Cases in U.S.

The case data from the U.S. is obtained from https://covidtracking.com, a public crowd-sourced covid-19 dataset. 

### Daily Cumulative Totals

Cumulative reported totals of positive cases and deaths, shown alongside the corresponding daily increases.

In [9]:
# Shared template: one bar per date, with the x-axis title suppressed.
base = alt.Chart(daily_totals).mark_bar(size=15).encode(
    alt.X('date', axis=alt.Axis(title=''))
)

# Each panel only adds its own y-encoding on top of the shared template.
cumulative, cumulative_deaths, rates, rates_deaths = (
    base.encode(alt.Y(field, title=label))
    for field, label in [
        ('positive', 'Cumulative cases'),
        ('death', 'Cumulative deaths'),
        ('positive_diff', 'Daily cases'),
        ('death_diff', 'Daily deaths'),
    ]
)

# 2x2 grid: cases on the top row, deaths on the bottom row;
# cumulative on the left, daily on the right.
cases_row = alt.hconcat(cumulative, rates)
deaths_row = alt.hconcat(cumulative_deaths, rates_deaths)
chart = alt.vconcat(
    cases_row,
    deaths_row,
    title='Cumulative Covid-19 cases in the U.S.',
).configure_title(anchor='middle')

display(chart, html_credits)

### Total tests and positives per 100k population

In [5]:
# Keep only the rows reported on the latest date present in the data, keyed by state.
most_recent_test_date = data_df['date'].max()
is_most_recent = data_df['date'] == most_recent_test_date
most_recent_df = data_df.loc[is_most_recent].set_index('state')
print("Most recent test date", most_recent_test_date)
print(len(most_recent_df), "states/territories have data on this date.")

# Per-100k rates: the division aligns on the state index shared with pop_df.
population = pop_df['Population']
for column in ('total', 'positive'):
    most_recent_df[column + '/100k'] = (most_recent_df[column] / population) * 100000
most_recent_df = most_recent_df.reset_index()

Most recent test date 2020-03-23 00:00:00
56 states/territories have data on this date.


In [6]:
# Tests and positives per 100k population for the most recent date, one x position
# per state. Rows are pre-sorted by testing rate and sort=None tells the chart to
# keep that row order instead of re-sorting the states.
# The title names both plotted series: the bars are total tests, not cases.
chart = alt.Chart(
    most_recent_df.sort_values('total/100k'),
    title="Total tests and positives per 100k population"
).encode(alt.X('state', sort=None))
# Bars: total tests per 100k
tests = chart.mark_bar().encode(alt.Y('total/100k'))
# Orange points: positive tests per 100k
positives = chart.mark_point(color='orange', filled=True, size=100, opacity=1).encode(alt.Y('positive/100k'))
display(alt.layer(tests, positives))
display(html_credits)

## Counts and rates by state

Let's take a look at the three states with the highest per-capita incidence of covid-19. The bars show the daily new positive tests; the red and orange curves represent the total tests (divided by 10) and the total positive tests, respectively.

In [7]:
# Build one layered chart per selected state and show them side by side.

charts = []
for state in ['NY', 'WA', 'NM']:
    # Rows for this state only (copied so the slice is independent of tdf_diff)
    state_df = tdf_diff.loc[tdf_diff['state'] == state].copy()

    # Common template: date on the x-axis, fixed panel size, state code as title
    base = alt.Chart(state_df, title=state).encode(
        alt.X('date', axis=alt.Axis(title='Date'))
    ).properties(width=250, height=150)

    # Bars: daily new positives
    dailies = base.mark_bar(size=10).encode(
        alt.Y('positive_diff', axis=alt.Axis(title='Daily positive'))
    )

    # Lines: total tests scaled by 1/10 (red) and cumulative positives (orange)
    totals = base.mark_line(color='red').encode(
        alt.Y('total_10', axis=alt.Axis(title='Total/10'))
    )
    positives = totals.mark_line(color='orange').encode(
        alt.Y('positive', axis=alt.Axis(title='Positive'))
    )
    cumulative = alt.layer(totals, positives)

    # Positive/total ratio line; built here but not included in the layered chart below
    ratio = base.mark_line(color='red').encode(
        alt.Y('ratio', axis=alt.Axis(title='Positive/Total'), scale=alt.Scale(domain=(0,1)))
    )

    # Bars and lines get independent y-scales
    state_chart = (dailies + cumulative).resolve_scale(y='independent')
    charts.append(state_chart)

display(alt.hconcat(*charts), html_credits)