Skip to content
Snippets Groups Projects
Commit 7163637d authored by Andreas Bleuler's avatar Andreas Bleuler Committed by Chandrasekhar Ramakrishnan
Browse files

feat: add converter for USA (covidtracking)

parent eddad99b
No related branches found
No related tags found
2 merge requests: !107 US Census, !103 standardize-data
source diff could not be displayed: it is too large. Options to address this: view the blob.
......@@ -4,6 +4,6 @@ __author__ = """Chandrasekhar Ramakrishnan"""
__email__ = 'cramakri@ethz.ch'
__version__ = '0.1.0'
from .converters import italy, switzerland
from .converters import italy, switzerland, covidtracking
from .helper import *
\ No newline at end of file
"""
Covid-19 converters for data from covidtracking.com (USA).
"""
import pandas as pd
from . import CaseConverter
from .. import helper
class CovidtrackingCaseConverter(CaseConverter):
    """
    Converter for data from the United States, collected by
    the Covid tracking project https://covidtracking.com/

    ``convert`` renames the source columns via ``conversion_dict``, joins
    per-region population metadata and derives per-100k incidence columns.
    """

    # Source column name -> output column name, applied by ``convert``.
    # NOTE(review): "total" is mapped to "positive" while the source's own
    # "positive"/"negative" become "new_*_tests" -- confirm this matches the
    # covidtracking.com field definitions.
    conversion_dict = {
        "totalTestResults": "tested",
        "total": "positive",
        "death": "deceased",
        "positive": "new_positive_tests",
        "negative": "new_negative_tests",
    }

    # Column names of the source data. Not referenced inside this class;
    # presumably consumed by CaseConverter or by callers -- TODO confirm.
    column_list = [
        "date",
        "state",
        "positive",
        "negative",
        "pending",
        "hospitalized",
        "death",
        "total",
        "hash",
        "dateChecked",
        "totalTestResults",
        "fips",
        "deathIncrease",
        "hospitalizedIncrease",
        "negativeIncrease",
        "positiveIncrease",
        "totalTestResultsIncrease",
    ]

    @classmethod
    def convert(cls, df):
        """
        Convert a covidtracking data frame to the common case format.

        Args:
            df: DataFrame holding at least a ``date`` column (parsed with
                the ``%Y%m%d`` format), a ``state`` column and the source
                columns named in ``conversion_dict``. It is not modified;
                ``rename`` produces a new frame.

        Returns:
            DataFrame restricted to ``CaseConverter.common_columns``.
            Because the population metadata is right-joined, every region
            present in the metadata appears in the result and input rows
            whose ``region_iso`` has no metadata entry are dropped.
        """
        # rename the existing columns
        df_conv = df.rename(columns=cls.conversion_dict)

        # convert date
        df_conv["date"] = pd.to_datetime(df_conv["date"], format="%Y%m%d")

        # make states ISO 3166-2 compliant ("US-" + state code); vectorised
        # so no Python function is called per row and an empty input frame
        # still yields an (empty) column
        df_conv["region_iso"] = "US-" + df_conv["state"].astype(str)

        # get population data for US states through right-join type operation
        # (metadata is expected to carry "region_iso" and "population")
        metadata = pd.DataFrame(
            helper.get_region_populations("USA")
        ).rename(columns={"regionLabel": "region_label"})
        merged = pd.merge(df_conv, metadata, on="region_iso", how="right")

        # add country information
        merged["country"] = "USA"

        # calculate incidence rates
        merged["population"] = merged.population.astype(int)
        merged["positive_100k"] = merged["positive"] / merged["population"] * 100000
        merged["deceased_100k"] = merged["deceased"] / merged["population"] * 100000

        return merged[CaseConverter.common_columns]
# Module-level call, so it runs whenever this module is imported.
# ``_register`` is not defined in this class; presumably it is inherited from
# CaseConverter and adds this converter to a registry -- TODO confirm.
CovidtrackingCaseConverter._register()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment