Skip to content
Snippets Groups Projects
Commit bc03ef22 authored by Rok Roškar's avatar Rok Roškar
Browse files

feat: add nytimes converter and example notebook

parent 40fa9e02
No related branches found
No related tags found
1 merge request: !110 "feat: add NYTime data"
Pipeline #23219 passed with stage
in 18 seconds
This diff is collapsed.
......@@ -4,4 +4,4 @@ __author__ = """Chandrasekhar Ramakrishnan"""
__email__ = "cramakri@ethz.ch"
__version__ = "0.1.0"
from .converters import covidtracking, italy, jhu, spain, switzerland
from .converters import covidtracking, italy, jhu, nyt, spain, switzerland
"""
Covid-19 converters for data from the New York Times.
Data source: https://github.com/nytimes/covid-19-data
"""
from pathlib import Path
import numpy as np
import pandas as pd
from . import CaseConverterImpl as CaseConverter
from .. import helper
class NYTCaseConverter(CaseConverter):
    """
    Converter for data from the U.S., collected by
    the New York Times and hosted at https://github.com/nytimes/covid-19-data/

    The county-level file is read by :meth:`read_data` and reshaped by
    :meth:`convert`, which renames the source columns, adds constant country
    columns and attaches a per-county population estimate.
    """

    # Source column name -> converted column name.
    conversion_dict = {
        "county": "admin2_label",
        "state": "region_label",
        "cases": "positive",
        "deaths": "deceased",
    }

    # Columns of the source ``us-counties.csv`` file.
    column_list = ["date", "county", "state", "fips", "cases", "deaths"]

    def convert(self, df):
        """Convert the raw county-level frame.

        Args:
            df: Frame as returned by :meth:`read_data`; the columns used here
                are ``date``, ``county``, ``state``, ``fips``, ``cases`` and
                ``deaths``.

        Returns:
            Whatever ``self._set_common_columns`` returns for the converted
            frame (that helper is defined outside this class).
        """
        # rename the existing columns
        df_conv = df.rename(columns=self.conversion_dict)
        df_conv["country"] = "USA"
        df_conv["country_label"] = "United States of America"
        # this source carries no testing figures
        df_conv["tested"] = np.nan
        df_conv["date"] = pd.to_datetime(df_conv["date"])

        # get population data; the county key is built as STATE * 1000 + COUNTY
        pop = pd.read_csv(self.atlas_folder / "us_census/co-est2019-alldata.csv")
        pop["fips"] = pop.STATE * 1000 + pop.COUNTY

        # Inner join on the county key: rows whose ``fips`` has no match in
        # the population table are dropped here.
        merged = df_conv.merge(
            pop[["fips", "POPESTIMATE2019"]], on="fips"
        ).rename(columns={"POPESTIMATE2019": "population"})

        # add NYC manually based on
        # https://github.com/nytimes/covid-19-data#geographic-exceptions
        # "New York City" is reported as a single entry, so its population is
        # the sum over the listed county codes within state 36.
        nyc_county_fips = [61, 47, 81, 5, 85]
        nyc_pop = pop.loc[
            (pop.STATE == 36) & (pop.COUNTY.isin(nyc_county_fips))
        ]["POPESTIMATE2019"].sum()

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent that works across pandas versions.
        nyc_rows = df_conv[df_conv.admin2_label == "New York City"]
        merged = pd.concat([merged, nyc_rows], ignore_index=True)
        merged.loc[merged.admin2_label == "New York City", "population"] = nyc_pop

        return self._set_common_columns(merged)

    def read_data(self, path):
        """Read in the county data.

        Args:
            path: Directory containing ``us-counties.csv``.

        Returns:
            The file's contents as a ``pandas.DataFrame``.
        """
        return pd.read_csv(Path(path) / "us-counties.csv")
# Runs at import time. `_register` is not defined in NYTCaseConverter itself,
# so it is presumably provided by the CaseConverter base — TODO confirm what
# registry it populates.
NYTCaseConverter._register()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment