diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..ba94f5e43f10b3215c811e403d2df27d41ab5749 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,8 @@ +data/covid-19_jhu-csse/time_series_19-covid-Confirmed.csv filter=lfs diff=lfs merge=lfs -text +data/covid-19_jhu-csse/time_series_19-covid-Deaths.csv filter=lfs diff=lfs merge=lfs -text +data/covid-19_jhu-csse/time_series_19-covid-Recovered.csv filter=lfs diff=lfs merge=lfs -text +data/worldbank/SP.POP.TOTL.zip filter=lfs diff=lfs merge=lfs -text +data/covid-19_rates/** filter=lfs diff=lfs merge=lfs -text +data/covid-19_rates/ts_rates_19-covid-deaths.csv filter=lfs diff=lfs merge=lfs -text +data/covid-19_rates/ts_rates_19-covid-recovered.csv filter=lfs diff=lfs merge=lfs -text +data/covid-19_rates/ts_rates_19-covid-confirmed.csv filter=lfs diff=lfs merge=lfs -text diff --git a/.renku/datasets/720e46f5-0f38-48be-86ef-1fcbae258a6f/metadata.yml b/.renku/datasets/720e46f5-0f38-48be-86ef-1fcbae258a6f/metadata.yml new file mode 100644 index 0000000000000000000000000000000000000000..d748501401f822c12badb449606bc1a84fa99717 --- /dev/null +++ b/.renku/datasets/720e46f5-0f38-48be-86ef-1fcbae258a6f/metadata.yml @@ -0,0 +1,221 @@ +'@context': + '@version': 1.1 + _id: '@id' + _project: + '@context': + '@version': 1.1 + _id: '@id' + created: schema:dateCreated + creator: + '@context': + '@version': 1.1 + _id: '@id' + affiliation: schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + name: schema:name + prov: http://www.w3.org/ns/prov# + schema: http://schema.org/ + updated: schema:dateUpdated + version: schema:schemaVersion + '@id': schema:isPartOf + based_on: schema:isBasedOn + created: schema:dateCreated + creator: + '@context': + '@version': 1.1 + _id: '@id' + affiliation: 
schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + date_published: schema:datePublished + description: schema:description + files: + '@context': + '@version': 1.1 + _id: '@id' + _project: + '@context': + '@version': 1.1 + _id: '@id' + created: schema:dateCreated + creator: + '@context': + '@version': 1.1 + _id: '@id' + affiliation: schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + name: schema:name + prov: http://www.w3.org/ns/prov# + schema: http://schema.org/ + updated: schema:dateUpdated + version: schema:schemaVersion + '@id': schema:isPartOf + added: schema:dateCreated + based_on: schema:isBasedOn + creator: + '@context': + '@version': 1.1 + _id: '@id' + affiliation: schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + name: schema:name + path: prov:atLocation + prov: http://www.w3.org/ns/prov# + schema: http://schema.org/ + url: schema:url + wfprov: http://purl.org/wf4ever/wfprov# + '@id': schema:hasPart + identifier: schema:identifier + in_language: + '@context': + '@version': 1.1 + alternate_name: schema:alternateName + name: schema:name + schema: http://schema.org/ + '@id': schema:inLanguage + keywords: schema:keywords + license: schema:license + name: schema:name + path: prov:atLocation + prov: http://www.w3.org/ns/prov# + same_as: + '@context': + '@version': 1.1 + _id: '@id' + schema: http://schema.org/ + url: schema:url + '@id': schema:sameAs + 
schema: http://schema.org/ + short_name: schema:alternateName + tags: + '@context': + '@version': 1.1 + _id: '@id' + commit: schema:location + created: schema:startDate + dataset: schema:about + description: schema:description + name: schema:name + schema: http://schema.org/ + '@id': schema:subjectOf + url: schema:url + version: schema:version + wfprov: http://purl.org/wf4ever/wfprov# +'@type': +- prov:Entity +- schema:Dataset +- wfprov:Artifact +_id: https://dev.renku.ch/datasets/720e46f5-0f38-48be-86ef-1fcbae258a6f +_label: 720e46f5-0f38-48be-86ef-1fcbae258a6f +_project: + '@type': + - prov:Location + - schema:Project + _id: https://dev.renku.ch/projects/cramakri/covid-19-dashboard + created: '2020-03-11T21:43:12.736000+00:00' + creator: + '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: covid-19-dashboard + updated: '2020-03-11T21:43:12.736000+00:00' + version: '1' +based_on: null +created: '2020-03-11T21:59:24.774290+00:00' +creator: +- '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan +date_published: null +description: null +files: +- '@type': + - prov:Entity + - schema:DigitalDocument + - wfprov:Artifact + _id: https://dev.renku.ch/blob/f9328ad834b29347e00c73c32bc2a99a91103247/data/worldbank/SP.POP.TOTL.zip + _label: data/worldbank/SP.POP.TOTL.zip@f9328ad834b29347e00c73c32bc2a99a91103247 + _project: + '@type': + - prov:Location + - schema:Project + _id: https://dev.renku.ch/projects/cramakri/covid-19-dashboard + created: '2020-03-11T21:43:12.736000+00:00' + creator: + '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: 
Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: covid-19-dashboard + updated: '2020-03-11T21:43:12.736000+00:00' + version: '1' + added: '2020-03-11T22:05:58.843468+00:00' + based_on: null + creator: + - '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: SP.POP.TOTL.zip + path: data/worldbank/SP.POP.TOTL.zip + url: http://api.worldbank.org/v2/en/indicator/SP.POP.TOTL?downloadformat=csv +identifier: 720e46f5-0f38-48be-86ef-1fcbae258a6f +in_language: null +keywords: [] +license: null +name: worldbank +path: .renku/datasets/720e46f5-0f38-48be-86ef-1fcbae258a6f +same_as: null +short_name: worldbank +tags: [] +url: https://dev.renku.ch/datasets/720e46f5-0f38-48be-86ef-1fcbae258a6f +version: null diff --git a/.renku/datasets/e9212b58-4994-482b-801c-ed90a960bd04/metadata.yml b/.renku/datasets/e9212b58-4994-482b-801c-ed90a960bd04/metadata.yml new file mode 100644 index 0000000000000000000000000000000000000000..d34c6681c31b3ddc3a487be585a57150f179b0c0 --- /dev/null +++ b/.renku/datasets/e9212b58-4994-482b-801c-ed90a960bd04/metadata.yml @@ -0,0 +1,301 @@ +'@context': + '@version': 1.1 + _id: '@id' + _project: + '@context': + '@version': 1.1 + _id: '@id' + created: schema:dateCreated + creator: + '@context': + '@version': 1.1 + _id: '@id' + affiliation: schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + name: schema:name + prov: http://www.w3.org/ns/prov# + schema: http://schema.org/ + updated: schema:dateUpdated + version: schema:schemaVersion + '@id': schema:isPartOf + based_on: schema:isBasedOn + created: schema:dateCreated + creator: + '@context': + '@version': 1.1 + _id: '@id' + 
affiliation: schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + date_published: schema:datePublished + description: schema:description + files: + '@context': + '@version': 1.1 + _id: '@id' + _project: + '@context': + '@version': 1.1 + _id: '@id' + created: schema:dateCreated + creator: + '@context': + '@version': 1.1 + _id: '@id' + affiliation: schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + name: schema:name + prov: http://www.w3.org/ns/prov# + schema: http://schema.org/ + updated: schema:dateUpdated + version: schema:schemaVersion + '@id': schema:isPartOf + added: schema:dateCreated + based_on: schema:isBasedOn + creator: + '@context': + '@version': 1.1 + _id: '@id' + affiliation: schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + name: schema:name + path: prov:atLocation + prov: http://www.w3.org/ns/prov# + schema: http://schema.org/ + url: schema:url + wfprov: http://purl.org/wf4ever/wfprov# + '@id': schema:hasPart + identifier: schema:identifier + in_language: + '@context': + '@version': 1.1 + alternate_name: schema:alternateName + name: schema:name + schema: http://schema.org/ + '@id': schema:inLanguage + keywords: schema:keywords + license: schema:license + name: schema:name + path: prov:atLocation + prov: http://www.w3.org/ns/prov# + same_as: + '@context': + '@version': 1.1 + _id: '@id' + schema: http://schema.org/ + url: schema:url + '@id': 
schema:sameAs + schema: http://schema.org/ + short_name: schema:alternateName + tags: + '@context': + '@version': 1.1 + _id: '@id' + commit: schema:location + created: schema:startDate + dataset: schema:about + description: schema:description + name: schema:name + schema: http://schema.org/ + '@id': schema:subjectOf + url: schema:url + version: schema:version + wfprov: http://purl.org/wf4ever/wfprov# +'@type': +- prov:Entity +- schema:Dataset +- wfprov:Artifact +_id: https://dev.renku.ch/datasets/e9212b58-4994-482b-801c-ed90a960bd04 +_label: e9212b58-4994-482b-801c-ed90a960bd04 +_project: + '@type': + - prov:Location + - schema:Project + _id: https://dev.renku.ch/projects/cramakri/covid-19-dashboard + created: '2020-03-11T21:43:12.736000+00:00' + creator: + '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: covid-19-dashboard + updated: '2020-03-11T21:43:12.736000+00:00' + version: '1' +based_on: null +created: '2020-03-12T22:44:43.719042+00:00' +creator: +- '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan +date_published: null +description: null +files: +- '@type': + - prov:Entity + - schema:DigitalDocument + - wfprov:Artifact + _id: https://dev.renku.ch/blob/5cbef5a5898d761176f551823f2fa51f28e8bd41/data/covid-19_rates/ts_rates_19-covid-confirmed.csv + _label: data/covid-19_rates/ts_rates_19-covid-confirmed.csv@5cbef5a5898d761176f551823f2fa51f28e8bd41 + _project: + '@type': + - prov:Location + - schema:Project + _id: https://dev.renku.ch/projects/cramakri/covid-19-dashboard + created: '2020-03-11T21:43:12.736000+00:00' + creator: + '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + 
alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: covid-19-dashboard + updated: '2020-03-11T21:43:12.736000+00:00' + version: '1' + added: '2020-03-13T14:55:17.141133+00:00' + based_on: null + creator: + - '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: ts_rates_19-covid-confirmed.csv + path: data/covid-19_rates/ts_rates_19-covid-confirmed.csv + url: data/covid-19_rates/ts_rates_19-covid-confirmed.csv +- '@type': + - prov:Entity + - schema:DigitalDocument + - wfprov:Artifact + _id: https://dev.renku.ch/blob/5cbef5a5898d761176f551823f2fa51f28e8bd41/data/covid-19_rates/ts_rates_19-covid-deaths.csv + _label: data/covid-19_rates/ts_rates_19-covid-deaths.csv@5cbef5a5898d761176f551823f2fa51f28e8bd41 + _project: + '@type': + - prov:Location + - schema:Project + _id: https://dev.renku.ch/projects/cramakri/covid-19-dashboard + created: '2020-03-11T21:43:12.736000+00:00' + creator: + '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: covid-19-dashboard + updated: '2020-03-11T21:43:12.736000+00:00' + version: '1' + added: '2020-03-13T14:55:17.163968+00:00' + based_on: null + creator: + - '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: ts_rates_19-covid-deaths.csv + path: data/covid-19_rates/ts_rates_19-covid-deaths.csv + url: data/covid-19_rates/ts_rates_19-covid-deaths.csv +- '@type': + - prov:Entity + - schema:DigitalDocument + - wfprov:Artifact + _id: 
https://dev.renku.ch/blob/5cbef5a5898d761176f551823f2fa51f28e8bd41/data/covid-19_rates/ts_rates_19-covid-recovered.csv + _label: data/covid-19_rates/ts_rates_19-covid-recovered.csv@5cbef5a5898d761176f551823f2fa51f28e8bd41 + _project: + '@type': + - prov:Location + - schema:Project + _id: https://dev.renku.ch/projects/cramakri/covid-19-dashboard + created: '2020-03-11T21:43:12.736000+00:00' + creator: + '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: covid-19-dashboard + updated: '2020-03-11T21:43:12.736000+00:00' + version: '1' + added: '2020-03-13T14:55:17.183695+00:00' + based_on: null + creator: + - '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: ts_rates_19-covid-recovered.csv + path: data/covid-19_rates/ts_rates_19-covid-recovered.csv + url: data/covid-19_rates/ts_rates_19-covid-recovered.csv +identifier: e9212b58-4994-482b-801c-ed90a960bd04 +in_language: null +keywords: [] +license: null +name: covid-19-rates +path: .renku/datasets/e9212b58-4994-482b-801c-ed90a960bd04 +same_as: null +short_name: covid-19-rates +tags: [] +url: https://dev.renku.ch/datasets/e9212b58-4994-482b-801c-ed90a960bd04 +version: null diff --git a/.renku/datasets/f6726a5b-f973-45d5-b873-30fa0dff772f/metadata.yml b/.renku/datasets/f6726a5b-f973-45d5-b873-30fa0dff772f/metadata.yml new file mode 100644 index 0000000000000000000000000000000000000000..51cf86cdfe865ac460256abe28d53851fec9c556 --- /dev/null +++ b/.renku/datasets/f6726a5b-f973-45d5-b873-30fa0dff772f/metadata.yml @@ -0,0 +1,394 @@ +'@context': + '@version': 1.1 + _id: '@id' + _project: + '@context': + '@version': 1.1 + _id: '@id' + created: schema:dateCreated + creator: + '@context': + 
'@version': 1.1 + _id: '@id' + affiliation: schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + name: schema:name + prov: http://www.w3.org/ns/prov# + schema: http://schema.org/ + updated: schema:dateUpdated + version: schema:schemaVersion + '@id': schema:isPartOf + based_on: schema:isBasedOn + created: schema:dateCreated + creator: + '@context': + '@version': 1.1 + _id: '@id' + affiliation: schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + date_published: schema:datePublished + description: schema:description + files: + '@context': + '@version': 1.1 + _id: '@id' + _project: + '@context': + '@version': 1.1 + _id: '@id' + created: schema:dateCreated + creator: + '@context': + '@version': 1.1 + _id: '@id' + affiliation: schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + name: schema:name + prov: http://www.w3.org/ns/prov# + schema: http://schema.org/ + updated: schema:dateUpdated + version: schema:schemaVersion + '@id': schema:isPartOf + added: schema:dateCreated + based_on: schema:isBasedOn + creator: + '@context': + '@version': 1.1 + _id: '@id' + affiliation: schema:affiliation + alternate_name: schema:alternateName + email: schema:email + label: rdfs:label + name: schema:name + prov: http://www.w3.org/ns/prov# + rdfs: http://www.w3.org/2000/01/rdf-schema# + schema: http://schema.org/ + '@id': schema:creator + name: schema:name + path: prov:atLocation + prov: 
http://www.w3.org/ns/prov# + schema: http://schema.org/ + url: schema:url + wfprov: http://purl.org/wf4ever/wfprov# + '@id': schema:hasPart + identifier: schema:identifier + in_language: + '@context': + '@version': 1.1 + alternate_name: schema:alternateName + name: schema:name + schema: http://schema.org/ + '@id': schema:inLanguage + keywords: schema:keywords + license: schema:license + name: schema:name + path: prov:atLocation + prov: http://www.w3.org/ns/prov# + same_as: + '@context': + '@version': 1.1 + _id: '@id' + schema: http://schema.org/ + url: schema:url + '@id': schema:sameAs + schema: http://schema.org/ + short_name: schema:alternateName + tags: + '@context': + '@version': 1.1 + _id: '@id' + commit: schema:location + created: schema:startDate + dataset: schema:about + description: schema:description + name: schema:name + schema: http://schema.org/ + '@id': schema:subjectOf + url: schema:url + version: schema:version + wfprov: http://purl.org/wf4ever/wfprov# +'@type': +- prov:Entity +- schema:Dataset +- wfprov:Artifact +_id: https://dev.renku.ch/datasets/f6726a5b-f973-45d5-b873-30fa0dff772f +_label: f6726a5b-f973-45d5-b873-30fa0dff772f +_project: + '@type': + - prov:Location + - schema:Project + _id: https://dev.renku.ch/projects/cramakri/covid-19-dashboard + created: '2020-03-11T21:43:12.736000+00:00' + creator: + '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: covid-19-dashboard + updated: '2020-03-11T21:43:12.736000+00:00' + version: '1' +based_on: null +created: '2020-03-11T21:53:27.485291+00:00' +creator: +- '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan +date_published: null +description: null +files: +- '@type': + 
- prov:Entity + - schema:DigitalDocument + - wfprov:Artifact + _id: https://dev.renku.ch/blob/9dec8b0e7ac4d630fffa7e5182393d027f58ebb7/data/covid-19_jhu-csse/time_series_19-covid-Confirmed.csv + _label: data/covid-19_jhu-csse/time_series_19-covid-Confirmed.csv@9dec8b0e7ac4d630fffa7e5182393d027f58ebb7 + _project: + '@type': + - prov:Location + - schema:Project + _id: https://localhost/projects/cramakri/covid-19-dashboard + created: '2020-03-11T21:43:12.736000+00:00' + creator: + '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: covid-19-dashboard + updated: '2020-03-11T21:43:12.736000+00:00' + version: '1' + added: '2020-03-11T21:57:23.593776+00:00' + based_on: + '@type': + - prov:Entity + - schema:DigitalDocument + - wfprov:Artifact + _id: https://github.com/blob/e0ae6f6c8ab359ef6582f51453c852094255b1f9/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv + _label: csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv@e0ae6f6c8ab359ef6582f51453c852094255b1f9 + _project: null + added: '2020-03-11T21:57:23.351514+00:00' + based_on: null + creator: [] + name: time_series_19-covid-Confirmed.csv + path: csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv + url: https://github.com/CSSEGISandData/COVID-19.git + creator: + - '@type': + - prov:Person + - schema:Person + _id: mailto:ryan.lau@jhuapl.edu + affiliation: null + alternate_name: null + email: ryan.lau@jhuapl.edu + label: Ryan Lau + name: Ryan Lau + - '@type': + - prov:Person + - schema:Person + _id: mailto:jhusystems@gmail.com + affiliation: null + alternate_name: null + email: jhusystems@gmail.com + label: CSSEGISandData + name: CSSEGISandData + - '@type': + - prov:Person + - schema:Person + _id: mailto:60674295+CSSEGISandData@users.noreply.github.com + 
affiliation: null + alternate_name: null + email: 60674295+CSSEGISandData@users.noreply.github.com + label: CSSEGISandData + name: CSSEGISandData + name: time_series_19-covid-Confirmed.csv + path: data/covid-19_jhu-csse/time_series_19-covid-Confirmed.csv + url: https://github.com/CSSEGISandData/COVID-19.git +- '@type': + - prov:Entity + - schema:DigitalDocument + - wfprov:Artifact + _id: https://dev.renku.ch/blob/9be1f743e9800aff20164e2e75e398643376dccc/data/covid-19_jhu-csse/time_series_19-covid-Deaths.csv + _label: data/covid-19_jhu-csse/time_series_19-covid-Deaths.csv@9be1f743e9800aff20164e2e75e398643376dccc + _project: + '@type': + - prov:Location + - schema:Project + _id: https://localhost/projects/cramakri/covid-19-dashboard + created: '2020-03-11T21:43:12.736000+00:00' + creator: + '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: covid-19-dashboard + updated: '2020-03-11T21:43:12.736000+00:00' + version: '1' + added: '2020-03-11T21:57:57.824943+00:00' + based_on: + '@type': + - prov:Entity + - schema:DigitalDocument + - wfprov:Artifact + _id: https://github.com/blob/34a4321664b5364982c5fd30d6d4014d7f00d1f1/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv + _label: csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv@34a4321664b5364982c5fd30d6d4014d7f00d1f1 + _project: null + added: '2020-03-11T21:57:57.577124+00:00' + based_on: null + creator: [] + name: time_series_19-covid-Deaths.csv + path: csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv + url: https://github.com/CSSEGISandData/COVID-19.git + creator: + - '@type': + - prov:Person + - schema:Person + _id: mailto:ryan.lau@jhuapl.edu + affiliation: null + alternate_name: null + email: ryan.lau@jhuapl.edu + label: Ryan Lau + name: Ryan Lau + - '@type': + - 
prov:Person + - schema:Person + _id: mailto:jhusystems@gmail.com + affiliation: null + alternate_name: null + email: jhusystems@gmail.com + label: CSSEGISandData + name: CSSEGISandData + - '@type': + - prov:Person + - schema:Person + _id: mailto:60674295+CSSEGISandData@users.noreply.github.com + affiliation: null + alternate_name: null + email: 60674295+CSSEGISandData@users.noreply.github.com + label: CSSEGISandData + name: CSSEGISandData + name: time_series_19-covid-Deaths.csv + path: data/covid-19_jhu-csse/time_series_19-covid-Deaths.csv + url: https://github.com/CSSEGISandData/COVID-19.git +- '@type': + - prov:Entity + - schema:DigitalDocument + - wfprov:Artifact + _id: https://dev.renku.ch/blob/effbb6108613e4bf9730f7b818ac96f10b259274/data/covid-19_jhu-csse/time_series_19-covid-Recovered.csv + _label: data/covid-19_jhu-csse/time_series_19-covid-Recovered.csv@effbb6108613e4bf9730f7b818ac96f10b259274 + _project: + '@type': + - prov:Location + - schema:Project + _id: https://dev.renku.ch/projects/cramakri/covid-19-dashboard + created: '2020-03-11T21:43:12.736000+00:00' + creator: + '@type': + - prov:Person + - schema:Person + _id: mailto:cramakri@ethz.ch + affiliation: null + alternate_name: null + email: cramakri@ethz.ch + label: Chandrasekhar Ramakrishnan + name: Chandrasekhar Ramakrishnan + name: covid-19-dashboard + updated: '2020-03-11T21:43:12.736000+00:00' + version: '1' + added: '2020-03-11T21:58:08.577597+00:00' + based_on: + '@type': + - prov:Entity + - schema:DigitalDocument + - wfprov:Artifact + _id: https://github.com/blob/e69d4ce27fd320b1b8aaadab7a74717adb755f45/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv + _label: csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv@e69d4ce27fd320b1b8aaadab7a74717adb755f45 + _project: null + added: '2020-03-11T21:58:08.338883+00:00' + based_on: null + creator: [] + name: time_series_19-covid-Recovered.csv + path: 
csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv + url: https://github.com/CSSEGISandData/COVID-19.git + creator: + - '@type': + - prov:Person + - schema:Person + _id: mailto:ryan.lau@jhuapl.edu + affiliation: null + alternate_name: null + email: ryan.lau@jhuapl.edu + label: Ryan Lau + name: Ryan Lau + - '@type': + - prov:Person + - schema:Person + _id: mailto:jhusystems@gmail.com + affiliation: null + alternate_name: null + email: jhusystems@gmail.com + label: CSSEGISandData + name: CSSEGISandData + - '@type': + - prov:Person + - schema:Person + _id: mailto:60674295+CSSEGISandData@users.noreply.github.com + affiliation: null + alternate_name: null + email: 60674295+CSSEGISandData@users.noreply.github.com + label: CSSEGISandData + name: CSSEGISandData + name: time_series_19-covid-Recovered.csv + path: data/covid-19_jhu-csse/time_series_19-covid-Recovered.csv + url: https://github.com/CSSEGISandData/COVID-19.git +identifier: f6726a5b-f973-45d5-b873-30fa0dff772f +in_language: null +keywords: [] +license: null +name: covid-19_jhu-csse +path: .renku/datasets/f6726a5b-f973-45d5-b873-30fa0dff772f +same_as: null +short_name: covid-19_jhu-csse +tags: [] +url: https://dev.renku.ch/datasets/f6726a5b-f973-45d5-b873-30fa0dff772f +version: null diff --git a/.renku/refs/datasets/covid-19-rates b/.renku/refs/datasets/covid-19-rates new file mode 120000 index 0000000000000000000000000000000000000000..b74ac67bb0bc3e64fb31976109c688187493a3f6 --- /dev/null +++ b/.renku/refs/datasets/covid-19-rates @@ -0,0 +1 @@ +../../datasets/e9212b58-4994-482b-801c-ed90a960bd04/metadata.yml \ No newline at end of file diff --git a/.renku/refs/datasets/covid-19_jhu-csse b/.renku/refs/datasets/covid-19_jhu-csse new file mode 120000 index 0000000000000000000000000000000000000000..054b0e0f613e712e8266e6d8600347867a74aa62 --- /dev/null +++ b/.renku/refs/datasets/covid-19_jhu-csse @@ -0,0 +1 @@ +../../datasets/f6726a5b-f973-45d5-b873-30fa0dff772f/metadata.yml \ No newline 
at end of file diff --git a/.renku/refs/datasets/worldbank b/.renku/refs/datasets/worldbank new file mode 120000 index 0000000000000000000000000000000000000000..ed44611538f29b4e780c9a409455041420c5c50f --- /dev/null +++ b/.renku/refs/datasets/worldbank @@ -0,0 +1 @@ +../../datasets/720e46f5-0f38-48be-86ef-1fcbae258a6f/metadata.yml \ No newline at end of file diff --git a/.renku/renku.ini b/.renku/renku.ini index 5a3d87dcf4758670ef89657de1ca745dfdaf9253..fffe2f42884b0c252d2cba3520a9a10d848ad1c9 100644 --- a/.renku/renku.ini +++ b/.renku/renku.ini @@ -1,2 +1,4 @@ [renku "interactive"] default_url = /lab +lfs_auto_fetch = true + diff --git a/.renku/workflow/a38f8d703e0c4c55a2e3f49bbf15466e_papermill.cwl b/.renku/workflow/a38f8d703e0c4c55a2e3f49bbf15466e_papermill.cwl new file mode 100644 index 0000000000000000000000000000000000000000..ee8c3ca5ce03d4f3c0ec3a8f6aefcba58f7aa740 --- /dev/null +++ b/.renku/workflow/a38f8d703e0c4c55a2e3f49bbf15466e_papermill.cwl @@ -0,0 +1,115 @@ +arguments: [] +baseCommand: +- papermill +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: ts_folder + inputBinding: + position: 1 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_2: + default: + class: Directory + listing: [] + path: ../../data/covid-19_jhu-csse + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: Directory + input_3: + default: wb_path + inputBinding: + position: 3 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_4: + default: + class: File + path: ../../data/worldbank/SP.POP.TOTL.zip + inputBinding: + position: 4 + separate: true + shellQuote: true + streamable: false + type: File + input_5: + default: out_folder + inputBinding: + position: 5 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_6: + default: data/covid-19_rates + inputBinding: + position: 6 + separate: true + 
shellQuote: true + streamable: false + type: string + input_7: + default: + class: File + path: ../../notebooks/ToRates.ipynb + inputBinding: + position: 7 + prefix: --inject-paths + separate: true + shellQuote: true + streamable: false + type: File + input_8: + default: runs/ToRates.run.ipynb + inputBinding: + position: 8 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_8) + streamable: false + type: File + output_1: + outputBinding: + glob: $(inputs.input_6) + streamable: false + type: Directory +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: runs + writable: true + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/covid-19_rates + writable: true + - entry: $(inputs.input_2) + entryname: data/covid-19_jhu-csse + writable: false + - entry: $(inputs.input_4) + entryname: data/worldbank/SP.POP.TOTL.zip + writable: false + - entry: $(inputs.input_7) + entryname: notebooks/ToRates.ipynb + writable: false +successCodes: [] +temporaryFailCodes: [] diff --git a/.renku/workflow/b1c0a33b463d406a820ff0a61fb3a526_papermill.cwl b/.renku/workflow/b1c0a33b463d406a820ff0a61fb3a526_papermill.cwl new file mode 100644 index 0000000000000000000000000000000000000000..335e476c4a827ba842a68d5ded0cab96ecc5ad9e --- /dev/null +++ b/.renku/workflow/b1c0a33b463d406a820ff0a61fb3a526_papermill.cwl @@ -0,0 +1,91 @@ +arguments: [] +baseCommand: +- papermill +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: ts_folder + inputBinding: + position: 1 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_2: + default: + class: Directory + listing: [] + path: ../../data/covid-19_jhu-csse + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: 
Directory + input_3: + default: rates_folder + inputBinding: + position: 3 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_4: + default: + class: Directory + listing: [] + path: ../../data/covid-19_rates + inputBinding: + position: 4 + separate: true + shellQuote: true + streamable: false + type: Directory + input_5: + default: + class: File + path: ../../notebooks/Dashboard.ipynb + inputBinding: + position: 5 + prefix: --inject-paths + separate: true + shellQuote: true + streamable: false + type: File + input_6: + default: runs/Dashboard.run.ipynb + inputBinding: + position: 6 + separate: true + shellQuote: true + streamable: false + type: string +outputs: + output_0: + outputBinding: + glob: $(inputs.input_6) + streamable: false + type: File +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: runs + writable: true + - entry: $(inputs.input_2) + entryname: data/covid-19_jhu-csse + writable: false + - entry: $(inputs.input_4) + entryname: data/covid-19_rates + writable: false + - entry: $(inputs.input_5) + entryname: notebooks/Dashboard.ipynb + writable: false +successCodes: [] +temporaryFailCodes: [] diff --git a/README.md b/README.md index a135e86b0a990be4804fecf64c775187f201c0eb..6162ee1b495f28c56215a42efa34ebb51a26d6c5 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,15 @@ # covid-19-dashboard -This is a Renku project - basically a git repository with some -bells and whistles. You'll find we have already created some -useful things like `data` and `notebooks` directories and -a `Dockerfile`. +A Renku project for exploring Covid-19 data sources. -## Working with the project +# Data Sources -The simplest way to start your project is right from the Renku -platform - just click on the `Environments` tab and start a new session. 
-This will start an interactive environment right in your browser. +## Covid-19 -To work with the project anywhere outside the Renku platform, -click the `Settings` tab where you will find the -git repo URLs - use `git` to clone the project on whichever machine you want. +- https://github.com/CSSEGISandData/COVID-19 +- https://github.com/COVID19Tracking/covid-19-crawler +- https://github.com/openZH/covid_19 -### Changing interactive environment dependencies +## General -Initially we install a very minimal set of packages to keep the images small. -However, you can add python and conda packages in `requirements.txt` and -`environment.yml` to your heart's content. If you need more fine-grained -control over your environment, please see [the documentation](https://renku.readthedocs.io/en/latest/user/advanced_interfaces.html#dockerfile-modifications). - -## Project configuration - -Project options can be found in `.renku/renku.ini`. In this -project there is currently only one option, which specifies -the default type of environment to open, in this case `/lab` for -JupyterLab. You may also choose `/tree` to get to the "classic" Jupyter -interface. - -## Moving forward - -Once you feel at home with your project, we recommend that you replace -this README file with your own project documentation! Happy data wrangling! 
+- https://data.worldbank.org/indicator/SP.POP.TOTL diff --git a/data/covid-19_jhu-csse/time_series_19-covid-Confirmed.csv b/data/covid-19_jhu-csse/time_series_19-covid-Confirmed.csv new file mode 100644 index 0000000000000000000000000000000000000000..40bf63b8c748900b9b4b02affb4c569d80807754 --- /dev/null +++ b/data/covid-19_jhu-csse/time_series_19-covid-Confirmed.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c16b5036fc6eab53b7e85db11dcfbc9f0eb187092219feb8c8767af7b4113a +size 50017 diff --git a/data/covid-19_jhu-csse/time_series_19-covid-Deaths.csv b/data/covid-19_jhu-csse/time_series_19-covid-Deaths.csv new file mode 100644 index 0000000000000000000000000000000000000000..10fd330af2955add95e1a994e69af63791c56a94 --- /dev/null +++ b/data/covid-19_jhu-csse/time_series_19-covid-Deaths.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:292b7833b07ced4c7388eac4a6783299ad90b3d3261a3366757b1366b92e7489 +size 46566 diff --git a/data/covid-19_jhu-csse/time_series_19-covid-Recovered.csv b/data/covid-19_jhu-csse/time_series_19-covid-Recovered.csv new file mode 100644 index 0000000000000000000000000000000000000000..cf8446fa038c9230edef9cdaf82d63b4649ed8fe --- /dev/null +++ b/data/covid-19_jhu-csse/time_series_19-covid-Recovered.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81dfbd00775fd18e47153f39c001ce3b3d9c2f2ab3f722a54391dc83b696f0b3 +size 48321 diff --git a/data/covid-19_rates/ts_rates_19-covid-confirmed.csv b/data/covid-19_rates/ts_rates_19-covid-confirmed.csv new file mode 100644 index 0000000000000000000000000000000000000000..68469b73899f1a13e5dfc305c01e517f2372300f --- /dev/null +++ b/data/covid-19_rates/ts_rates_19-covid-confirmed.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:953941d2e4414e15f0e840d13c806b56d0cfda23f402d2d8daf840ef9e6ba666 +size 55521 diff --git a/data/covid-19_rates/ts_rates_19-covid-deaths.csv 
b/data/covid-19_rates/ts_rates_19-covid-deaths.csv new file mode 100644 index 0000000000000000000000000000000000000000..927fd93f7b7d3b87e3dc685bc8e283c4d007d41d --- /dev/null +++ b/data/covid-19_rates/ts_rates_19-covid-deaths.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d07625575b61e275be3ba1b85e04dc96984df348680d2c3f5866a4443144138 +size 28842 diff --git a/data/covid-19_rates/ts_rates_19-covid-recovered.csv b/data/covid-19_rates/ts_rates_19-covid-recovered.csv new file mode 100644 index 0000000000000000000000000000000000000000..c35fa7b034cd61892a7d26c0bb897288662cf439 --- /dev/null +++ b/data/covid-19_rates/ts_rates_19-covid-recovered.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae1d5842d547e04eec2706bb61a5f5c019f244150dbab899c675ad67f166524 +size 38957 diff --git a/data/worldbank/SP.POP.TOTL.zip b/data/worldbank/SP.POP.TOTL.zip new file mode 100644 index 0000000000000000000000000000000000000000..3fb3f388f06154e4b2db0637095cd43c233b031f --- /dev/null +++ b/data/worldbank/SP.POP.TOTL.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4769dd8f6f38f1b8548ac4c8cebb933d866eaf496ff9cc4f48ed2c990e19a8 +size 78909 diff --git a/notebooks/Dashboard.ipynb b/notebooks/Dashboard.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b7f64b2ba3651d24319f568fbe5b5810b04fd4ac --- /dev/null +++ b/notebooks/Dashboard.ipynb @@ -0,0 +1,257 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "from IPython.display import display, HTML, Markdown" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "ts_folder = \"../data/covid-19_jhu-csse/\"\n", + "rates_folder = \"../data/covid-19_rates/\"\n", + "out_folder = None\n", + "PAPERMILL_OUTPUT_PATH = None" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Read in the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def read_jhu_covid_df(name):\n", + " filename = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n", + " df = pd.read_csv(filename)\n", + " df = df.set_index(['Province/State', 'Country/Region', 'Lat', 'Long'])\n", + " df.columns = pd.to_datetime(df.columns)\n", + " return df\n", + "\n", + "\n", + "jhu_frames_map = {\n", + " \"confirmed\": read_jhu_covid_df(\"Confirmed\"),\n", + " \"deaths\": read_jhu_covid_df(\"Deaths\"),\n", + " \"recovered\": read_jhu_covid_df(\"Recovered\")\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def read_rates_covid_df(name):\n", + " filename = os.path.join(rates_folder, f\"ts_rates_19-covid-{name}.csv\")\n", + " df = pd.read_csv(filename).drop(\"Unnamed: 0\", axis=1)\n", + " df = df.set_index(['Country/Region'])\n", + " df.columns = pd.to_datetime(df.columns)\n", + " return df\n", + "\n", + "\n", + "rates_frames_map = {\n", + " \"confirmed\": read_rates_covid_df(\"confirmed\"),\n", + " \"deaths\": read_rates_covid_df(\"deaths\"),\n", + " \"recovered\": read_rates_covid_df(\"recovered\")\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compile data needed for the visualizations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compute geospatial coordinates\n", + "country_coords_df = jhu_frames_map['confirmed'].reset_index([2,3])[['Lat', 'Long']]\n", + "country_coords_df = country_coords_df.groupby(level='Country/Region').mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Identify countries with 100 
or more cases\n", + "case_count_ser = jhu_frames_map['confirmed'].iloc[:,-1].groupby(level='Country/Region').sum()\n", + "countries_over_thresh = case_count_ser[case_count_ser > 99].index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Questions About COVID-19 and Its Spread\n", + "\n", + "These plots should be taken with a large grain of salt. I am not an epidemiologist, so the analyses shown here are completely naive. There are large discrepancies in the data from different countries for a variety of reasons (rates of testing, demographics, etc.) that make direct comparisons inaccurate. Nonetheless, I think there is a lot of interesting information in this data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_ts = jhu_frames_map['confirmed'].iloc[:,-1].name.strftime(\"%b %d %Y\")\n", + "display(HTML(f\"<em>Data up to {data_ts}</em>\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How are cases per 100,000 distributed geographically?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import altair as alt\n", + "from vega_datasets import data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compile the basic df\n", + "map_df = pd.concat([\n", + " rates_frames_map['confirmed'].iloc[:,-1],\n", + " rates_frames_map['deaths'].iloc[:,-1],\n", + " rates_frames_map['recovered'].iloc[:,-1],\n", + " country_coords_df], axis=1)\n", + "# Restrict to countries with 100 or more cases\n", + "map_df = map_df.loc[countries_over_thresh].dropna()\n", + "map_df = map_df.reset_index()\n", + "map_df.columns = ['Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Lat', 'Long']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def map_of_variable(map_df, variable):\n", + " # Data generators for the background\n", + " sphere = alt.sphere()\n", + " graticule = alt.graticule()\n", + "\n", + " # Source of land data\n", + " source = alt.topo_feature(data.world_110m.url, 'countries')\n", + "\n", + " # Layering and configuring the components\n", + " p = alt.layer(\n", + " alt.Chart(sphere).mark_geoshape(fill='#cae6ef'),\n", + " alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5),\n", + " alt.Chart(source).mark_geoshape(fill='#dddddd', stroke='#aaaaaa'),\n", + " alt.Chart(map_df).mark_circle(opacity=0.6).encode(\n", + " longitude='Long:Q',\n", + " latitude='Lat:Q',\n", + " size=alt.Size(f'{variable}:Q', title=\"Cases\"),\n", + " color=alt.value('steelblue'),\n", + " tooltip=[\"Country/Region:N\", \"Confirmed:Q\", \"Deaths:Q\", \"Recovered:Q\"]\n", + " )\n", + " ).project(\n", + " 'naturalEarth1'\n", + " ).properties(width=600, height=400, title=f\"{variable} cases per 100,000\"\n", + " ).configure_view(stroke=None)\n", + " return p" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, 
+ "outputs": [], + "source": [ + "display(map_of_variable(map_df, 'Confirmed'))\n", + "display(HTML('''\n", + "<p style=\"font-size: smaller\">Data Source: \n", + " <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a> and\n", + " <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>\n", + "</p>'''))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bars = alt.Chart(map_df).mark_bar().encode(\n", + " x='Confirmed:Q',\n", + " y=alt.Y(\"Country/Region:N\", sort='-x')\n", + ")\n", + "\n", + "text = bars.mark_text(\n", + " align='left',\n", + " baseline='middle',\n", + " dx=3 # Nudges text to right so it doesn't appear on top of the bar\n", + ").encode(\n", + " text=alt.Text('Confirmed:Q', format=\".3\")\n", + ")\n", + "\n", + "(bars + text).properties(height=900)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/Preprocessing-Play.ipynb b/notebooks/Preprocessing-Play.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..71d7c4c09ee6ccfa63f74dde68db7b2a5bae675c --- /dev/null +++ b/notebooks/Preprocessing-Play.ipynb @@ -0,0 +1,804 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Read in JHU CSSE data\n", + "\n", + "I will switch to [xarray](http://xarray.pydata.org/en/stable/), but ATM, it's easier like this..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def read_jhu_covid_df(name):\n", + " filename = f\"../data/covid-19_jhu-csse/time_series_19-covid-{name}.csv\"\n", + " df = pd.read_csv(filename)\n", + " df = df.set_index(['Province/State', 'Country/Region', 'Lat', 'Long'])\n", + " df.columns = pd.to_datetime(df.columns)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "frames_map = {\n", + " \"confirmed\": read_jhu_covid_df(\"Confirmed\"),\n", + " \"deaths\": read_jhu_covid_df(\"Deaths\"),\n", + " \"recovered\": read_jhu_covid_df(\"Recovered\")\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def current_region_totals_df(frames_map):\n", + " sers = [df.groupby(level='Country/Region').sum().iloc[:,-1].sort_values(ascending=False)\n", + " for name, df in frames_map.items()]\n", + " for name, ser in zip(frames_map, sers):\n", + " ser.name = name\n", + " return pd.concat(sers, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>confirmed</th>\n", + " <th>deaths</th>\n", + " <th>recovered</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Mainland China</th>\n", + " <td>80757</td>\n", + " <td>3136</td>\n", + " <td>60106</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Italy</th>\n", + " <td>10149</td>\n", + " <td>631</td>\n", + " 
<td>724</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Iran (Islamic Republic of)</th>\n", + " <td>8042</td>\n", + " <td>291</td>\n", + " <td>2731</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Republic of Korea</th>\n", + " <td>7513</td>\n", + " <td>54</td>\n", + " <td>247</td>\n", + " </tr>\n", + " <tr>\n", + " <th>France</th>\n", + " <td>1784</td>\n", + " <td>33</td>\n", + " <td>12</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Spain</th>\n", + " <td>1695</td>\n", + " <td>35</td>\n", + " <td>32</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US</th>\n", + " <td>1670</td>\n", + " <td>56</td>\n", + " <td>15</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Germany</th>\n", + " <td>1457</td>\n", + " <td>2</td>\n", + " <td>18</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Others</th>\n", + " <td>696</td>\n", + " <td>6</td>\n", + " <td>40</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Japan</th>\n", + " <td>581</td>\n", + " <td>10</td>\n", + " <td>101</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Switzerland</th>\n", + " <td>491</td>\n", + " <td>3</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Norway</th>\n", + " <td>400</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>UK</th>\n", + " <td>382</td>\n", + " <td>6</td>\n", + " <td>18</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Netherlands</th>\n", + " <td>382</td>\n", + " <td>4</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Sweden</th>\n", + " <td>355</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Belgium</th>\n", + " <td>267</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Denmark</th>\n", + " <td>262</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Austria</th>\n", + " <td>182</td>\n", + " <td>0</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Singapore</th>\n", + " <td>160</td>\n", + " <td>0</td>\n", + " <td>78</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>Malaysia</th>\n", + " <td>129</td>\n", + " <td>0</td>\n", + " <td>24</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Hong Kong SAR</th>\n", + " <td>120</td>\n", + " <td>3</td>\n", + " <td>65</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Bahrain</th>\n", + " <td>110</td>\n", + " <td>0</td>\n", + " <td>22</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Australia</th>\n", + " <td>107</td>\n", + " <td>3</td>\n", + " <td>21</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " confirmed deaths recovered\n", + "Mainland China 80757 3136 60106\n", + "Italy 10149 631 724\n", + "Iran (Islamic Republic of) 8042 291 2731\n", + "Republic of Korea 7513 54 247\n", + "France 1784 33 12\n", + "Spain 1695 35 32\n", + "US 1670 56 15\n", + "Germany 1457 2 18\n", + "Others 696 6 40\n", + "Japan 581 10 101\n", + "Switzerland 491 3 3\n", + "Norway 400 0 1\n", + "UK 382 6 18\n", + "Netherlands 382 4 0\n", + "Sweden 355 0 1\n", + "Belgium 267 0 1\n", + "Denmark 262 0 1\n", + "Austria 182 0 4\n", + "Singapore 160 0 78\n", + "Malaysia 129 0 24\n", + "Hong Kong SAR 120 3 65\n", + "Bahrain 110 0 22\n", + "Australia 107 3 21" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "current_totals_df = current_region_totals_df(frames_map)\n", + "current_totals_df[current_totals_df['confirmed'] > 100]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Read in World Bank data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import zipfile\n", + "zf = zipfile.ZipFile(\"../data/worldbank/SP.POP.TOTL.zip\")\n", + "pop_df = pd.read_csv(zf.open(\"API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv\"), skiprows=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There is 2018 pop data for all countries/regions except Eritrea" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Country Name</th>\n", + " <th>Country Code</th>\n", + " <th>Indicator Name</th>\n", + " <th>Indicator Code</th>\n", + " <th>1960</th>\n", + " <th>1961</th>\n", + " <th>1962</th>\n", + " <th>1963</th>\n", + " <th>1964</th>\n", + " <th>1965</th>\n", + " <th>...</th>\n", + " <th>2011</th>\n", + " <th>2012</th>\n", + " <th>2013</th>\n", + " <th>2014</th>\n", + " <th>2015</th>\n", + " <th>2016</th>\n", + " <th>2017</th>\n", + " <th>2018</th>\n", + " <th>2019</th>\n", + " <th>Unnamed: 64</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>67</th>\n", + " <td>Eritrea</td>\n", + " <td>ERI</td>\n", + " <td>Population, total</td>\n", + " <td>SP.POP.TOTL</td>\n", + " <td>1007590.0</td>\n", + " <td>1033328.0</td>\n", + " <td>1060486.0</td>\n", + " <td>1088854.0</td>\n", + " <td>1118159.0</td>\n", + " <td>1148189.0</td>\n", + " <td>...</td>\n", + " <td>3213972.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>108</th>\n", + " <td>Not classified</td>\n", + " <td>INX</td>\n", + " <td>Population, total</td>\n", + " <td>SP.POP.TOTL</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " 
<td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>2 rows × 65 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Country Name Country Code Indicator Name Indicator Code 1960 \\\n", + "67 Eritrea ERI Population, total SP.POP.TOTL 1007590.0 \n", + "108 Not classified INX Population, total SP.POP.TOTL NaN \n", + "\n", + " 1961 1962 1963 1964 1965 ... 2011 \\\n", + "67 1033328.0 1060486.0 1088854.0 1118159.0 1148189.0 ... 3213972.0 \n", + "108 NaN NaN NaN NaN NaN ... NaN \n", + "\n", + " 2012 2013 2014 2015 2016 2017 2018 2019 Unnamed: 64 \n", + "67 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "108 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + "[2 rows x 65 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop_df[pd.isna(pop_df['2018'])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fix the country/region names that differ between the World Bank population data and the JHU CSSE data." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "region_wb_jhu_map = {\n", + " 'China': 'Mainland China',\n", + " 'Iran, Islamic Rep.': 'Iran (Islamic Republic of)',\n", + " 'Korea, Rep.': 'Republic of Korea',\n", + " 'United States': 'US',\n", + " 'United Kingdom': 'UK',\n", + " 'Hong Kong SAR, China': 'Hong Kong SAR',\n", + " 'Egypt, Arab Rep.': 'Egypt',\n", + " 'Vietnam': 'Viet Nam',\n", + " 'Macao SAR, China': 'Macao SAR',\n", + " 'Slovak Republic': 'Slovakia',\n", + " 'Moldova': 'Republic of Moldova',\n", + " 'St. 
Martin (French part)': 'Saint Martin',\n", + " 'Brunei Darussalam': 'Brunei'\n", + "}\n", + "current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')['2018']\n", + "data_pop_ser = current_pop_ser[current_pop_ser.index.isin(current_totals_df.index)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are some regions that we cannot resolve, but we will just ignore these." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>confirmed</th>\n", + " <th>deaths</th>\n", + " <th>recovered</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Others</th>\n", + " <td>696</td>\n", + " <td>6</td>\n", + " <td>40</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Taipei and environs</th>\n", + " <td>47</td>\n", + " <td>1</td>\n", + " <td>17</td>\n", + " </tr>\n", + " <tr>\n", + " <th>occupied Palestinian territory</th>\n", + " <td>25</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>French Guiana</th>\n", + " <td>5</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Martinique</th>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Holy See</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Saint Barthelemy</th>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], 
+ "text/plain": [ + " confirmed deaths recovered\n", + "Others 696 6 40\n", + "Taipei and environs 47 1 17\n", + "occupied Palestinian territory 25 0 0\n", + "French Guiana 5 0 0\n", + "Martinique 2 0 0\n", + "Holy See 1 0 0\n", + "Saint Barthelemy 1 0 0" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "current_totals_df[current_totals_df.index.isin(data_pop_ser.index) == False]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compute rates per 100,000 for regions with more than 100 cases" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>confirmed</th>\n", + " <th>deaths</th>\n", + " <th>recovered</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Italy</th>\n", + " <td>16.794282</td>\n", + " <td>1.044161</td>\n", + " <td>1.198055</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Republic of Korea</th>\n", + " <td>14.550136</td>\n", + " <td>0.104580</td>\n", + " <td>0.478355</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Iran (Islamic Republic of)</th>\n", + " <td>9.831264</td>\n", + " <td>0.355745</td>\n", + " <td>3.338620</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Norway</th>\n", + " <td>7.526810</td>\n", + " <td>0.000000</td>\n", + " <td>0.018817</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Bahrain</th>\n", + " <td>7.008874</td>\n", + " <td>0.000000</td>\n", + " <td>1.401775</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Mainland China</th>\n", + " 
<td>5.798468</td>\n", + " <td>0.225169</td>\n", + " <td>4.315697</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Switzerland</th>\n", + " <td>5.765250</td>\n", + " <td>0.035226</td>\n", + " <td>0.035226</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Denmark</th>\n", + " <td>4.519231</td>\n", + " <td>0.000000</td>\n", + " <td>0.017249</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Spain</th>\n", + " <td>3.627705</td>\n", + " <td>0.074908</td>\n", + " <td>0.068488</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Sweden</th>\n", + " <td>3.486143</td>\n", + " <td>0.000000</td>\n", + " <td>0.009820</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Singapore</th>\n", + " <td>2.837546</td>\n", + " <td>0.000000</td>\n", + " <td>1.383303</td>\n", + " </tr>\n", + " <tr>\n", + " <th>France</th>\n", + " <td>2.663194</td>\n", + " <td>0.049263</td>\n", + " <td>0.017914</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Belgium</th>\n", + " <td>2.337580</td>\n", + " <td>0.000000</td>\n", + " <td>0.008755</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Netherlands</th>\n", + " <td>2.216932</td>\n", + " <td>0.023214</td>\n", + " <td>0.000000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Austria</th>\n", + " <td>2.057186</td>\n", + " <td>0.000000</td>\n", + " <td>0.045213</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Germany</th>\n", + " <td>1.756947</td>\n", + " <td>0.002412</td>\n", + " <td>0.021706</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Hong Kong SAR</th>\n", + " <td>1.610522</td>\n", + " <td>0.040263</td>\n", + " <td>0.872366</td>\n", + " </tr>\n", + " <tr>\n", + " <th>UK</th>\n", + " <td>0.574531</td>\n", + " <td>0.009024</td>\n", + " <td>0.027072</td>\n", + " </tr>\n", + " <tr>\n", + " <th>US</th>\n", + " <td>0.510442</td>\n", + " <td>0.017117</td>\n", + " <td>0.004585</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Japan</th>\n", + " <td>0.459183</td>\n", + " <td>0.007903</td>\n", + " <td>0.079824</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Australia</th>\n", + " <td>0.428131</td>\n", + " <td>0.012004</td>\n", 
+ " <td>0.084026</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Malaysia</th>\n", + " <td>0.409153</td>\n", + " <td>0.000000</td>\n", + " <td>0.076121</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " confirmed deaths recovered\n", + "Italy 16.794282 1.044161 1.198055\n", + "Republic of Korea 14.550136 0.104580 0.478355\n", + "Iran (Islamic Republic of) 9.831264 0.355745 3.338620\n", + "Norway 7.526810 0.000000 0.018817\n", + "Bahrain 7.008874 0.000000 1.401775\n", + "Mainland China 5.798468 0.225169 4.315697\n", + "Switzerland 5.765250 0.035226 0.035226\n", + "Denmark 4.519231 0.000000 0.017249\n", + "Spain 3.627705 0.074908 0.068488\n", + "Sweden 3.486143 0.000000 0.009820\n", + "Singapore 2.837546 0.000000 1.383303\n", + "France 2.663194 0.049263 0.017914\n", + "Belgium 2.337580 0.000000 0.008755\n", + "Netherlands 2.216932 0.023214 0.000000\n", + "Austria 2.057186 0.000000 0.045213\n", + "Germany 1.756947 0.002412 0.021706\n", + "Hong Kong SAR 1.610522 0.040263 0.872366\n", + "UK 0.574531 0.009024 0.027072\n", + "US 0.510442 0.017117 0.004585\n", + "Japan 0.459183 0.007903 0.079824\n", + "Australia 0.428131 0.012004 0.084026\n", + "Malaysia 0.409153 0.000000 0.076121" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "current_per_100000_df = current_totals_df[current_totals_df['confirmed'] > 100]\n", + "current_per_100000_df = current_per_100000_df.div(data_pop_ser, 'index').mul(100000).dropna()\n", + "current_per_100000_df.sort_values('confirmed', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/ToRates.ipynb b/notebooks/ToRates.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..6a200ae19ce750b8e141b32f30a2dc919ecf3eb1 --- /dev/null +++ b/notebooks/ToRates.ipynb @@ -0,0 +1,220 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert Series to Rates per 100,000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "ts_folder = \"../data/covid-19_jhu-csse/\"\n", + "wb_path = \"../data/worldbank/SP.POP.TOTL.zip\"\n", + "out_folder = None\n", + "PAPERMILL_OUTPUT_PATH = None" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [ + "parameters" + ] + }, + "source": [ + "## Read in JHU CSSE data\n", + "\n", + "I will switch to [xarray](http://xarray.pydata.org/en/stable/), but ATM, it's easier like this..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def read_jhu_covid_region_df(name):\n", + " filename = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n", + " df = pd.read_csv(filename)\n", + " df = df.set_index(['Country/Region', 'Province/State', 'Lat', 'Long'])\n", + " df.columns = pd.to_datetime(df.columns)\n", + " region_df = df.groupby(level='Country/Region').sum()\n", + " loc_df = df.reset_index([2,3]).groupby(level='Country/Region').mean()[['Long', 'Lat']]\n", + " return region_df.join(loc_df).set_index(['Long', 'Lat'], append=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "frames_map = {\n", + " \"confirmed\": read_jhu_covid_region_df(\"Confirmed\"),\n", + " \"deaths\": read_jhu_covid_region_df(\"Deaths\"),\n", + " \"recovered\": read_jhu_covid_region_df(\"Recovered\")\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "frames_map['confirmed'].sort_values(frames_map['confirmed'].columns[-1], ascending=False).head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Read in World Bank data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import zipfile\n", + "zf = zipfile.ZipFile(wb_path)\n", + "pop_df = pd.read_csv(zf.open(\"API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv\"), skiprows=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There is 2018 population data for all countries/regions except Eritrea and the 'Not classified' group." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pop_df[pd.isna(pop_df['2018'])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fix the country/region names that differ between the World Bank population data and the JHU CSSE data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "region_wb_jhu_map = {\n", + " 'China': 'Mainland China',\n", + " 'Iran, Islamic Rep.': 'Iran (Islamic Republic of)',\n", + " 'Korea, Rep.': 'Republic of Korea',\n", + " 'United States': 'US',\n", + " 'United Kingdom': 'UK',\n", + " 'Hong Kong SAR, China': 'Hong Kong SAR',\n", + " 'Egypt, Arab Rep.': 'Egypt',\n", + " 'Vietnam': 'Viet Nam',\n", + " 'Macao SAR, China': 'Macao SAR',\n", + " 'Slovak Republic': 'Slovakia',\n", + " 'Moldova': 'Republic of Moldova',\n", + " 'St. Martin (French part)': 'Saint Martin',\n", + " 'Brunei Darussalam': 'Brunei'\n", + "}\n", + "current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')['2018']\n", + "data_pop_ser = current_pop_ser[current_pop_ser.index.isin(frames_map['confirmed'].index.levels[0])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are some regions that we cannot resolve, but we will just ignore these." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compute rates per 100,000 for regions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def cases_to_rates_df(df):\n", + " per_100000_df = df.reset_index([1, 2], drop=True)\n", + " per_100000_df = per_100000_df.div(data_pop_ser, 'index').mul(100000).dropna()\n", + " per_100000_df.index.name = 'Country/Region'\n", + " return per_100000_df\n", + " \n", + "def frames_to_rates(frames_map):\n", + " return {k: cases_to_rates_df(v) for k,v in frames_map.items()}\n", + "\n", + "\n", + "rates_map = frames_to_rates(frames_map)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if PAPERMILL_OUTPUT_PATH:\n", + " for k, v in rates_map.items():\n", + " out_path = os.path.join(out_folder, f\"ts_rates_19-covid-{k}.csv\")\n", + " v.reset_index().to_csv(out_path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/requirements.txt b/requirements.txt index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..bf54e26b92023e8cb36b44ad827574dc9703aa79 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,98 @@ +alembic==1.4.0 +altair==4.0.1 +ansiwrap==0.8.4 +argcomplete==1.11.1 +async-generator==1.10 +attrs==19.3.0 +backcall==0.1.0 +bleach==3.1.1 +blinker==1.4 +certifi==2019.11.28 +certipy==0.1.3 +cffi==1.13.2 +chardet==3.0.4 +Click==7.0 +colorama==0.4.3 +conda==4.7.12 +conda-package-handling==1.6.0 +cryptography==2.8 +decorator==4.4.1 +defusedxml==0.6.0 +distro==1.4.0 +entrypoints==0.3 +future==0.18.2 
+gitdb==4.0.2 +GitPython==3.1.0 +idna==2.9 +importlib-metadata==1.5.0 +ipykernel==5.1.4 +ipython==7.12.0 +ipython-genutils==0.2.0 +jedi==0.16.0 +Jinja2==2.11.1 +json5==0.9.0 +jsonschema==3.2.0 +jupyter-client==6.0.0 +jupyter-core==4.6.3 +jupyter-telemetry==0.0.5 +jupyterhub==0.9.6 +jupyterlab==1.2.5 +jupyterlab-git==0.9.0 +jupyterlab-server==1.0.6 +Mako==1.1.0 +MarkupSafe==1.1.1 +mistune==0.8.4 +nbconvert==5.6.1 +nbdime==1.1.0 +nbformat==5.0.4 +notebook==6.0.3 +numpy==1.18.1 +oauthlib==3.0.1 +pamela==1.0.0 +pandas==1.0.1 +pandocfilters==1.4.2 +papermill==1.1.0 +parso==0.6.1 +pexpect==4.8.0 +pickleshare==0.7.5 +pipx==0.15.1.3 +powerline-shell==0.7.0 +prometheus-client==0.7.1 +prompt-toolkit==3.0.3 +ptyprocess==0.6.0 +pycosat==0.6.3 +pycparser==2.19 +pycurl==7.43.0.5 +Pygments==2.5.2 +PyJWT==1.7.1 +pyOpenSSL==19.1.0 +pyrsistent==0.15.7 +PySocks==1.7.1 +python-dateutil==2.8.1 +python-editor==1.0.4 +python-json-logger==0.1.11 +python-oauth2==1.1.1 +pytz==2019.3 +PyYAML==5.3 +pyzmq==19.0.0 +requests==2.23.0 +ruamel-yaml==0.15.80 +ruamel.yaml.clib==0.2.0 +Send2Trash==1.5.0 +six==1.14.0 +smmap==3.0.1 +SQLAlchemy==1.3.13 +tenacity==6.1.0 +terminado==0.8.3 +testpath==0.4.4 +textwrap3==0.9.2 +toolz==0.10.0 +tornado==6.0.3 +tqdm==4.43.0 +traitlets==4.3.3 +urllib3==1.25.7 +userpath==1.3.0 +vega-datasets==0.8.0 +wcwidth==0.1.8 +webencodings==0.5.1 +zipp==3.0.0 diff --git a/runs/Dashboard.run.ipynb b/runs/Dashboard.run.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8ee68b5da01772bb9e712ece6093a1cbf2df62c1 --- /dev/null +++ b/runs/Dashboard.run.ipynb @@ -0,0 +1,587 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "papermill": { + "duration": 0.35409, + "end_time": "2020-03-13T17:04:15.061697", + "exception": false, + "start_time": "2020-03-13T17:04:14.707607", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "from IPython.display import 
display, HTML, Markdown" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "papermill": { + "duration": 0.02437, + "end_time": "2020-03-13T17:04:15.106586", + "exception": false, + "start_time": "2020-03-13T17:04:15.082216", + "status": "completed" + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "ts_folder = \"../data/covid-19_jhu-csse/\"\n", + "rates_folder = \"../data/covid-19_rates/\"\n", + "out_folder = None\n", + "PAPERMILL_OUTPUT_PATH = None" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "papermill": { + "duration": 0.022646, + "end_time": "2020-03-13T17:04:15.137496", + "exception": false, + "start_time": "2020-03-13T17:04:15.114850", + "status": "completed" + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Parameters\n", + "PAPERMILL_INPUT_PATH = \"notebooks/Dashboard.ipynb\"\n", + "PAPERMILL_OUTPUT_PATH = \"runs/Dashboard.run.ipynb\"\n", + "ts_folder = \"./data/covid-19_jhu-csse/\"\n", + "rates_folder = \"./data/covid-19_rates/\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "papermill": { + "duration": 0.021072, + "end_time": "2020-03-13T17:04:15.168525", + "exception": false, + "start_time": "2020-03-13T17:04:15.147453", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Read in the data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "papermill": { + "duration": 0.065756, + "end_time": "2020-03-13T17:04:15.243131", + "exception": false, + "start_time": "2020-03-13T17:04:15.177375", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def read_jhu_covid_df(name):\n", + " filename = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n", + " df = pd.read_csv(filename)\n", + " df = df.set_index(['Province/State', 'Country/Region', 'Lat', 'Long'])\n", + " df.columns = pd.to_datetime(df.columns)\n", + " 
return df\n", + "\n", + "\n", + "jhu_frames_map = {\n", + " \"confirmed\": read_jhu_covid_df(\"Confirmed\"),\n", + " \"deaths\": read_jhu_covid_df(\"Deaths\"),\n", + " \"recovered\": read_jhu_covid_df(\"Recovered\")\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "papermill": { + "duration": 0.038039, + "end_time": "2020-03-13T17:04:15.289236", + "exception": false, + "start_time": "2020-03-13T17:04:15.251197", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def read_rates_covid_df(name):\n", + " filename = os.path.join(rates_folder, f\"ts_rates_19-covid-{name}.csv\")\n", + " df = pd.read_csv(filename).drop(\"Unnamed: 0\", axis=1)\n", + " df = df.set_index(['Country/Region'])\n", + " df.columns = pd.to_datetime(df.columns)\n", + " return df\n", + "\n", + "\n", + "rates_frames_map = {\n", + " \"confirmed\": read_rates_covid_df(\"confirmed\"),\n", + " \"deaths\": read_rates_covid_df(\"deaths\"),\n", + " \"recovered\": read_rates_covid_df(\"recovered\")\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "papermill": { + "duration": 0.017688, + "end_time": "2020-03-13T17:04:15.314301", + "exception": false, + "start_time": "2020-03-13T17:04:15.296613", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Compile data needed for the visualizations" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "papermill": { + "duration": 0.040604, + "end_time": "2020-03-13T17:04:15.374996", + "exception": false, + "start_time": "2020-03-13T17:04:15.334392", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Compute geospatial coordinates\n", + "country_coords_df = jhu_frames_map['confirmed'].reset_index([2,3])[['Lat', 'Long']]\n", + "country_coords_df = country_coords_df.groupby(level='Country/Region').mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { 
+ "papermill": { + "duration": 0.034326, + "end_time": "2020-03-13T17:04:15.424734", + "exception": false, + "start_time": "2020-03-13T17:04:15.390408", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Identify countries with 100 or more cases\n", + "case_count_ser = jhu_frames_map['confirmed'].iloc[:,-1].groupby(level='Country/Region').sum()\n", + "countries_over_thresh = case_count_ser[case_count_ser > 99].index" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.009134, + "end_time": "2020-03-13T17:04:15.448316", + "exception": false, + "start_time": "2020-03-13T17:04:15.439182", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Questions About COVID-19 and Its Spread\n", + "\n", + "These plots should be taken with a large grain of salt. I am not an epidemiologist, so the analyses shown here are completely naive. There are large discrepancies in the data from different countries for a variety of reasons (rates of testing, demographics, etc.) that make direct comparisons inaccurate. Nonetheless, I think there is a lot of interesting information in this data."
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "papermill": { + "duration": 0.041449, + "end_time": "2020-03-13T17:04:15.498158", + "exception": false, + "start_time": "2020-03-13T17:04:15.456709", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "<em>Data up to Mar 10 2020</em>" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "data_ts = jhu_frames_map['confirmed'].iloc[:,-1].name.strftime(\"%b %d %Y\")\n", + "display(HTML(f\"<em>Data up to {data_ts}</em>\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.009938, + "end_time": "2020-03-13T17:04:15.526404", + "exception": false, + "start_time": "2020-03-13T17:04:15.516466", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## How are cases per 100,000 distributed geographically?" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "papermill": { + "duration": 0.199352, + "end_time": "2020-03-13T17:04:15.733984", + "exception": false, + "start_time": "2020-03-13T17:04:15.534632", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import altair as alt\n", + "from vega_datasets import data" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "papermill": { + "duration": 0.045428, + "end_time": "2020-03-13T17:04:15.799713", + "exception": false, + "start_time": "2020-03-13T17:04:15.754285", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Compile the basic df\n", + "map_df = pd.concat([\n", + " rates_frames_map['confirmed'].iloc[:,-1],\n", + " rates_frames_map['deaths'].iloc[:,-1],\n", + " rates_frames_map['recovered'].iloc[:,-1],\n", + " country_coords_df], axis=1)\n", + "# Restrict to countries with 100 or more cases\n", + "map_df = 
map_df.loc[countries_over_thresh].dropna()\n", + "map_df = map_df.reset_index()\n", + "map_df.columns = ['Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Lat', 'Long']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "papermill": { + "duration": 0.030132, + "end_time": "2020-03-13T17:04:15.844929", + "exception": false, + "start_time": "2020-03-13T17:04:15.814797", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def map_of_variable(map_df, variable):\n", + " # Data generators for the background\n", + " sphere = alt.sphere()\n", + " graticule = alt.graticule()\n", + "\n", + " # Source of land data\n", + " source = alt.topo_feature(data.world_110m.url, 'countries')\n", + "\n", + " # Layering and configuring the components\n", + " p = alt.layer(\n", + " alt.Chart(sphere).mark_geoshape(fill='#cae6ef'),\n", + " alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5),\n", + " alt.Chart(source).mark_geoshape(fill='#dddddd', stroke='#aaaaaa'),\n", + " alt.Chart(map_df).mark_circle(opacity=0.6).encode(\n", + " longitude='Long:Q',\n", + " latitude='Lat:Q',\n", + " size=alt.Size(f'{variable}:Q', title=\"Cases\"),\n", + " color=alt.value('steelblue'),\n", + " tooltip=[\"Country/Region:N\", \"Confirmed:Q\", \"Deaths:Q\", \"Recovered:Q\"]\n", + " )\n", + " ).project(\n", + " 'naturalEarth1'\n", + " ).properties(width=600, height=400, title=f\"{variable} cases per 100,000\"\n", + " ).configure_view(stroke=None)\n", + " return p" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "papermill": { + "duration": 0.100208, + "end_time": "2020-03-13T17:04:15.958509", + "exception": false, + "start_time": "2020-03-13T17:04:15.858301", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "<div id=\"altair-viz-b79eb79d0e8f4b50be1688903bb8f9f0\"></div>\n", + "<script type=\"text/javascript\">\n", + " (function(spec, embedOpt){\n", 
+ " const outputDiv = document.getElementById(\"altair-viz-b79eb79d0e8f4b50be1688903bb8f9f0\");\n", + " const paths = {\n", + " \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n", + " \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n", + " \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.0.2?noext\",\n", + " \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n", + " };\n", + "\n", + " function loadScript(lib) {\n", + " return new Promise(function(resolve, reject) {\n", + " var s = document.createElement('script');\n", + " s.src = paths[lib];\n", + " s.async = true;\n", + " s.onload = () => resolve(paths[lib]);\n", + " s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n", + " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", + " });\n", + " }\n", + "\n", + " function showError(err) {\n", + " outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n", + " throw err;\n", + " }\n", + "\n", + " function displayChart(vegaEmbed) {\n", + " vegaEmbed(outputDiv, spec, embedOpt)\n", + " .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. 
See the javascript console for the full traceback.`));\n", + " }\n", + "\n", + " if(typeof define === \"function\" && define.amd) {\n", + " requirejs.config({paths});\n", + " require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n", + " } else if (typeof vegaEmbed === \"function\") {\n", + " displayChart(vegaEmbed);\n", + " } else {\n", + " loadScript(\"vega\")\n", + " .then(() => loadScript(\"vega-lite\"))\n", + " .then(() => loadScript(\"vega-embed\"))\n", + " .catch(showError)\n", + " .then(() => displayChart(vegaEmbed));\n", + " }\n", + " })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300, \"stroke\": null}}, \"layer\": [{\"data\": {\"sphere\": true}, \"mark\": {\"type\": \"geoshape\", \"fill\": \"#cae6ef\"}}, {\"data\": {\"graticule\": true}, \"mark\": {\"type\": \"geoshape\", \"stroke\": \"white\", \"strokeWidth\": 0.5}}, {\"data\": {\"url\": \"https://vega.github.io/vega-datasets/data/world-110m.json\", \"format\": {\"feature\": \"countries\", \"type\": \"topojson\"}}, \"mark\": {\"type\": \"geoshape\", \"fill\": \"#dddddd\", \"stroke\": \"#aaaaaa\"}}, {\"data\": {\"name\": \"data-433f706ddfa6221156e78c265aebad1f\"}, \"mark\": {\"type\": \"circle\", \"opacity\": 0.6}, \"encoding\": {\"color\": {\"value\": \"steelblue\"}, \"latitude\": {\"field\": \"Lat\", \"type\": \"quantitative\"}, \"longitude\": {\"field\": \"Long\", \"type\": \"quantitative\"}, \"size\": {\"type\": \"quantitative\", \"field\": \"Confirmed\", \"title\": \"Cases\"}, \"tooltip\": [{\"type\": \"nominal\", \"field\": \"Country/Region\"}, {\"type\": \"quantitative\", \"field\": \"Confirmed\"}, {\"type\": \"quantitative\", \"field\": \"Deaths\"}, {\"type\": \"quantitative\", \"field\": \"Recovered\"}]}}], \"height\": 400, \"projection\": {\"type\": \"naturalEarth1\"}, \"title\": \"Confirmed cases per 100,000\", \"width\": 600, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.0.2.json\", \"datasets\": 
{\"data-433f706ddfa6221156e78c265aebad1f\": [{\"Country/Region\": \"Australia\", \"Confirmed\": 0.4281306826095598, \"Deaths\": 0.012003663998398872, \"Recovered\": 0.0840256479887921, \"Lat\": -23.1315375, \"Long\": 140.0609875}, {\"Country/Region\": \"Austria\", \"Confirmed\": 2.057185925638154, \"Deaths\": 0.0, \"Recovered\": 0.04521287748655284, \"Lat\": 47.5162, \"Long\": 14.5501}, {\"Country/Region\": \"Bahrain\", \"Confirmed\": 7.00887387149166, \"Deaths\": 0.0, \"Recovered\": 1.4017747742983322, \"Lat\": 26.0275, \"Long\": 50.55}, {\"Country/Region\": \"Belgium\", \"Confirmed\": 2.3375802000128174, \"Deaths\": 0.0, \"Recovered\": 0.008754982022519914, \"Lat\": 50.8333, \"Long\": 4.0}, {\"Country/Region\": \"Denmark\", \"Confirmed\": 4.519231399481772, \"Deaths\": 0.0, \"Recovered\": 0.01724897480718234, \"Lat\": 56.2639, \"Long\": 9.5018}, {\"Country/Region\": \"France\", \"Confirmed\": 2.663193607427707, \"Deaths\": 0.04926311045129727, \"Recovered\": 0.017913858345926282, \"Lat\": 47.0, \"Long\": 2.0}, {\"Country/Region\": \"Germany\", \"Confirmed\": 1.7569474368355689, \"Deaths\": 0.0024117329263357162, \"Recovered\": 0.021705596337021446, \"Lat\": 51.0, \"Long\": 9.0}, {\"Country/Region\": \"Hong Kong SAR\", \"Confirmed\": 1.61052207757348, \"Deaths\": 0.040263051939337005, \"Recovered\": 0.8723661253523016, \"Lat\": 22.3, \"Long\": 114.2}, {\"Country/Region\": \"Iran (Islamic Republic of)\", \"Confirmed\": 9.831263513326588, \"Deaths\": 0.3557445514023921, \"Recovered\": 3.3386198277660917, \"Lat\": 32.0, \"Long\": 53.0}, {\"Country/Region\": \"Italy\", \"Confirmed\": 16.794281862259982, \"Deaths\": 1.0441611838689575, \"Recovered\": 1.198054987513669, \"Lat\": 43.0, \"Long\": 12.0}, {\"Country/Region\": \"Japan\", \"Confirmed\": 0.4591829073311989, \"Deaths\": 0.007903320263876057, \"Recovered\": 0.07982353466514817, \"Lat\": 36.0, \"Long\": 138.0}, {\"Country/Region\": \"Mainland China\", \"Confirmed\": 5.798467757569664, \"Deaths\": 
0.2251692718617392, \"Recovered\": 4.315696509732684, \"Lat\": 33.40693612903227, \"Long\": 111.54290322580646}, {\"Country/Region\": \"Malaysia\", \"Confirmed\": 0.4091525198482584, \"Deaths\": 0.0, \"Recovered\": 0.07612139904153642, \"Lat\": 2.5, \"Long\": 112.5}, {\"Country/Region\": \"Netherlands\", \"Confirmed\": 2.216932407413909, \"Deaths\": 0.02321395191009329, \"Recovered\": 0.0, \"Lat\": 52.1326, \"Long\": 5.2913}, {\"Country/Region\": \"Norway\", \"Confirmed\": 7.526810498997428, \"Deaths\": 0.0, \"Recovered\": 0.018817026247493568, \"Lat\": 60.472, \"Long\": 8.4689}, {\"Country/Region\": \"Republic of Korea\", \"Confirmed\": 14.550136054326911, \"Deaths\": 0.10457970809711876, \"Recovered\": 0.4783553314812655, \"Lat\": 36.0, \"Long\": 128.0}, {\"Country/Region\": \"Singapore\", \"Confirmed\": 2.837545551473431, \"Deaths\": 0.0, \"Recovered\": 1.3833034563432978, \"Lat\": 1.2833, \"Long\": 103.8333}, {\"Country/Region\": \"Spain\", \"Confirmed\": 3.6277054737195855, \"Deaths\": 0.07490837261367876, \"Recovered\": 0.06848765496107773, \"Lat\": 40.0, \"Long\": -4.0}, {\"Country/Region\": \"Sweden\", \"Confirmed\": 3.4861425832316537, \"Deaths\": 0.0, \"Recovered\": 0.009820119952765223, \"Lat\": 63.0, \"Long\": 16.0}, {\"Country/Region\": \"Switzerland\", \"Confirmed\": 5.7652500550986465, \"Deaths\": 0.035225560418118015, \"Recovered\": 0.035225560418118015, \"Lat\": 46.8182, \"Long\": 8.2275}, {\"Country/Region\": \"UK\", \"Confirmed\": 0.5745312032182892, \"Deaths\": 0.009024050312329149, \"Recovered\": 0.027072150936987446, \"Lat\": 55.0, \"Long\": -3.0}, {\"Country/Region\": \"US\", \"Confirmed\": 0.5104420019995022, \"Deaths\": 0.017116618031121033, \"Recovered\": 0.004584808401193134, \"Lat\": 38.77302746113991, \"Long\": -93.61047823834195}]}}, {\"mode\": \"vega-lite\"});\n", + "</script>" + ], + "text/plain": [ + "alt.LayerChart(...)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "<p 
style=\"font-size: smaller\">Data Source: \n", + " <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a> and\n", + " <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>\n", + "</p>" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(map_of_variable(map_df, 'Confirmed'))\n", + "display(HTML('''\n", + "<p style=\"font-size: smaller\">Data Source: \n", + " <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a> and\n", + " <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>\n", + "</p>'''))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "papermill": { + "duration": 0.062654, + "end_time": "2020-03-13T17:04:16.037936", + "exception": false, + "start_time": "2020-03-13T17:04:15.975282", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "<div id=\"altair-viz-a0d74fce564e44529149f3e190cf879e\"></div>\n", + "<script type=\"text/javascript\">\n", + " (function(spec, embedOpt){\n", + " const outputDiv = document.getElementById(\"altair-viz-a0d74fce564e44529149f3e190cf879e\");\n", + " const paths = {\n", + " \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n", + " \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n", + " \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.0.2?noext\",\n", + " \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n", + " };\n", + "\n", + " function loadScript(lib) {\n", + " return new Promise(function(resolve, reject) {\n", + " var s = document.createElement('script');\n", + " s.src = paths[lib];\n", + " s.async = true;\n", + " s.onload = () => resolve(paths[lib]);\n", + " s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n", + " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", + " });\n", + " }\n", + "\n", + 
" function showError(err) {\n", + " outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n", + " throw err;\n", + " }\n", + "\n", + " function displayChart(vegaEmbed) {\n", + " vegaEmbed(outputDiv, spec, embedOpt)\n", + " .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n", + " }\n", + "\n", + " if(typeof define === \"function\" && define.amd) {\n", + " requirejs.config({paths});\n", + " require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n", + " } else if (typeof vegaEmbed === \"function\") {\n", + " displayChart(vegaEmbed);\n", + " } else {\n", + " loadScript(\"vega\")\n", + " .then(() => loadScript(\"vega-lite\"))\n", + " .then(() => loadScript(\"vega-embed\"))\n", + " .catch(showError)\n", + " .then(() => displayChart(vegaEmbed));\n", + " }\n", + " })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"layer\": [{\"mark\": \"bar\", \"encoding\": {\"x\": {\"type\": \"quantitative\", \"field\": \"Confirmed\"}, \"y\": {\"type\": \"nominal\", \"field\": \"Country/Region\", \"sort\": \"-x\"}}}, {\"mark\": {\"type\": \"text\", \"align\": \"left\", \"baseline\": \"middle\", \"dx\": 3}, \"encoding\": {\"text\": {\"type\": \"quantitative\", \"field\": \"Confirmed\", \"format\": \".3\"}, \"x\": {\"type\": \"quantitative\", \"field\": \"Confirmed\"}, \"y\": {\"type\": \"nominal\", \"field\": \"Country/Region\", \"sort\": \"-x\"}}}], \"data\": {\"name\": \"data-433f706ddfa6221156e78c265aebad1f\"}, \"height\": 900, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.0.2.json\", \"datasets\": {\"data-433f706ddfa6221156e78c265aebad1f\": [{\"Country/Region\": \"Australia\", \"Confirmed\": 0.4281306826095598, \"Deaths\": 0.012003663998398872, \"Recovered\": 0.0840256479887921, \"Lat\": -23.1315375, \"Long\": 140.0609875}, {\"Country/Region\": 
\"Austria\", \"Confirmed\": 2.057185925638154, \"Deaths\": 0.0, \"Recovered\": 0.04521287748655284, \"Lat\": 47.5162, \"Long\": 14.5501}, {\"Country/Region\": \"Bahrain\", \"Confirmed\": 7.00887387149166, \"Deaths\": 0.0, \"Recovered\": 1.4017747742983322, \"Lat\": 26.0275, \"Long\": 50.55}, {\"Country/Region\": \"Belgium\", \"Confirmed\": 2.3375802000128174, \"Deaths\": 0.0, \"Recovered\": 0.008754982022519914, \"Lat\": 50.8333, \"Long\": 4.0}, {\"Country/Region\": \"Denmark\", \"Confirmed\": 4.519231399481772, \"Deaths\": 0.0, \"Recovered\": 0.01724897480718234, \"Lat\": 56.2639, \"Long\": 9.5018}, {\"Country/Region\": \"France\", \"Confirmed\": 2.663193607427707, \"Deaths\": 0.04926311045129727, \"Recovered\": 0.017913858345926282, \"Lat\": 47.0, \"Long\": 2.0}, {\"Country/Region\": \"Germany\", \"Confirmed\": 1.7569474368355689, \"Deaths\": 0.0024117329263357162, \"Recovered\": 0.021705596337021446, \"Lat\": 51.0, \"Long\": 9.0}, {\"Country/Region\": \"Hong Kong SAR\", \"Confirmed\": 1.61052207757348, \"Deaths\": 0.040263051939337005, \"Recovered\": 0.8723661253523016, \"Lat\": 22.3, \"Long\": 114.2}, {\"Country/Region\": \"Iran (Islamic Republic of)\", \"Confirmed\": 9.831263513326588, \"Deaths\": 0.3557445514023921, \"Recovered\": 3.3386198277660917, \"Lat\": 32.0, \"Long\": 53.0}, {\"Country/Region\": \"Italy\", \"Confirmed\": 16.794281862259982, \"Deaths\": 1.0441611838689575, \"Recovered\": 1.198054987513669, \"Lat\": 43.0, \"Long\": 12.0}, {\"Country/Region\": \"Japan\", \"Confirmed\": 0.4591829073311989, \"Deaths\": 0.007903320263876057, \"Recovered\": 0.07982353466514817, \"Lat\": 36.0, \"Long\": 138.0}, {\"Country/Region\": \"Mainland China\", \"Confirmed\": 5.798467757569664, \"Deaths\": 0.2251692718617392, \"Recovered\": 4.315696509732684, \"Lat\": 33.40693612903227, \"Long\": 111.54290322580646}, {\"Country/Region\": \"Malaysia\", \"Confirmed\": 0.4091525198482584, \"Deaths\": 0.0, \"Recovered\": 0.07612139904153642, \"Lat\": 2.5, \"Long\": 112.5}, 
{\"Country/Region\": \"Netherlands\", \"Confirmed\": 2.216932407413909, \"Deaths\": 0.02321395191009329, \"Recovered\": 0.0, \"Lat\": 52.1326, \"Long\": 5.2913}, {\"Country/Region\": \"Norway\", \"Confirmed\": 7.526810498997428, \"Deaths\": 0.0, \"Recovered\": 0.018817026247493568, \"Lat\": 60.472, \"Long\": 8.4689}, {\"Country/Region\": \"Republic of Korea\", \"Confirmed\": 14.550136054326911, \"Deaths\": 0.10457970809711876, \"Recovered\": 0.4783553314812655, \"Lat\": 36.0, \"Long\": 128.0}, {\"Country/Region\": \"Singapore\", \"Confirmed\": 2.837545551473431, \"Deaths\": 0.0, \"Recovered\": 1.3833034563432978, \"Lat\": 1.2833, \"Long\": 103.8333}, {\"Country/Region\": \"Spain\", \"Confirmed\": 3.6277054737195855, \"Deaths\": 0.07490837261367876, \"Recovered\": 0.06848765496107773, \"Lat\": 40.0, \"Long\": -4.0}, {\"Country/Region\": \"Sweden\", \"Confirmed\": 3.4861425832316537, \"Deaths\": 0.0, \"Recovered\": 0.009820119952765223, \"Lat\": 63.0, \"Long\": 16.0}, {\"Country/Region\": \"Switzerland\", \"Confirmed\": 5.7652500550986465, \"Deaths\": 0.035225560418118015, \"Recovered\": 0.035225560418118015, \"Lat\": 46.8182, \"Long\": 8.2275}, {\"Country/Region\": \"UK\", \"Confirmed\": 0.5745312032182892, \"Deaths\": 0.009024050312329149, \"Recovered\": 0.027072150936987446, \"Lat\": 55.0, \"Long\": -3.0}, {\"Country/Region\": \"US\", \"Confirmed\": 0.5104420019995022, \"Deaths\": 0.017116618031121033, \"Recovered\": 0.004584808401193134, \"Lat\": 38.77302746113991, \"Long\": -93.61047823834195}]}}, {\"mode\": \"vega-lite\"});\n", + "</script>" + ], + "text/plain": [ + "alt.LayerChart(...)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bars = alt.Chart(map_df).mark_bar().encode(\n", + " x='Confirmed:Q',\n", + " y=alt.Y(\"Country/Region:N\", sort='-x')\n", + ")\n", + "\n", + "text = bars.mark_text(\n", + " align='left',\n", + " baseline='middle',\n", + " dx=3 # Nudges text to right so it doesn't 
appear on top of the bar\n", + ").encode(\n", + " text=alt.Text('Confirmed:Q', format=\".3\")\n", + ")\n", + "\n", + "(bars + text).properties(height=900)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "papermill": { + "duration": 2.520956, + "end_time": "2020-03-13T17:04:16.371464", + "environment_variables": {}, + "exception": null, + "input_path": "notebooks/Dashboard.ipynb", + "output_path": "runs/Dashboard.run.ipynb", + "parameters": { + "PAPERMILL_INPUT_PATH": "notebooks/Dashboard.ipynb", + "PAPERMILL_OUTPUT_PATH": "runs/Dashboard.run.ipynb", + "rates_folder": "./data/covid-19_rates/", + "ts_folder": "./data/covid-19_jhu-csse/" + }, + "start_time": "2020-03-13T17:04:13.850508", + "version": "1.1.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/runs/ToRates.run.ipynb b/runs/ToRates.run.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..2925f4eefd7a6bff1c917d01551b49198f9d0e50 --- /dev/null +++ b/runs/ToRates.run.ipynb @@ -0,0 +1,827 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.019737, + "end_time": "2020-03-13T14:54:48.276145", + "exception": false, + "start_time": "2020-03-13T14:54:48.256408", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Convert Series to Rates per 100,000" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "papermill": { + "duration": 0.327112, + "end_time": "2020-03-13T14:54:48.613995", + "exception": false, + "start_time": "2020-03-13T14:54:48.286883", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import 
pandas as pd\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "papermill": { + "duration": 0.025647, + "end_time": "2020-03-13T14:54:48.658686", + "exception": false, + "start_time": "2020-03-13T14:54:48.633039", + "status": "completed" + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "ts_folder = \"../data/covid-19_jhu-csse/\"\n", + "wb_path = \"../data/worldbank/SP.POP.TOTL.zip\"\n", + "out_folder = None\n", + "PAPERMILL_OUTPUT_PATH = None" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "papermill": { + "duration": 0.027378, + "end_time": "2020-03-13T14:54:48.697099", + "exception": false, + "start_time": "2020-03-13T14:54:48.669721", + "status": "completed" + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Parameters\n", + "PAPERMILL_INPUT_PATH = \"notebooks/ToRates.ipynb\"\n", + "PAPERMILL_OUTPUT_PATH = \"runs/ToRates.run.ipynb\"\n", + "ts_folder = \"./data/covid-19_jhu-csse/\"\n", + "wb_path = \"./data/worldbank/SP.POP.TOTL.zip\"\n", + "out_folder = \"./data/covid-19_rates/\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.009984, + "end_time": "2020-03-13T14:54:48.724400", + "exception": false, + "start_time": "2020-03-13T14:54:48.714416", + "status": "completed" + }, + "tags": [ + "parameters" + ] + }, + "source": [ + "## Read in JHU CSSE data\n", + "\n", + "I will switch to [xarray](http://xarray.pydata.org/en/stable/), but ATM, it's easier like this..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "papermill": { + "duration": 0.044299, + "end_time": "2020-03-13T14:54:48.778200", + "exception": false, + "start_time": "2020-03-13T14:54:48.733901", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def read_jhu_covid_region_df(name):\n", + " filename = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n", + " df = pd.read_csv(filename)\n", + " df = df.set_index(['Country/Region', 'Province/State', 'Lat', 'Long'])\n", + " df.columns = pd.to_datetime(df.columns)\n", + " region_df = df.groupby(level='Country/Region').sum()\n", + " loc_df = df.reset_index([2,3]).groupby(level='Country/Region').mean()[['Long', 'Lat']]\n", + " return region_df.join(loc_df).set_index(['Long', 'Lat'], append=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "papermill": { + "duration": 0.126546, + "end_time": "2020-03-13T14:54:48.922552", + "exception": false, + "start_time": "2020-03-13T14:54:48.796006", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "frames_map = {\n", + " \"confirmed\": read_jhu_covid_region_df(\"Confirmed\"),\n", + " \"deaths\": read_jhu_covid_region_df(\"Deaths\"),\n", + " \"recovered\": read_jhu_covid_region_df(\"Recovered\")\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "papermill": { + "duration": 0.05437, + "end_time": "2020-03-13T14:54:48.984940", + "exception": false, + "start_time": "2020-03-13T14:54:48.930570", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" 
class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th>2020-01-22</th>\n", + " <th>2020-01-23</th>\n", + " <th>2020-01-24</th>\n", + " <th>2020-01-25</th>\n", + " <th>2020-01-26</th>\n", + " <th>2020-01-27</th>\n", + " <th>2020-01-28</th>\n", + " <th>2020-01-29</th>\n", + " <th>2020-01-30</th>\n", + " <th>2020-01-31</th>\n", + " <th>...</th>\n", + " <th>2020-03-01</th>\n", + " <th>2020-03-02</th>\n", + " <th>2020-03-03</th>\n", + " <th>2020-03-04</th>\n", + " <th>2020-03-05</th>\n", + " <th>2020-03-06</th>\n", + " <th>2020-03-07</th>\n", + " <th>2020-03-08</th>\n", + " <th>2020-03-09</th>\n", + " <th>2020-03-10</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Country/Region</th>\n", + " <th>Long</th>\n", + " <th>Lat</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Mainland China</th>\n", + " <th>111.542903</th>\n", + " <th>33.406936</th>\n", + " <td>547</td>\n", + " <td>639</td>\n", + " <td>916</td>\n", + " <td>1399</td>\n", + " <td>2062</td>\n", + " <td>2863</td>\n", + " <td>5494</td>\n", + " <td>6070</td>\n", + " <td>8124</td>\n", + " <td>9783</td>\n", + " <td>...</td>\n", + " <td>79826</td>\n", + " <td>80026</td>\n", + " <td>80151</td>\n", + " <td>80271</td>\n", + " <td>80422</td>\n", + " <td>80573</td>\n", + " <td>80652</td>\n", + " <td>80699</td>\n", + " <td>80735</td>\n", + " <td>80757</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Italy</th>\n", + " <th>12.000000</th>\n", + " <th>43.000000</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>...</td>\n", + " <td>1694</td>\n", + " <td>2036</td>\n", + " <td>2502</td>\n", + " <td>3089</td>\n", + " <td>3858</td>\n", + " <td>4636</td>\n", + " <td>5883</td>\n", + " <td>7375</td>\n", + " <td>9172</td>\n", + " <td>10149</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Iran (Islamic Republic of)</th>\n", + " <th>53.000000</th>\n", + " <th>32.000000</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>...</td>\n", + " <td>978</td>\n", + " <td>1501</td>\n", + " <td>2336</td>\n", + " <td>2922</td>\n", + " <td>3513</td>\n", + " <td>4747</td>\n", + " <td>5823</td>\n", + " <td>6566</td>\n", + " <td>7161</td>\n", + " <td>8042</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Republic of Korea</th>\n", + " <th>128.000000</th>\n", + " <th>36.000000</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>4</td>\n", + " <td>4</td>\n", + " <td>4</td>\n", + " <td>11</td>\n", + " <td>...</td>\n", + " <td>3736</td>\n", + " <td>4335</td>\n", + " <td>5186</td>\n", + " <td>5621</td>\n", + " <td>6088</td>\n", + " <td>6593</td>\n", + " <td>7041</td>\n", + " <td>7314</td>\n", + " <td>7478</td>\n", + " <td>7513</td>\n", + " </tr>\n", + " <tr>\n", + " <th>France</th>\n", + " <th>2.000000</th>\n", + " <th>47.000000</th>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " <td>3</td>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " <td>5</td>\n", + " <td>5</td>\n", + " <td>...</td>\n", + " <td>130</td>\n", + " <td>191</td>\n", + " <td>204</td>\n", + " <td>285</td>\n", + " <td>377</td>\n", + " <td>653</td>\n", + " <td>949</td>\n", + " <td>1126</td>\n", + " <td>1209</td>\n", + " <td>1784</td>\n", + " </tr>\n", + 
" </tbody>\n", + "</table>\n", + "<p>5 rows × 49 columns</p>\n", + "</div>" + ], + "text/plain": [ + " 2020-01-22 2020-01-23 \\\n", + "Country/Region Long Lat \n", + "Mainland China 111.542903 33.406936 547 639 \n", + "Italy 12.000000 43.000000 0 0 \n", + "Iran (Islamic Republic of) 53.000000 32.000000 0 0 \n", + "Republic of Korea 128.000000 36.000000 1 1 \n", + "France 2.000000 47.000000 0 0 \n", + "\n", + " 2020-01-24 2020-01-25 \\\n", + "Country/Region Long Lat \n", + "Mainland China 111.542903 33.406936 916 1399 \n", + "Italy 12.000000 43.000000 0 0 \n", + "Iran (Islamic Republic of) 53.000000 32.000000 0 0 \n", + "Republic of Korea 128.000000 36.000000 2 2 \n", + "France 2.000000 47.000000 2 3 \n", + "\n", + " 2020-01-26 2020-01-27 \\\n", + "Country/Region Long Lat \n", + "Mainland China 111.542903 33.406936 2062 2863 \n", + "Italy 12.000000 43.000000 0 0 \n", + "Iran (Islamic Republic of) 53.000000 32.000000 0 0 \n", + "Republic of Korea 128.000000 36.000000 3 4 \n", + "France 2.000000 47.000000 3 3 \n", + "\n", + " 2020-01-28 2020-01-29 \\\n", + "Country/Region Long Lat \n", + "Mainland China 111.542903 33.406936 5494 6070 \n", + "Italy 12.000000 43.000000 0 0 \n", + "Iran (Islamic Republic of) 53.000000 32.000000 0 0 \n", + "Republic of Korea 128.000000 36.000000 4 4 \n", + "France 2.000000 47.000000 4 5 \n", + "\n", + " 2020-01-30 2020-01-31 ... \\\n", + "Country/Region Long Lat ... \n", + "Mainland China 111.542903 33.406936 8124 9783 ... \n", + "Italy 12.000000 43.000000 0 2 ... \n", + "Iran (Islamic Republic of) 53.000000 32.000000 0 0 ... \n", + "Republic of Korea 128.000000 36.000000 4 11 ... \n", + "France 2.000000 47.000000 5 5 ... 
\n", + "\n", + " 2020-03-01 2020-03-02 \\\n", + "Country/Region Long Lat \n", + "Mainland China 111.542903 33.406936 79826 80026 \n", + "Italy 12.000000 43.000000 1694 2036 \n", + "Iran (Islamic Republic of) 53.000000 32.000000 978 1501 \n", + "Republic of Korea 128.000000 36.000000 3736 4335 \n", + "France 2.000000 47.000000 130 191 \n", + "\n", + " 2020-03-03 2020-03-04 \\\n", + "Country/Region Long Lat \n", + "Mainland China 111.542903 33.406936 80151 80271 \n", + "Italy 12.000000 43.000000 2502 3089 \n", + "Iran (Islamic Republic of) 53.000000 32.000000 2336 2922 \n", + "Republic of Korea 128.000000 36.000000 5186 5621 \n", + "France 2.000000 47.000000 204 285 \n", + "\n", + " 2020-03-05 2020-03-06 \\\n", + "Country/Region Long Lat \n", + "Mainland China 111.542903 33.406936 80422 80573 \n", + "Italy 12.000000 43.000000 3858 4636 \n", + "Iran (Islamic Republic of) 53.000000 32.000000 3513 4747 \n", + "Republic of Korea 128.000000 36.000000 6088 6593 \n", + "France 2.000000 47.000000 377 653 \n", + "\n", + " 2020-03-07 2020-03-08 \\\n", + "Country/Region Long Lat \n", + "Mainland China 111.542903 33.406936 80652 80699 \n", + "Italy 12.000000 43.000000 5883 7375 \n", + "Iran (Islamic Republic of) 53.000000 32.000000 5823 6566 \n", + "Republic of Korea 128.000000 36.000000 7041 7314 \n", + "France 2.000000 47.000000 949 1126 \n", + "\n", + " 2020-03-09 2020-03-10 \n", + "Country/Region Long Lat \n", + "Mainland China 111.542903 33.406936 80735 80757 \n", + "Italy 12.000000 43.000000 9172 10149 \n", + "Iran (Islamic Republic of) 53.000000 32.000000 7161 8042 \n", + "Republic of Korea 128.000000 36.000000 7478 7513 \n", + "France 2.000000 47.000000 1209 1784 \n", + "\n", + "[5 rows x 49 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frames_map['confirmed'].sort_values(frames_map['confirmed'].columns[-1], ascending=False).head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"papermill": { + "duration": 0.011038, + "end_time": "2020-03-13T14:54:49.012475", + "exception": false, + "start_time": "2020-03-13T14:54:49.001437", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Read in World Bank data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "papermill": { + "duration": 0.043639, + "end_time": "2020-03-13T14:54:49.064213", + "exception": false, + "start_time": "2020-03-13T14:54:49.020574", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import zipfile\n", + "zf = zipfile.ZipFile(wb_path)\n", + "pop_df = pd.read_csv(zf.open(\"API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv\"), skiprows=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.010307, + "end_time": "2020-03-13T14:54:49.091394", + "exception": false, + "start_time": "2020-03-13T14:54:49.081087", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "There is 2018 pop data for all countries/regions except Eritrea" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "papermill": { + "duration": 0.055137, + "end_time": "2020-03-13T14:54:49.154951", + "exception": false, + "start_time": "2020-03-13T14:54:49.099814", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Country Name</th>\n", + " <th>Country Code</th>\n", + " <th>Indicator Name</th>\n", + " <th>Indicator Code</th>\n", + " <th>1960</th>\n", + " <th>1961</th>\n", + " <th>1962</th>\n", + " <th>1963</th>\n", + " 
<th>1964</th>\n", + " <th>1965</th>\n", + " <th>...</th>\n", + " <th>2011</th>\n", + " <th>2012</th>\n", + " <th>2013</th>\n", + " <th>2014</th>\n", + " <th>2015</th>\n", + " <th>2016</th>\n", + " <th>2017</th>\n", + " <th>2018</th>\n", + " <th>2019</th>\n", + " <th>Unnamed: 64</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>67</th>\n", + " <td>Eritrea</td>\n", + " <td>ERI</td>\n", + " <td>Population, total</td>\n", + " <td>SP.POP.TOTL</td>\n", + " <td>1007590.0</td>\n", + " <td>1033328.0</td>\n", + " <td>1060486.0</td>\n", + " <td>1088854.0</td>\n", + " <td>1118159.0</td>\n", + " <td>1148189.0</td>\n", + " <td>...</td>\n", + " <td>3213972.0</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>108</th>\n", + " <td>Not classified</td>\n", + " <td>INX</td>\n", + " <td>Population, total</td>\n", + " <td>SP.POP.TOTL</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>2 rows × 65 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Country Name Country Code Indicator Name Indicator Code 1960 \\\n", + "67 Eritrea ERI Population, total SP.POP.TOTL 1007590.0 \n", + "108 Not classified INX Population, total SP.POP.TOTL NaN \n", + "\n", + " 1961 1962 1963 1964 1965 ... 2011 \\\n", + "67 1033328.0 1060486.0 1088854.0 1118159.0 1148189.0 ... 3213972.0 \n", + "108 NaN NaN NaN NaN NaN ... 
NaN \n", + "\n", + " 2012 2013 2014 2015 2016 2017 2018 2019 Unnamed: 64 \n", + "67 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "108 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + "[2 rows x 65 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop_df[pd.isna(pop_df['2018'])]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.009197, + "end_time": "2020-03-13T14:54:49.179422", + "exception": false, + "start_time": "2020-03-13T14:54:49.170225", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Fix the country/region names that differ between the World Bank population data and the JHU CSSE data." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "papermill": { + "duration": 0.048983, + "end_time": "2020-03-13T14:54:49.237289", + "exception": false, + "start_time": "2020-03-13T14:54:49.188306", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "region_wb_jhu_map = {\n", + " 'China': 'Mainland China',\n", + " 'Iran, Islamic Rep.': 'Iran (Islamic Republic of)',\n", + " 'Korea, Rep.': 'Republic of Korea',\n", + " 'United States': 'US',\n", + " 'United Kingdom': 'UK',\n", + " 'Hong Kong SAR, China': 'Hong Kong SAR',\n", + " 'Egypt, Arab Rep.': 'Egypt',\n", + " 'Vietnam': 'Viet Nam',\n", + " 'Macao SAR, China': 'Macao SAR',\n", + " 'Slovak Republic': 'Slovakia',\n", + " 'Moldova': 'Republic of Moldova',\n", + " 'St. 
Martin (French part)': 'Saint Martin',\n", + " 'Brunei Darussalam': 'Brunei'\n", + "}\n", + "current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')['2018']\n", + "data_pop_ser = current_pop_ser[current_pop_ser.index.isin(frames_map['confirmed'].index.levels[0])]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.010552, + "end_time": "2020-03-13T14:54:49.263442", + "exception": false, + "start_time": "2020-03-13T14:54:49.252890", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "There are some regions that we cannot resolve, but we will just ignore these." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.008224, + "end_time": "2020-03-13T14:54:49.280239", + "exception": false, + "start_time": "2020-03-13T14:54:49.272015", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Compute rates per 100,000 for regions" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "papermill": { + "duration": 0.081057, + "end_time": "2020-03-13T14:54:49.370031", + "exception": false, + "start_time": "2020-03-13T14:54:49.288974", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def cases_to_rates_df(df):\n", + " per_100000_df = df.reset_index([1, 2], drop=True)\n", + " per_100000_df = per_100000_df.div(data_pop_ser, 'index').mul(100000).dropna()\n", + " per_100000_df.index.name = 'Country/Region'\n", + " return per_100000_df\n", + " \n", + "def frames_to_rates(frames_map):\n", + " return {k: cases_to_rates_df(v) for k,v in frames_map.items()}\n", + "\n", + "\n", + "rates_map = frames_to_rates(frames_map)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "papermill": { + "duration": 0.065887, + "end_time": "2020-03-13T14:54:49.458887", + "exception": false, + "start_time": "2020-03-13T14:54:49.393000", + "status": "completed" + }, + 
"tags": [] + }, + "outputs": [], + "source": [ + "if PAPERMILL_OUTPUT_PATH:\n", + " for k, v in rates_map.items():\n", + " out_path = os.path.join(out_folder, f\"ts_rates_19-covid-{k}.csv\")\n", + " v.reset_index().to_csv(out_path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "papermill": { + "duration": 2.37776, + "end_time": "2020-03-13T14:54:49.786899", + "environment_variables": {}, + "exception": null, + "input_path": "notebooks/ToRates.ipynb", + "output_path": "runs/ToRates.run.ipynb", + "parameters": { + "PAPERMILL_INPUT_PATH": "notebooks/ToRates.ipynb", + "PAPERMILL_OUTPUT_PATH": "runs/ToRates.run.ipynb", + "out_folder": "./data/covid-19_rates/", + "ts_folder": "./data/covid-19_jhu-csse/", + "wb_path": "./data/worldbank/SP.POP.TOTL.zip" + }, + "start_time": "2020-03-13T14:54:47.409139", + "version": "1.1.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file