From 7d740935ed3610597292f0028b0be1bda03925d0 Mon Sep 17 00:00:00 2001
From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch>
Date: Wed, 18 Mar 2020 18:14:10 +0000
Subject: [PATCH] feat: incorporate covidtracking data for US

---
 .../process/download-covidtracking-data.ipynb | 148 ++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100644 notebooks/process/download-covidtracking-data.ipynb

diff --git a/notebooks/process/download-covidtracking-data.ipynb b/notebooks/process/download-covidtracking-data.ipynb
new file mode 100644
index 00000000..9a6a27bd
--- /dev/null
+++ b/notebooks/process/download-covidtracking-data.ipynb
@@ -0,0 +1,148 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import io\n",
+    "import os\n",
+    "\n",
+    "import pandas as pd\n",
+    "import requests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "# folder that receives the downloaded JSON files\n",
+    "out_folder = \"../data/covidtracking/\"\n",
+    "# downloads are only written to disk when this is set\n",
+    "PAPERMILL_OUTPUT_PATH = None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Download state metadata\n",
+    "\n",
+    "Download a dataset of URLs for data for each US state and several territories. See [Google Doc](https://docs.google.com/spreadsheets/d/18oVRrHj3c183mHmq3m89_163yuYltLNlOmPerQ18E8w/htmlview?sle=true)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = 'https://covidtracking.com/api/states/info'\n",
+    "r = requests.get(url, allow_redirects=True, timeout=30)\n",
+    "# abort on an HTTP error rather than treating an error page as data\n",
+    "r.raise_for_status()\n",
+    "states_metadata_json = r.content"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# save the result (only when PAPERMILL_OUTPUT_PATH is set)\n",
+    "if PAPERMILL_OUTPUT_PATH:\n",
+    "    os.makedirs(out_folder, exist_ok=True)\n",
+    "    out_path = os.path.join(out_folder, 'states-metadata.json')\n",
+    "    with open(out_path, 'wb') as f:\n",
+    "        f.write(states_metadata_json)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# wrap the bytes in a file-like object; newer pandas deprecates passing JSON content directly\n",
+    "metadata_df = pd.read_json(io.BytesIO(states_metadata_json))\n",
+    "print(len(metadata_df), \"states and territories have metadata\")\n",
+    "metadata_df.head(2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Download daily state data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = 'https://covidtracking.com/api/states/daily'\n",
+    "r = requests.get(url, allow_redirects=True, timeout=30)\n",
+    "# abort on an HTTP error rather than treating an error page as data\n",
+    "r.raise_for_status()\n",
+    "states_daily_json = r.content"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# save the result (only when PAPERMILL_OUTPUT_PATH is set)\n",
+    "if PAPERMILL_OUTPUT_PATH:\n",
+    "    os.makedirs(out_folder, exist_ok=True)\n",
+    "    out_path = os.path.join(out_folder, 'states-daily.json')\n",
+    "    with open(out_path, 'wb') as f:\n",
+    "        f.write(states_daily_json)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# wrap the bytes in a file-like object; newer pandas deprecates passing JSON content directly\n",
+    "data_df = pd.read_json(io.BytesIO(states_daily_json))\n",
+    "print(len(data_df), \"data points\")\n",
+    "data_df.head(2)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
-- 
GitLab