Newer
Older
Chandrasekhar Ramakrishnan
committed
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 0.582742,
"end_time": "2020-03-20T09:12:16.167813",
Chandrasekhar Ramakrishnan
committed
"exception": false,
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:15.585071",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import requests\n",
"import os\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 0.017681,
"end_time": "2020-03-20T09:12:16.194097",
Chandrasekhar Ramakrishnan
committed
"exception": false,
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:16.176416",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"out_folder = \"../data/covidtracking/\"\n",
"PAPERMILL_OUTPUT_PATH = None"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 0.019941,
"end_time": "2020-03-20T09:12:16.222810",
Chandrasekhar Ramakrishnan
committed
"exception": false,
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:16.202869",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": [
"injected-parameters"
]
},
"outputs": [],
"source": [
"# Parameters\n",
Chandrasekhar Ramakrishnan
committed
"PAPERMILL_INPUT_PATH = \"/tmp/tq93huw7/notebooks/process/download-covidtracking-data.ipynb\"\n",
Chandrasekhar Ramakrishnan
committed
"PAPERMILL_OUTPUT_PATH = \"runs/download-covidtracking-data.runs.ipynb\"\n",
Chandrasekhar Ramakrishnan
committed
"out_folder = \"data/covidtracking\"\n"
Chandrasekhar Ramakrishnan
committed
]
},
{
"cell_type": "markdown",
"metadata": {
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 0.006852,
"end_time": "2020-03-20T09:12:16.239661",
Chandrasekhar Ramakrishnan
committed
"exception": false,
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:16.232809",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"source": [
"# Download state metadata\n",
"\n",
"Download metadata for each US state and several territories, including the URLs of each state's COVID-19 data sources. See [Google Doc](https://docs.google.com/spreadsheets/d/18oVRrHj3c183mHmq3m89_163yuYltLNlOmPerQ18E8w/htmlview?sle=true)."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 1.543652,
"end_time": "2020-03-20T09:12:17.791818",
Chandrasekhar Ramakrishnan
committed
"exception": false,
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:16.248166",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Fetch the per-state metadata as raw bytes (HTTPS, like the daily endpoint).\n",
"url = 'https://covidtracking.com/api/states/info'\n",
"r = requests.get(url, allow_redirects=True)\n",
"# Fail loudly on an HTTP error instead of saving/parsing an error page as data.\n",
"r.raise_for_status()\n",
"states_metadata_json = r.content"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 0.019479,
"end_time": "2020-03-20T09:12:17.820781",
Chandrasekhar Ramakrishnan
committed
"exception": false,
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:17.801302",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Save the raw response, but only when PAPERMILL_OUTPUT_PATH is set\n",
"# (it defaults to None in the parameters cell).\n",
"if PAPERMILL_OUTPUT_PATH:\n",
"    # Create the target folder first so open() does not fail when it is missing.\n",
"    os.makedirs(out_folder, exist_ok=True)\n",
"    out_path = os.path.join(out_folder, 'states-metadata.json')\n",
"    with open(out_path, 'wb') as f:\n",
"        f.write(states_metadata_json)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 0.067162,
"end_time": "2020-03-20T09:12:17.897197",
Chandrasekhar Ramakrishnan
committed
"exception": false,
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:17.830035",
Chandrasekhar Ramakrishnan
committed
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"56 states and territories have metadata\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>state</th>\n",
Chandrasekhar Ramakrishnan
committed
" <th>covid19SiteOld</th>\n",
Chandrasekhar Ramakrishnan
committed
" <th>covid19Site</th>\n",
Chandrasekhar Ramakrishnan
committed
" <th>covid19SiteSecondary</th>\n",
Chandrasekhar Ramakrishnan
committed
" <th>twitter</th>\n",
" <th>pui</th>\n",
" <th>pum</th>\n",
" <th>notes</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>AK</td>\n",
" <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n",
" <td>http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-...</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>NaN</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>@Alaska_DHSS</td>\n",
" <td>All data</td>\n",
" <td>False</td>\n",
" <td>Unclear if their reported number means \"person...</td>\n",
" <td>Alaska</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>AL</td>\n",
" <td>http://www.alabamapublichealth.gov/infectiousd...</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>https://alpublichealth.maps.arcgis.com/apps/op...</td>\n",
" <td>NaN</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>@alpublichealth</td>\n",
" <td>No data</td>\n",
" <td>False</td>\n",
" <td>Last negative count from 3/16.</td>\n",
" <td>Alabama</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
Chandrasekhar Ramakrishnan
committed
" state covid19SiteOld \\\n",
Chandrasekhar Ramakrishnan
committed
"0 AK http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-... \n",
"1 AL http://www.alabamapublichealth.gov/infectiousd... \n",
"\n",
Chandrasekhar Ramakrishnan
committed
" covid19Site covid19SiteSecondary \\\n",
"0 http://dhss.alaska.gov/dph/Epi/id/Pages/COVID-... NaN \n",
"1 https://alpublichealth.maps.arcgis.com/apps/op... NaN \n",
"\n",
" twitter pui pum \\\n",
"0 @Alaska_DHSS All data False \n",
"1 @alpublichealth No data False \n",
Chandrasekhar Ramakrishnan
committed
"\n",
Chandrasekhar Ramakrishnan
committed
" notes name \n",
"0 Unclear if their reported number means \"person... Alaska \n",
"1 Last negative count from 3/16. Alabama "
Chandrasekhar Ramakrishnan
committed
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Parse the downloaded metadata JSON into a DataFrame and preview two rows.\n",
"metadata_df = pd.read_json(states_metadata_json)\n",
"n_states = len(metadata_df)\n",
"print(n_states, \"states and territories have metadata\")\n",
"metadata_df.head(2)"
]
},
{
"cell_type": "markdown",
"metadata": {
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 0.010887,
"end_time": "2020-03-20T09:12:17.919775",
Chandrasekhar Ramakrishnan
committed
"exception": false,
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:17.908888",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"source": [
"# Download daily state data"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 0.134228,
"end_time": "2020-03-20T09:12:18.065352",
Chandrasekhar Ramakrishnan
committed
"exception": false,
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:17.931124",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Fetch the daily per-state figures as raw bytes.\n",
"url = 'https://covidtracking.com/api/states/daily'\n",
"r = requests.get(url, allow_redirects=True)\n",
"# Fail loudly on an HTTP error instead of saving/parsing an error page as data.\n",
"r.raise_for_status()\n",
"states_daily_json = r.content"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 0.02218,
"end_time": "2020-03-20T09:12:18.097912",
Chandrasekhar Ramakrishnan
committed
"exception": false,
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:18.075732",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Save the raw response, but only when PAPERMILL_OUTPUT_PATH is set\n",
"# (it defaults to None in the parameters cell).\n",
"if PAPERMILL_OUTPUT_PATH:\n",
"    # Create the target folder first so open() does not fail when it is missing.\n",
"    os.makedirs(out_folder, exist_ok=True)\n",
"    out_path = os.path.join(out_folder, 'states-daily.json')\n",
"    with open(out_path, 'wb') as f:\n",
"        f.write(states_daily_json)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 0.058368,
"end_time": "2020-03-20T09:12:18.166166",
Chandrasekhar Ramakrishnan
committed
"exception": false,
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:18.107798",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
Chandrasekhar Ramakrishnan
committed
"757 data points\n"
Chandrasekhar Ramakrishnan
committed
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>state</th>\n",
" <th>positive</th>\n",
" <th>negative</th>\n",
" <th>pending</th>\n",
" <th>death</th>\n",
" <th>total</th>\n",
" <th>dateChecked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
Chandrasekhar Ramakrishnan
committed
" <td>20200319</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>AK</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>6</td>\n",
" <td>400.0</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>406</td>\n",
" <td>2020-03-19T20:00:00Z</td>\n",
Chandrasekhar Ramakrishnan
committed
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
Chandrasekhar Ramakrishnan
committed
" <td>20200319</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>AL</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>68</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>28.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>96</td>\n",
" <td>2020-03-19T20:00:00Z</td>\n",
Chandrasekhar Ramakrishnan
committed
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date state positive negative pending death total \\\n",
Chandrasekhar Ramakrishnan
committed
"0 20200319 AK 6 400.0 NaN NaN 406 \n",
"1 20200319 AL 68 28.0 NaN 0.0 96 \n",
Chandrasekhar Ramakrishnan
committed
"\n",
" dateChecked \n",
Chandrasekhar Ramakrishnan
committed
"0 2020-03-19T20:00:00Z \n",
"1 2020-03-19T20:00:00Z "
Chandrasekhar Ramakrishnan
committed
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Parse the downloaded daily JSON into a DataFrame and preview two rows.\n",
"data_df = pd.read_json(states_daily_json)\n",
"n_points = len(data_df)\n",
"print(n_points, \"data points\")\n",
"data_df.head(2)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
Chandrasekhar Ramakrishnan
committed
"version": "3.7.3"
Chandrasekhar Ramakrishnan
committed
},
"papermill": {
Chandrasekhar Ramakrishnan
committed
"duration": 3.92686,
"end_time": "2020-03-20T09:12:18.486365",
Chandrasekhar Ramakrishnan
committed
"environment_variables": {},
"exception": null,
Chandrasekhar Ramakrishnan
committed
"input_path": "/tmp/tq93huw7/notebooks/process/download-covidtracking-data.ipynb",
Chandrasekhar Ramakrishnan
committed
"output_path": "runs/download-covidtracking-data.runs.ipynb",
"parameters": {
Chandrasekhar Ramakrishnan
committed
"PAPERMILL_INPUT_PATH": "/tmp/tq93huw7/notebooks/process/download-covidtracking-data.ipynb",
Chandrasekhar Ramakrishnan
committed
"PAPERMILL_OUTPUT_PATH": "runs/download-covidtracking-data.runs.ipynb",
Chandrasekhar Ramakrishnan
committed
"out_folder": "data/covidtracking"
Chandrasekhar Ramakrishnan
committed
},
Chandrasekhar Ramakrishnan
committed
"start_time": "2020-03-20T09:12:14.559505",
Chandrasekhar Ramakrishnan
committed
"version": "1.1.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}