Newer
Older
Chandrasekhar Ramakrishnan
committed
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.031319,
"end_time": "2020-03-13T17:58:04.199968",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:04.168649",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"source": [
"# Convert Series to Rates per 100,000"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"papermill": {
"duration": 0.365213,
"end_time": "2020-03-13T17:58:04.574239",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:04.209026",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"papermill": {
"duration": 0.018416,
"end_time": "2020-03-13T17:58:04.613366",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:04.594950",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"ts_folder = \"../data/covid-19_jhu-csse/\"\n",
"wb_path = \"../data/worldbank/SP.POP.TOTL.zip\"\n",
"out_folder = None\n",
"PAPERMILL_OUTPUT_PATH = None"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"papermill": {
"duration": 0.018935,
"end_time": "2020-03-13T17:58:04.641455",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:04.622520",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": [
"injected-parameters"
]
},
"outputs": [],
"source": [
"# Parameters\n",
"PAPERMILL_INPUT_PATH = \"/tmp/hmyw2rom/notebooks/ToRates.ipynb\"\n",
Chandrasekhar Ramakrishnan
committed
"PAPERMILL_OUTPUT_PATH = \"runs/ToRates.run.ipynb\"\n",
"ts_folder = \"/tmp/hmyw2rom/data/covid-19_jhu-csse\"\n",
"wb_path = \"/tmp/hmyw2rom/data/worldbank/SP.POP.TOTL.zip\"\n",
"out_folder = \"data/covid-19_rates\"\n"
Chandrasekhar Ramakrishnan
committed
]
},
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.00793,
"end_time": "2020-03-13T17:58:04.657374",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:04.649444",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": [
"parameters"
]
},
"source": [
"## Read in JHU CSSE data\n",
"\n",
"I plan to switch to [xarray](http://xarray.pydata.org/en/stable/), but for now it is easier to work with plain pandas DataFrames."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"papermill": {
"duration": 0.02791,
"end_time": "2020-03-13T17:58:04.692567",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:04.664657",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"def read_jhu_covid_region_df(name):\n",
"    \"\"\"Load one JHU CSSE time series and aggregate it per country/region.\n",
"\n",
"    Reads ``time_series_19-covid-{name}.csv`` from ``ts_folder``, parses the\n",
"    column labels as dates and sums the rows of each country/region. The\n",
"    result is indexed by ``(Country/Region, Long, Lat)``, where Long/Lat are\n",
"    the mean of the coordinates of the rows that were summed.\n",
"    \"\"\"\n",
"    csv_path = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n",
"    provinces = pd.read_csv(csv_path).set_index(['Country/Region', 'Province/State', 'Lat', 'Long'])\n",
"    provinces.columns = pd.to_datetime(provinces.columns)\n",
"\n",
"    # Case counts: one row per country/region.\n",
"    totals = provinces.groupby(level='Country/Region').sum()\n",
"\n",
"    # Representative coordinates: average Lat/Long over the rows of each region.\n",
"    coords = provinces.reset_index(['Lat', 'Long'])\n",
"    centroids = coords.groupby(level='Country/Region').mean()[['Long', 'Lat']]\n",
"\n",
"    return totals.join(centroids).set_index(['Long', 'Lat'], append=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"papermill": {
"duration": 0.127122,
"end_time": "2020-03-13T17:58:04.831576",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:04.704454",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# One aggregated frame per JHU CSSE series, keyed by the lower-cased series name.\n",
"frames_map = {\n",
"    series.lower(): read_jhu_covid_region_df(series)\n",
"    for series in (\"Confirmed\", \"Deaths\", \"Recovered\")\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"papermill": {
"duration": 0.052319,
"end_time": "2020-03-13T17:58:04.901105",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:04.848786",
Chandrasekhar Ramakrishnan
committed
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th>2020-01-22</th>\n",
" <th>2020-01-23</th>\n",
" <th>2020-01-24</th>\n",
" <th>2020-01-25</th>\n",
" <th>2020-01-26</th>\n",
" <th>2020-01-27</th>\n",
" <th>2020-01-28</th>\n",
" <th>2020-01-29</th>\n",
" <th>2020-01-30</th>\n",
" <th>2020-01-31</th>\n",
" <th>...</th>\n",
" <th>2020-03-03</th>\n",
" <th>2020-03-04</th>\n",
" <th>2020-03-05</th>\n",
" <th>2020-03-06</th>\n",
" <th>2020-03-07</th>\n",
" <th>2020-03-08</th>\n",
" <th>2020-03-09</th>\n",
" <th>2020-03-10</th>\n",
" <th>2020-03-11</th>\n",
" <th>2020-03-12</th>\n",
Chandrasekhar Ramakrishnan
committed
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
" </tr>\n",
" <tr>\n",
" <th>Country/Region</th>\n",
" <th>Long</th>\n",
" <th>Lat</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>China</th>\n",
" <th>111.649082</th>\n",
" <th>32.828385</th>\n",
" <td>548</td>\n",
" <td>643</td>\n",
" <td>920</td>\n",
" <td>1406</td>\n",
" <td>2075</td>\n",
" <td>2877</td>\n",
" <td>5509</td>\n",
" <td>6087</td>\n",
" <td>8141</td>\n",
" <td>9802</td>\n",
Chandrasekhar Ramakrishnan
committed
" <td>...</td>\n",
" <td>80261</td>\n",
" <td>80386</td>\n",
" <td>80537</td>\n",
" <td>80690</td>\n",
" <td>80770</td>\n",
" <td>80823</td>\n",
" <td>80860</td>\n",
" <td>80887</td>\n",
" <td>80921</td>\n",
" <td>80932</td>\n",
Chandrasekhar Ramakrishnan
committed
" </tr>\n",
" <tr>\n",
" <th>Italy</th>\n",
" <th>12.000000</th>\n",
" <th>43.000000</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>2502</td>\n",
" <td>3089</td>\n",
" <td>3858</td>\n",
" <td>4636</td>\n",
" <td>5883</td>\n",
" <td>7375</td>\n",
" <td>9172</td>\n",
" <td>10149</td>\n",
" <td>12462</td>\n",
" <td>12462</td>\n",
Chandrasekhar Ramakrishnan
committed
" </tr>\n",
" <tr>\n",
" <th>Iran</th>\n",
" <th>53.000000</th>\n",
" <th>32.000000</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>2336</td>\n",
" <td>2922</td>\n",
" <td>3513</td>\n",
" <td>4747</td>\n",
" <td>5823</td>\n",
" <td>6566</td>\n",
" <td>7161</td>\n",
" <td>8042</td>\n",
" <td>9000</td>\n",
" <td>10075</td>\n",
Chandrasekhar Ramakrishnan
committed
" </tr>\n",
" <tr>\n",
" <th>Korea, South</th>\n",
" <th>128.000000</th>\n",
" <th>36.000000</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>5186</td>\n",
" <td>5621</td>\n",
" <td>6088</td>\n",
" <td>6593</td>\n",
" <td>7041</td>\n",
" <td>7314</td>\n",
" <td>7478</td>\n",
" <td>7513</td>\n",
" <td>7755</td>\n",
" <td>7869</td>\n",
Chandrasekhar Ramakrishnan
committed
" </tr>\n",
" <tr>\n",
" <th>France</th>\n",
" <th>-41.223233</th>\n",
" <th>27.399467</th>\n",
Chandrasekhar Ramakrishnan
committed
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>...</td>\n",
" <td>204</td>\n",
" <td>288</td>\n",
" <td>380</td>\n",
" <td>656</td>\n",
" <td>952</td>\n",
" <td>1129</td>\n",
" <td>1212</td>\n",
" <td>1787</td>\n",
" <td>2284</td>\n",
" <td>2284</td>\n",
Chandrasekhar Ramakrishnan
committed
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 51 columns</p>\n",
Chandrasekhar Ramakrishnan
committed
"</div>"
],
"text/plain": [
" 2020-01-22 2020-01-23 2020-01-24 \\\n",
"Country/Region Long Lat \n",
"China 111.649082 32.828385 548 643 920 \n",
"Italy 12.000000 43.000000 0 0 0 \n",
"Iran 53.000000 32.000000 0 0 0 \n",
"Korea, South 128.000000 36.000000 1 1 2 \n",
"France -41.223233 27.399467 0 0 2 \n",
Chandrasekhar Ramakrishnan
committed
"\n",
" 2020-01-25 2020-01-26 2020-01-27 \\\n",
"Country/Region Long Lat \n",
"China 111.649082 32.828385 1406 2075 2877 \n",
"Italy 12.000000 43.000000 0 0 0 \n",
"Iran 53.000000 32.000000 0 0 0 \n",
"Korea, South 128.000000 36.000000 2 3 4 \n",
"France -41.223233 27.399467 3 3 3 \n",
Chandrasekhar Ramakrishnan
committed
"\n",
" 2020-01-28 2020-01-29 2020-01-30 \\\n",
"Country/Region Long Lat \n",
"China 111.649082 32.828385 5509 6087 8141 \n",
"Italy 12.000000 43.000000 0 0 0 \n",
"Iran 53.000000 32.000000 0 0 0 \n",
"Korea, South 128.000000 36.000000 4 4 4 \n",
"France -41.223233 27.399467 4 5 5 \n",
Chandrasekhar Ramakrishnan
committed
"\n",
" 2020-01-31 ... 2020-03-03 2020-03-04 \\\n",
"Country/Region Long Lat ... \n",
"China 111.649082 32.828385 9802 ... 80261 80386 \n",
"Italy 12.000000 43.000000 2 ... 2502 3089 \n",
"Iran 53.000000 32.000000 0 ... 2336 2922 \n",
"Korea, South 128.000000 36.000000 11 ... 5186 5621 \n",
"France -41.223233 27.399467 5 ... 204 288 \n",
Chandrasekhar Ramakrishnan
committed
"\n",
" 2020-03-05 2020-03-06 2020-03-07 \\\n",
"Country/Region Long Lat \n",
"China 111.649082 32.828385 80537 80690 80770 \n",
"Italy 12.000000 43.000000 3858 4636 5883 \n",
"Iran 53.000000 32.000000 3513 4747 5823 \n",
"Korea, South 128.000000 36.000000 6088 6593 7041 \n",
"France -41.223233 27.399467 380 656 952 \n",
Chandrasekhar Ramakrishnan
committed
"\n",
" 2020-03-08 2020-03-09 2020-03-10 \\\n",
"Country/Region Long Lat \n",
"China 111.649082 32.828385 80823 80860 80887 \n",
"Italy 12.000000 43.000000 7375 9172 10149 \n",
"Iran 53.000000 32.000000 6566 7161 8042 \n",
"Korea, South 128.000000 36.000000 7314 7478 7513 \n",
"France -41.223233 27.399467 1129 1212 1787 \n",
Chandrasekhar Ramakrishnan
committed
"\n",
" 2020-03-11 2020-03-12 \n",
"Country/Region Long Lat \n",
"China 111.649082 32.828385 80921 80932 \n",
"Italy 12.000000 43.000000 12462 12462 \n",
"Iran 53.000000 32.000000 9000 10075 \n",
"Korea, South 128.000000 36.000000 7755 7869 \n",
"France -41.223233 27.399467 2284 2284 \n",
Chandrasekhar Ramakrishnan
committed
"\n",
"[5 rows x 51 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frames_map['confirmed'].sort_values(frames_map['confirmed'].columns[-1], ascending=False).head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.012028,
"end_time": "2020-03-13T17:58:04.932580",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:04.920552",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"source": [
"## Read in World Bank data"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"papermill": {
"duration": 0.044223,
"end_time": "2020-03-13T17:58:04.984764",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:04.940541",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import zipfile\n",
"\n",
"# Open the archive and its CSV member in context managers so both file\n",
"# handles are closed as soon as the table has been parsed.\n",
"with zipfile.ZipFile(wb_path) as zf:\n",
"    with zf.open(\"API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv\") as pop_csv:\n",
"        # The first four lines of the export are skipped; the header row follows them.\n",
"        pop_df = pd.read_csv(pop_csv, skiprows=4)"
]
},
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.010228,
"end_time": "2020-03-13T17:58:05.011248",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:05.001020",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"source": [
"There is 2018 population data for all countries/regions except Eritrea (the only other row without a 2018 value is the \"Not classified\" aggregate)."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"papermill": {
"duration": 0.052242,
"end_time": "2020-03-13T17:58:05.071551",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:05.019309",
Chandrasekhar Ramakrishnan
committed
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country Name</th>\n",
" <th>Country Code</th>\n",
" <th>Indicator Name</th>\n",
" <th>Indicator Code</th>\n",
" <th>1960</th>\n",
" <th>1961</th>\n",
" <th>1962</th>\n",
" <th>1963</th>\n",
" <th>1964</th>\n",
" <th>1965</th>\n",
" <th>...</th>\n",
" <th>2011</th>\n",
" <th>2012</th>\n",
" <th>2013</th>\n",
" <th>2014</th>\n",
" <th>2015</th>\n",
" <th>2016</th>\n",
" <th>2017</th>\n",
" <th>2018</th>\n",
" <th>2019</th>\n",
" <th>Unnamed: 64</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>Eritrea</td>\n",
" <td>ERI</td>\n",
" <td>Population, total</td>\n",
" <td>SP.POP.TOTL</td>\n",
" <td>1007590.0</td>\n",
" <td>1033328.0</td>\n",
" <td>1060486.0</td>\n",
" <td>1088854.0</td>\n",
" <td>1118159.0</td>\n",
" <td>1148189.0</td>\n",
" <td>...</td>\n",
" <td>3213972.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>108</th>\n",
" <td>Not classified</td>\n",
" <td>INX</td>\n",
" <td>Population, total</td>\n",
" <td>SP.POP.TOTL</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2 rows × 65 columns</p>\n",
"</div>"
],
"text/plain": [
" Country Name Country Code Indicator Name Indicator Code 1960 \\\n",
"67 Eritrea ERI Population, total SP.POP.TOTL 1007590.0 \n",
"108 Not classified INX Population, total SP.POP.TOTL NaN \n",
"\n",
" 1961 1962 1963 1964 1965 ... 2011 \\\n",
"67 1033328.0 1060486.0 1088854.0 1118159.0 1148189.0 ... 3213972.0 \n",
"108 NaN NaN NaN NaN NaN ... NaN \n",
"\n",
" 2012 2013 2014 2015 2016 2017 2018 2019 Unnamed: 64 \n",
"67 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
"108 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
"\n",
"[2 rows x 65 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pop_df[pd.isna(pop_df['2018'])]"
]
},
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.009568,
"end_time": "2020-03-13T17:58:05.097349",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:05.087781",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"source": [
"Fix the country/region names that differ between the World Bank population data and the JHU CSSE data."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"papermill": {
"duration": 0.038944,
"end_time": "2020-03-13T17:58:05.144958",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:05.106014",
Chandrasekhar Ramakrishnan
committed
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Region names that actually occur in the JHU CSSE series loaded earlier.\n",
"jhu_regions = set(frames_map['confirmed'].index.levels[0])\n",
"\n",
"# World Bank country name -> JHU CSSE spellings to try when the World Bank name\n",
"# itself is not a JHU region. The confirmed-cases table displayed in this\n",
"# notebook labels its rows 'China', 'Iran' and 'Korea, South', so renaming to\n",
"# 'Mainland China', 'Iran (Islamic Republic of)' and 'Republic of Korea'\n",
"# unconditionally made those countries drop out of the population series.\n",
"# The last entry of each list is the legacy spelling and is used as fallback.\n",
"# NOTE(review): only the three names above were checked against the displayed\n",
"# data; confirm the remaining spellings against the current JHU files.\n",
"region_wb_jhu_aliases = {\n",
"    'China': ['Mainland China'],\n",
"    'Iran, Islamic Rep.': ['Iran', 'Iran (Islamic Republic of)'],\n",
"    'Korea, Rep.': ['Korea, South', 'Republic of Korea'],\n",
"    'United States': ['US'],\n",
"    'United Kingdom': ['UK'],\n",
"    'Hong Kong SAR, China': ['Hong Kong SAR'],\n",
"    'Egypt, Arab Rep.': ['Egypt'],\n",
"    'Vietnam': ['Viet Nam'],\n",
"    'Macao SAR, China': ['Macao SAR'],\n",
"    'Slovak Republic': ['Slovakia'],\n",
"    'Moldova': ['Republic of Moldova'],\n",
"    'St. Martin (French part)': ['Saint Martin'],\n",
"    'Brunei Darussalam': ['Brunei']\n",
"}\n",
"\n",
"# Keep the World Bank name when JHU uses it too; otherwise take the first alias\n",
"# present in the JHU data, and the legacy spelling when none is present.\n",
"region_wb_jhu_map = {\n",
"    wb_name: next((cand for cand in [wb_name, *aliases] if cand in jhu_regions), aliases[-1])\n",
"    for wb_name, aliases in region_wb_jhu_aliases.items()\n",
"}\n",
"\n",
"# 2018 population indexed by the JHU-compatible region name ...\n",
"current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')['2018']\n",
"# ... restricted to the regions that occur in the JHU data.\n",
"data_pop_ser = current_pop_ser[current_pop_ser.index.isin(frames_map['confirmed'].index.levels[0])]"
]
},
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.010532,
"end_time": "2020-03-13T17:58:05.172203",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:05.161671",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"source": [
"Some regions in the JHU CSSE data cannot be matched to a World Bank population entry; these are left out of the rate calculation."
]
},
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.008912,
"end_time": "2020-03-13T17:58:05.189883",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:05.180971",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"source": [
"## Compute rates per 100,000 for regions"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"papermill": {
"duration": 0.076031,
"end_time": "2020-03-13T17:58:05.275002",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:05.198971",
Chandrasekhar Ramakrishnan
committed
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"def cases_to_rates_df(df):\n",
"    \"\"\"Convert per-region case counts into rates per 100,000 inhabitants.\n",
"\n",
"    The second and third index levels of ``df`` are dropped, every row is\n",
"    divided by the entry of ``data_pop_ser`` with the same index label and\n",
"    multiplied by 100000. Rows that contain NaN afterwards (for example\n",
"    regions with no entry in ``data_pop_ser``) are removed.\n",
"    \"\"\"\n",
"    counts = df.reset_index([1, 2], drop=True)\n",
"    rates = counts.div(data_pop_ser, 'index')\n",
"    rates = rates.mul(100000)\n",
"    rates = rates.dropna()\n",
"    rates.index.name = 'Country/Region'\n",
"    return rates\n",
"\n",
"\n",
"def frames_to_rates(frames_map):\n",
"    \"\"\"Apply cases_to_rates_df to every frame of the mapping, keeping its keys.\"\"\"\n",
"    converted = {}\n",
"    for series_name, cases_df in frames_map.items():\n",
"        converted[series_name] = cases_to_rates_df(cases_df)\n",
"    return converted\n",
"\n",
"\n",
"rates_map = frames_to_rates(frames_map)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"papermill": {
"duration": 0.062766,
"end_time": "2020-03-13T17:58:05.356294",
Chandrasekhar Ramakrishnan
committed
"exception": false,
"start_time": "2020-03-13T17:58:05.293528",
Chandrasekhar Ramakrishnan
committed
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Files are only written when the notebook is run through papermill\n",
"# (PAPERMILL_OUTPUT_PATH defaults to None in the parameters cell).\n",
"if PAPERMILL_OUTPUT_PATH:\n",
"    # to_csv does not create missing directories, so make sure the target exists.\n",
"    os.makedirs(out_folder, exist_ok=True)\n",
"    for k, v in rates_map.items():\n",
"        out_path = os.path.join(out_folder, f\"ts_rates_19-covid-{k}.csv\")\n",
"        # NOTE(review): reset_index() followed by to_csv() with its default index=True\n",
"        # also writes the 0..n row counter as an unnamed first column; kept as-is\n",
"        # because consumers of these files may rely on that layout - confirm.\n",
"        v.reset_index().to_csv(out_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
},
"papermill": {
"duration": 2.356374,
"end_time": "2020-03-13T17:58:05.683344",
Chandrasekhar Ramakrishnan
committed
"environment_variables": {},
"exception": null,
"input_path": "/tmp/hmyw2rom/notebooks/ToRates.ipynb",
Chandrasekhar Ramakrishnan
committed
"output_path": "runs/ToRates.run.ipynb",
"parameters": {
"PAPERMILL_INPUT_PATH": "/tmp/hmyw2rom/notebooks/ToRates.ipynb",
Chandrasekhar Ramakrishnan
committed
"PAPERMILL_OUTPUT_PATH": "runs/ToRates.run.ipynb",
"out_folder": "data/covid-19_rates",
"ts_folder": "/tmp/hmyw2rom/data/covid-19_jhu-csse",
"wb_path": "/tmp/hmyw2rom/data/worldbank/SP.POP.TOTL.zip"
Chandrasekhar Ramakrishnan
committed
},
"start_time": "2020-03-13T17:58:03.326970",
Chandrasekhar Ramakrishnan
committed
"version": "1.1.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}