From 263e5a1845ad962ab64cca45c1123352e22b8106 Mon Sep 17 00:00:00 2001 From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch> Date: Sun, 15 Mar 2020 16:12:46 +0000 Subject: [PATCH] renku run papermill -p ts_folder ./data/covid-19_jhu-csse/ -p wb_path ./data/worldbank/SP.POP.TOTL.zip -p geodata_path ./data/geodata/geo_data.csv -p out_folder ./data/covid-19_rates/ --inject-paths notebooks/ToRates.ipynb runs/ToRates.run.ipynb --- ...3376f8aa4ba1a325212655d423e5_papermill.cwl | 137 ++++ .../ts_rates_19-covid-confirmed.csv | 4 +- .../ts_rates_19-covid-deaths.csv | 4 +- .../ts_rates_19-covid-recovered.csv | 4 +- runs/ToRates.run.ipynb | 752 +++++++++--------- 5 files changed, 526 insertions(+), 375 deletions(-) create mode 100644 .renku/workflow/2c413376f8aa4ba1a325212655d423e5_papermill.cwl diff --git a/.renku/workflow/2c413376f8aa4ba1a325212655d423e5_papermill.cwl b/.renku/workflow/2c413376f8aa4ba1a325212655d423e5_papermill.cwl new file mode 100644 index 00000000..89405a6a --- /dev/null +++ b/.renku/workflow/2c413376f8aa4ba1a325212655d423e5_papermill.cwl @@ -0,0 +1,137 @@ +arguments: [] +baseCommand: +- papermill +class: CommandLineTool +cwlVersion: v1.0 +hints: [] +inputs: + input_1: + default: ts_folder + inputBinding: + position: 1 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_10: + default: runs/ToRates.run.ipynb + inputBinding: + position: 10 + separate: true + shellQuote: true + streamable: false + type: string + input_2: + default: + class: Directory + listing: [] + path: ../../data/covid-19_jhu-csse + inputBinding: + position: 2 + separate: true + shellQuote: true + streamable: false + type: Directory + input_3: + default: wb_path + inputBinding: + position: 3 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_4: + default: + class: File + path: ../../data/worldbank/SP.POP.TOTL.zip + inputBinding: + position: 4 + separate: true + shellQuote: true + streamable: false + type: File + input_5: + default: geodata_path + inputBinding: + position: 5 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_6: + default: + class: File + path: ../../data/geodata/geo_data.csv + inputBinding: + position: 6 + separate: true + shellQuote: true + streamable: false + type: File + input_7: + default: out_folder + inputBinding: + position: 7 + prefix: -p + separate: true + shellQuote: true + streamable: false + type: string + input_8: + default: data/covid-19_rates + inputBinding: + position: 8 + separate: true + shellQuote: true + streamable: false + type: string + input_9: + default: + class: File + path: ../../notebooks/ToRates.ipynb + inputBinding: + position: 9 + prefix: --inject-paths + separate: true + shellQuote: true + streamable: false + type: File +outputs: + output_0: + outputBinding: + glob: $(inputs.input_10) + streamable: false + type: File + output_1: + outputBinding: + glob: $(inputs.input_8) + streamable: false + type: Directory +permanentFailCodes: [] +requirements: +- class: InlineJavascriptRequirement +- class: InitialWorkDirRequirement + listing: + - entry: '$({"listing": [], "class": "Directory"})' + entryname: runs + writable: true + - entry: '$({"listing": [], "class": "Directory"})' + entryname: data/covid-19_rates + writable: true + - entry: $(inputs.input_2) + entryname: data/covid-19_jhu-csse + writable: false + - entry: $(inputs.input_4) + entryname: data/worldbank/SP.POP.TOTL.zip + writable: false + - entry: $(inputs.input_6) + entryname: data/geodata/geo_data.csv + writable: false + - entry: $(inputs.input_9) + entryname: notebooks/ToRates.ipynb + writable: false +successCodes: [] +temporaryFailCodes: [] diff --git a/data/covid-19_rates/ts_rates_19-covid-confirmed.csv b/data/covid-19_rates/ts_rates_19-covid-confirmed.csv index 41904d5c..5f016936 100644 --- a/data/covid-19_rates/ts_rates_19-covid-confirmed.csv +++ b/data/covid-19_rates/ts_rates_19-covid-confirmed.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8db5a5e65fa9c9cdc76ee4e1a665d74972a0c85d0b4de74aacf7426d1337dc38 -size 52089 +oid sha256:28a118a3e91b2ae986de30fa0f5710868edafea68c2f4192e4cdb78b5ba191c6 +size 63322 diff --git a/data/covid-19_rates/ts_rates_19-covid-deaths.csv b/data/covid-19_rates/ts_rates_19-covid-deaths.csv index 75a97a02..80e095d6 100644 --- a/data/covid-19_rates/ts_rates_19-covid-deaths.csv +++ b/data/covid-19_rates/ts_rates_19-covid-deaths.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6151848c812f7e97885da2cf7bd8f506ef862f9e46183e4849147d8471924577 -size 26971 +oid sha256:0ca853e24dadb8159536248c56d5a2fa03f61c189c90baaf8b981a140941e58a +size 33935 diff --git a/data/covid-19_rates/ts_rates_19-covid-recovered.csv b/data/covid-19_rates/ts_rates_19-covid-recovered.csv index 3050ac8b..e972ff4b 100644 --- a/data/covid-19_rates/ts_rates_19-covid-recovered.csv +++ b/data/covid-19_rates/ts_rates_19-covid-recovered.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb9740f1a3a3f0edb8466d11cca258d5b757b6f2638db80c9a53173607306549 -size 35550 +oid sha256:fe8e68a3ee46a5fb56080256c63795a686d06d731222fa07d54a3250df812f9e +size 44554 diff --git a/runs/ToRates.run.ipynb b/runs/ToRates.run.ipynb index e93912c7..62734fea 100644 --- a/runs/ToRates.run.ipynb +++ b/runs/ToRates.run.ipynb @@ -4,10 +4,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.031319, - "end_time": "2020-03-13T17:58:04.199968", + "duration": 0.02643, + "end_time": "2020-03-15T16:12:44.459053", "exception": false, - "start_time": "2020-03-13T17:58:04.168649", + "start_time": "2020-03-15T16:12:44.432623", "status": "completed" }, "tags": [] @@ -21,10 +21,10 @@ "execution_count": 1, "metadata": { "papermill": { - "duration": 0.365213, - "end_time": "2020-03-13T17:58:04.574239", + "duration": 0.285755, + "end_time": "2020-03-15T16:12:44.756979", "exception": false, - "start_time": "2020-03-13T17:58:04.209026", + "start_time": "2020-03-15T16:12:44.471224", "status": "completed" }, "tags": [] @@ -40,10 +40,10 @@ "execution_count": 2, "metadata": { "papermill": { - "duration": 0.018416, - "end_time": "2020-03-13T17:58:04.613366", + "duration": 0.026229, + "end_time": "2020-03-15T16:12:44.806173", "exception": false, - "start_time": "2020-03-13T17:58:04.594950", + "start_time": "2020-03-15T16:12:44.779944", "status": "completed" }, "tags": [ @@ -54,6 +54,7 @@ "source": [ "ts_folder = \"../data/covid-19_jhu-csse/\"\n", "wb_path = \"../data/worldbank/SP.POP.TOTL.zip\"\n", + "geodata_path = \"../data/geodata/geo_data.csv\"\n", "out_folder = None\n", "PAPERMILL_OUTPUT_PATH = None" ] @@ -63,10 +64,10 @@ "execution_count": 3, "metadata": { "papermill": { - "duration": 0.018935, - "end_time": "2020-03-13T17:58:04.641455", + "duration": 0.024224, + "end_time": "2020-03-15T16:12:44.844892", "exception": false, - "start_time": "2020-03-13T17:58:04.622520", + "start_time": "2020-03-15T16:12:44.820668", "status": "completed" }, "tags": [ @@ -76,21 +77,22 @@ "outputs": [], "source": [ "# Parameters\n", - "PAPERMILL_INPUT_PATH = \"/tmp/hmyw2rom/notebooks/ToRates.ipynb\"\n", + "PAPERMILL_INPUT_PATH = \"notebooks/ToRates.ipynb\"\n", "PAPERMILL_OUTPUT_PATH = \"runs/ToRates.run.ipynb\"\n", - "ts_folder = \"/tmp/hmyw2rom/data/covid-19_jhu-csse\"\n", - "wb_path = \"/tmp/hmyw2rom/data/worldbank/SP.POP.TOTL.zip\"\n", - "out_folder = \"data/covid-19_rates\"\n" + "ts_folder = \"./data/covid-19_jhu-csse/\"\n", + "wb_path = \"./data/worldbank/SP.POP.TOTL.zip\"\n", + "geodata_path = \"./data/geodata/geo_data.csv\"\n", + "out_folder = \"./data/covid-19_rates/\"\n" ] }, { "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.00793, - "end_time": "2020-03-13T17:58:04.657374", + "duration": 0.009881, + "end_time": "2020-03-15T16:12:44.866364", "exception": false, - "start_time": "2020-03-13T17:58:04.649444", + "start_time": "2020-03-15T16:12:44.856483", "status": "completed" }, "tags": [ @@ -108,10 +110,10 @@ "execution_count": 4, "metadata": { "papermill": { - "duration": 0.02791, - "end_time": "2020-03-13T17:58:04.692567", + "duration": 0.031271, + "end_time": "2020-03-15T16:12:44.906787", "exception": false, - "start_time": "2020-03-13T17:58:04.664657", + "start_time": "2020-03-15T16:12:44.875516", "status": "completed" }, "tags": [] @@ -133,10 +135,10 @@ "execution_count": 5, "metadata": { "papermill": { - "duration": 0.127122, - "end_time": "2020-03-13T17:58:04.831576", + "duration": 0.13379, + "end_time": "2020-03-15T16:12:45.055605", "exception": false, - "start_time": "2020-03-13T17:58:04.704454", + "start_time": "2020-03-15T16:12:44.921815", "status": "completed" }, "tags": [] @@ -150,305 +152,14 @@ "}" ] }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "papermill": { - "duration": 0.052319, - "end_time": "2020-03-13T17:58:04.901105", - "exception": false, - "start_time": "2020-03-13T17:58:04.848786", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th>2020-01-22</th>\n", - " <th>2020-01-23</th>\n", - " <th>2020-01-24</th>\n", - " <th>2020-01-25</th>\n", - " <th>2020-01-26</th>\n", - " <th>2020-01-27</th>\n", - " <th>2020-01-28</th>\n", - " <th>2020-01-29</th>\n", - " <th>2020-01-30</th>\n", - " <th>2020-01-31</th>\n", - " <th>...</th>\n", - " <th>2020-03-03</th>\n", - " <th>2020-03-04</th>\n", - " <th>2020-03-05</th>\n", - " <th>2020-03-06</th>\n", - " <th>2020-03-07</th>\n", - " <th>2020-03-08</th>\n", - " <th>2020-03-09</th>\n", - " <th>2020-03-10</th>\n", - " <th>2020-03-11</th>\n", - " <th>2020-03-12</th>\n", - " </tr>\n", - " <tr>\n", - " <th>Country/Region</th>\n", - " <th>Long</th>\n", - " <th>Lat</th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>China</th>\n", - " <th>111.649082</th>\n", - " <th>32.828385</th>\n", - " <td>548</td>\n", - " <td>643</td>\n", - " <td>920</td>\n", - " <td>1406</td>\n", - " <td>2075</td>\n", - " <td>2877</td>\n", - " <td>5509</td>\n", - " <td>6087</td>\n", - " <td>8141</td>\n", - " <td>9802</td>\n", - " <td>...</td>\n", - " <td>80261</td>\n", - " <td>80386</td>\n", - " <td>80537</td>\n", - " <td>80690</td>\n", - " <td>80770</td>\n", - " <td>80823</td>\n", - " <td>80860</td>\n", - " <td>80887</td>\n", - " <td>80921</td>\n", - " <td>80932</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Italy</th>\n", - " <th>12.000000</th>\n", - " <th>43.000000</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>2502</td>\n", - " <td>3089</td>\n", - " <td>3858</td>\n", - " <td>4636</td>\n", - " <td>5883</td>\n", - " <td>7375</td>\n", - " <td>9172</td>\n", - " <td>10149</td>\n", - " <td>12462</td>\n", - " <td>12462</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Iran</th>\n", - " <th>53.000000</th>\n", - " <th>32.000000</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>...</td>\n", - " <td>2336</td>\n", - " <td>2922</td>\n", - " <td>3513</td>\n", - " <td>4747</td>\n", - " <td>5823</td>\n", - " <td>6566</td>\n", - " <td>7161</td>\n", - " <td>8042</td>\n", - " <td>9000</td>\n", - " <td>10075</td>\n", - " </tr>\n", - " <tr>\n", - " <th>Korea, South</th>\n", - " <th>128.000000</th>\n", - " <th>36.000000</th>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>3</td>\n", - " <td>4</td>\n", - " <td>4</td>\n", - " <td>4</td>\n", - " <td>4</td>\n", - " <td>11</td>\n", - " <td>...</td>\n", - " <td>5186</td>\n", - " <td>5621</td>\n", - " <td>6088</td>\n", - " <td>6593</td>\n", - " <td>7041</td>\n", - " <td>7314</td>\n", - " <td>7478</td>\n", - " <td>7513</td>\n", - " <td>7755</td>\n", - " <td>7869</td>\n", - " </tr>\n", - " <tr>\n", - " <th>France</th>\n", - " <th>-41.223233</th>\n", - " <th>27.399467</th>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " <td>4</td>\n", - " <td>5</td>\n", - " <td>5</td>\n", - " <td>5</td>\n", - " <td>...</td>\n", - " <td>204</td>\n", - " <td>288</td>\n", - " <td>380</td>\n", - " <td>656</td>\n", - " <td>952</td>\n", - " <td>1129</td>\n", - " <td>1212</td>\n", - " <td>1787</td>\n", - " <td>2284</td>\n", - " <td>2284</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>5 rows × 51 columns</p>\n", - "</div>" - ], - "text/plain": [ - " 2020-01-22 2020-01-23 2020-01-24 \\\n", - "Country/Region Long Lat \n", - "China 111.649082 32.828385 548 643 920 \n", - "Italy 12.000000 43.000000 0 0 0 \n", - "Iran 53.000000 32.000000 0 0 0 \n", - "Korea, South 128.000000 36.000000 1 1 2 \n", - "France -41.223233 27.399467 0 0 2 \n", - "\n", - " 2020-01-25 2020-01-26 2020-01-27 \\\n", - "Country/Region Long Lat \n", - "China 111.649082 32.828385 1406 2075 2877 \n", - "Italy 12.000000 43.000000 0 0 0 \n", - "Iran 53.000000 32.000000 0 0 0 \n", - "Korea, South 128.000000 36.000000 2 3 4 \n", - "France -41.223233 27.399467 3 3 3 \n", - "\n", - " 2020-01-28 2020-01-29 2020-01-30 \\\n", - "Country/Region Long Lat \n", - "China 111.649082 32.828385 5509 6087 8141 \n", - "Italy 12.000000 43.000000 0 0 0 \n", - "Iran 53.000000 32.000000 0 0 0 \n", - "Korea, South 128.000000 36.000000 4 4 4 \n", - "France -41.223233 27.399467 4 5 5 \n", - "\n", - " 2020-01-31 ... 2020-03-03 2020-03-04 \\\n", - "Country/Region Long Lat ... \n", - "China 111.649082 32.828385 9802 ... 80261 80386 \n", - "Italy 12.000000 43.000000 2 ... 2502 3089 \n", - "Iran 53.000000 32.000000 0 ... 2336 2922 \n", - "Korea, South 128.000000 36.000000 11 ... 5186 5621 \n", - "France -41.223233 27.399467 5 ... 204 288 \n", - "\n", - " 2020-03-05 2020-03-06 2020-03-07 \\\n", - "Country/Region Long Lat \n", - "China 111.649082 32.828385 80537 80690 80770 \n", - "Italy 12.000000 43.000000 3858 4636 5883 \n", - "Iran 53.000000 32.000000 3513 4747 5823 \n", - "Korea, South 128.000000 36.000000 6088 6593 7041 \n", - "France -41.223233 27.399467 380 656 952 \n", - "\n", - " 2020-03-08 2020-03-09 2020-03-10 \\\n", - "Country/Region Long Lat \n", - "China 111.649082 32.828385 80823 80860 80887 \n", - "Italy 12.000000 43.000000 7375 9172 10149 \n", - "Iran 53.000000 32.000000 6566 7161 8042 \n", - "Korea, South 128.000000 36.000000 7314 7478 7513 \n", - "France -41.223233 27.399467 1129 1212 1787 \n", - "\n", - " 2020-03-11 2020-03-12 \n", - "Country/Region Long Lat \n", - "China 111.649082 32.828385 80921 80932 \n", - "Italy 12.000000 43.000000 12462 12462 \n", - "Iran 53.000000 32.000000 9000 10075 \n", - "Korea, South 128.000000 36.000000 7755 7869 \n", - "France -41.223233 27.399467 2284 2284 \n", - "\n", - "[5 rows x 51 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "frames_map['confirmed'].sort_values(frames_map['confirmed'].columns[-1], ascending=False).head()" - ] - }, { "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.012028, - "end_time": "2020-03-13T17:58:04.932580", + "duration": 0.010981, + "end_time": "2020-03-15T16:12:45.086595", "exception": false, - "start_time": "2020-03-13T17:58:04.920552", + "start_time": "2020-03-15T16:12:45.075614", "status": "completed" }, "tags": [] @@ -459,13 +170,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "papermill": { - "duration": 0.044223, - "end_time": "2020-03-13T17:58:04.984764", + "duration": 0.046657, + "end_time": "2020-03-15T16:12:45.142594", "exception": false, - "start_time": "2020-03-13T17:58:04.940541", + "start_time": "2020-03-15T16:12:45.095937", "status": "completed" }, "tags": [] @@ -481,10 +192,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.010228, - "end_time": "2020-03-13T17:58:05.011248", + "duration": 0.011166, + "end_time": "2020-03-15T16:12:45.175964", "exception": false, - "start_time": "2020-03-13T17:58:05.001020", + "start_time": "2020-03-15T16:12:45.164798", "status": "completed" }, "tags": [] @@ -495,13 +206,13 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { "papermill": { - "duration": 0.052242, - "end_time": "2020-03-13T17:58:05.071551", + "duration": 0.066877, + "end_time": "2020-03-15T16:12:45.252472", "exception": false, - "start_time": "2020-03-13T17:58:05.019309", + "start_time": "2020-03-15T16:12:45.185595", "status": "completed" }, "tags": [] @@ -621,7 +332,7 @@ "[2 rows x 65 columns]" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -634,10 +345,10 @@ "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.009568, - "end_time": "2020-03-13T17:58:05.097349", + "duration": 0.010445, + "end_time": "2020-03-15T16:12:45.282710", "exception": false, - "start_time": "2020-03-13T17:58:05.087781", + "start_time": "2020-03-15T16:12:45.272265", "status": "completed" }, "tags": [] @@ -648,13 +359,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { "papermill": { - "duration": 0.038944, - "end_time": "2020-03-13T17:58:05.144958", + "duration": 0.039483, + "end_time": "2020-03-15T16:12:45.332210", "exception": false, - "start_time": "2020-03-13T17:58:05.106014", + "start_time": "2020-03-15T16:12:45.292727", "status": "completed" }, "tags": [] @@ -662,32 +373,49 @@ "outputs": [], "source": [ "region_wb_jhu_map = {\n", - " 'China': 'Mainland China',\n", - " 'Iran, Islamic Rep.': 'Iran (Islamic Republic of)',\n", - " 'Korea, Rep.': 'Republic of Korea',\n", - " 'United States': 'US',\n", - " 'United Kingdom': 'UK',\n", - " 'Hong Kong SAR, China': 'Hong Kong SAR',\n", + " 'Brunei Darussalam': 'Brunei',\n", + " 'Czech Republic': 'Czechia',\n", " 'Egypt, Arab Rep.': 'Egypt',\n", - " 'Vietnam': 'Viet Nam',\n", + " 'Hong Kong SAR, China': 'Hong Kong SAR',\n", + " 'Iran, Islamic Rep.': 'Iran',\n", + " 'Korea, Rep.': 'Korea, South',\n", " 'Macao SAR, China': 'Macao SAR',\n", + " 'Russian Federation': 'Russia',\n", " 'Slovak Republic': 'Slovakia',\n", - " 'Moldova': 'Republic of Moldova',\n", " 'St. Martin (French part)': 'Saint Martin',\n", - " 'Brunei Darussalam': 'Brunei'\n", + " 'United States': 'US'\n", "}\n", "current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')['2018']\n", "data_pop_ser = current_pop_ser[current_pop_ser.index.isin(frames_map['confirmed'].index.levels[0])]" ] }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "papermill": { + "duration": 0.021992, + "end_time": "2020-03-15T16:12:45.373057", + "exception": false, + "start_time": "2020-03-15T16:12:45.351065", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Use this to find the name in the series\n", + "# current_pop_ser[current_pop_ser.index.str.contains('Czech')]" + ] + }, { "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.010532, - "end_time": "2020-03-13T17:58:05.172203", + "duration": 0.009838, + "end_time": "2020-03-15T16:12:45.395040", "exception": false, - "start_time": "2020-03-13T17:58:05.161671", + "start_time": "2020-03-15T16:12:45.385202", "status": "completed" }, "tags": [] @@ -696,14 +424,299 @@ "There are some regions that we cannot resolve, but we will just ignore these." ] }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "papermill": { + "duration": 0.043595, + "end_time": "2020-03-15T16:12:45.448638", + "exception": false, + "start_time": "2020-03-15T16:12:45.405043", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th>2020-03-12 00:00:00</th>\n", + " <th>2020-03-13 00:00:00</th>\n", + " </tr>\n", + " <tr>\n", + " <th>Country/Region</th>\n", + " <th>Long</th>\n", + " <th>Lat</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Congo (Kinshasa)</th>\n", + " <th>21.7587</th>\n", + " <th>-4.0383</th>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Cruise Ship</th>\n", + " <th>139.6380</th>\n", + " <th>35.4437</th>\n", + " <td>696</td>\n", + " <td>696</td>\n", + " </tr>\n", + " <tr>\n", + " <th>French Guiana</th>\n", + " <th>-53.1258</th>\n", + " <th>3.9339</th>\n", + " <td>5</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Guadeloupe</th>\n", + " <th>-61.5510</th>\n", + " <th>16.2650</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Holy See</th>\n", + " <th>12.4534</th>\n", + " <th>41.9029</th>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Martinique</th>\n", + " <th>-61.0242</th>\n", + " <th>14.6415</th>\n", + " <td>3</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Reunion</th>\n", + " <th>55.5364</th>\n", + " <th>-21.1151</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Taiwan*</th>\n", + " <th>121.0000</th>\n", + " <th>23.7000</th>\n", + " <td>49</td>\n", + " <td>50</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 2020-03-12 00:00:00 2020-03-13 00:00:00\n", + "Country/Region Long Lat \n", + "Congo (Kinshasa) 21.7587 -4.0383 1 2\n", + "Cruise Ship 139.6380 35.4437 696 696\n", + "French Guiana -53.1258 3.9339 5 5\n", + "Guadeloupe -61.5510 16.2650 0 1\n", + "Holy See 12.4534 41.9029 1 1\n", + "Martinique -61.0242 14.6415 3 3\n", + "Reunion 55.5364 -21.1151 1 5\n", + "Taiwan* 121.0000 23.7000 49 50" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frames_map['confirmed'].loc[\n", + " frames_map['confirmed'].index.levels[0].isin(data_pop_ser.index) == False\n", + "].iloc[:,-2:]" + ] + }, { "cell_type": "markdown", "metadata": { "papermill": { - "duration": 0.008912, - "end_time": "2020-03-13T17:58:05.189883", + "duration": 0.011436, + "end_time": "2020-03-15T16:12:45.477570", "exception": false, - "start_time": "2020-03-13T17:58:05.180971", + "start_time": "2020-03-15T16:12:45.466134", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Read in geodata to get additional population numbers" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "papermill": { + "duration": 0.038201, + "end_time": "2020-03-15T16:12:45.525965", + "exception": false, + "start_time": "2020-03-15T16:12:45.487764", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "geodata_df = pd.read_csv(geodata_path).drop('Unnamed: 0', axis=1).set_index('name_jhu')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.011731, + "end_time": "2020-03-15T16:12:45.559855", + "exception": false, + "start_time": "2020-03-15T16:12:45.548124", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Add in populations for missing countries" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "papermill": { + "duration": 0.054112, + "end_time": "2020-03-15T16:12:45.625087", + "exception": false, + "start_time": "2020-03-15T16:12:45.570975", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>name</th>\n", + " <th>name_long</th>\n", + " <th>region_un</th>\n", + " <th>subregion</th>\n", + " <th>region_wb</th>\n", + " <th>pop_est</th>\n", + " <th>gdp_md_est</th>\n", + " <th>income_grp</th>\n", + " <th>Longitude</th>\n", + " <th>Latitude</th>\n", + " </tr>\n", + " <tr>\n", + " <th>name_jhu</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Taiwan*</th>\n", + " <td>Taiwan</td>\n", + " <td>Taiwan</td>\n", + " <td>Asia</td>\n", + " <td>Eastern Asia</td>\n", + " <td>East Asia & Pacific</td>\n", + " <td>22974347</td>\n", + " <td>712000.0</td>\n", + " <td>2. High income: nonOECD</td>\n", + " <td>120.954273</td>\n", + " <td>23.753993</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " name name_long region_un subregion region_wb \\\n", + "name_jhu \n", + "Taiwan* Taiwan Taiwan Asia Eastern Asia East Asia & Pacific \n", + "\n", + " pop_est gdp_md_est income_grp Longitude Latitude \n", + "name_jhu \n", + "Taiwan* 22974347 712000.0 2. High income: nonOECD 120.954273 23.753993 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "missing_countries = frames_map['confirmed'].loc[\n", + " frames_map['confirmed'].index.levels[0].isin(data_pop_ser.index) == False\n", + "].iloc[:,-2:].reset_index()['Country/Region']\n", + "\n", + "display(geodata_df.loc[geodata_df.index.isin(missing_countries)])\n", + "\n", + "data_pop_ser = data_pop_ser.append(geodata_df.loc[geodata_df.index.isin(missing_countries), 'pop_est'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.011426, + "end_time": "2020-03-15T16:12:45.657075", + "exception": false, + "start_time": "2020-03-15T16:12:45.645649", "status": "completed" }, "tags": [] @@ -714,13 +727,13 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": { "papermill": { - "duration": 0.076031, - "end_time": "2020-03-13T17:58:05.275002", + "duration": 0.082905, + "end_time": "2020-03-15T16:12:45.750274", "exception": false, - "start_time": "2020-03-13T17:58:05.198971", + "start_time": "2020-03-15T16:12:45.667369", "status": "completed" }, "tags": [] @@ -742,13 +755,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": { "papermill": { - "duration": 0.062766, - "end_time": "2020-03-13T17:58:05.356294", + "duration": 0.057388, + "end_time": "2020-03-15T16:12:45.829536", "exception": false, - "start_time": "2020-03-13T17:58:05.293528", + "start_time": "2020-03-15T16:12:45.772148", "status": "completed" }, "tags": [] @@ -781,20 +794,21 @@ "version": "3.7.6" }, "papermill": { - "duration": 2.356374, - "end_time": "2020-03-13T17:58:05.683344", + "duration": 2.578814, + "end_time": "2020-03-15T16:12:46.163138", "environment_variables": {}, "exception": null, - "input_path": "/tmp/hmyw2rom/notebooks/ToRates.ipynb", + "input_path": "notebooks/ToRates.ipynb", "output_path": "runs/ToRates.run.ipynb", "parameters": { - "PAPERMILL_INPUT_PATH": "/tmp/hmyw2rom/notebooks/ToRates.ipynb", + "PAPERMILL_INPUT_PATH": "notebooks/ToRates.ipynb", "PAPERMILL_OUTPUT_PATH": "runs/ToRates.run.ipynb", - "out_folder": "data/covid-19_rates", - "ts_folder": "/tmp/hmyw2rom/data/covid-19_jhu-csse", - "wb_path": "/tmp/hmyw2rom/data/worldbank/SP.POP.TOTL.zip" + "geodata_path": "./data/geodata/geo_data.csv", + "out_folder": "./data/covid-19_rates/", + "ts_folder": "./data/covid-19_jhu-csse/", + "wb_path": "./data/worldbank/SP.POP.TOTL.zip" }, - "start_time": "2020-03-13T17:58:03.326970", + "start_time": "2020-03-15T16:12:43.584324", "version": "1.1.0" } }, -- GitLab