From 263e5a1845ad962ab64cca45c1123352e22b8106 Mon Sep 17 00:00:00 2001
From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch>
Date: Sun, 15 Mar 2020 16:12:46 +0000
Subject: [PATCH] renku run papermill -p ts_folder ./data/covid-19_jhu-csse/ -p
 wb_path ./data/worldbank/SP.POP.TOTL.zip -p geodata_path
 ./data/geodata/geo_data.csv -p out_folder ./data/covid-19_rates/
 --inject-paths notebooks/ToRates.ipynb runs/ToRates.run.ipynb

---
 ...3376f8aa4ba1a325212655d423e5_papermill.cwl | 137 ++++
 .../ts_rates_19-covid-confirmed.csv           |   4 +-
 .../ts_rates_19-covid-deaths.csv              |   4 +-
 .../ts_rates_19-covid-recovered.csv           |   4 +-
 runs/ToRates.run.ipynb                        | 752 +++++++++---------
 5 files changed, 526 insertions(+), 375 deletions(-)
 create mode 100644 .renku/workflow/2c413376f8aa4ba1a325212655d423e5_papermill.cwl

diff --git a/.renku/workflow/2c413376f8aa4ba1a325212655d423e5_papermill.cwl b/.renku/workflow/2c413376f8aa4ba1a325212655d423e5_papermill.cwl
new file mode 100644
index 00000000..89405a6a
--- /dev/null
+++ b/.renku/workflow/2c413376f8aa4ba1a325212655d423e5_papermill.cwl
@@ -0,0 +1,137 @@
+arguments: []
+baseCommand:
+- papermill
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default: ts_folder
+    inputBinding:
+      position: 1
+      prefix: -p
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_10:
+    default: runs/ToRates.run.ipynb
+    inputBinding:
+      position: 10
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_2:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/covid-19_jhu-csse
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: Directory
+  input_3:
+    default: wb_path
+    inputBinding:
+      position: 3
+      prefix: -p
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_4:
+    default:
+      class: File
+      path: ../../data/worldbank/SP.POP.TOTL.zip
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default: geodata_path
+    inputBinding:
+      position: 5
+      prefix: -p
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_6:
+    default:
+      class: File
+      path: ../../data/geodata/geo_data.csv
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: out_folder
+    inputBinding:
+      position: 7
+      prefix: -p
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_8:
+    default: data/covid-19_rates
+    inputBinding:
+      position: 8
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_9:
+    default:
+      class: File
+      path: ../../notebooks/ToRates.ipynb
+    inputBinding:
+      position: 9
+      prefix: --inject-paths
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_10)
+    streamable: false
+    type: File
+  output_1:
+    outputBinding:
+      glob: $(inputs.input_8)
+    streamable: false
+    type: Directory
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: runs
+    writable: true
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/covid-19_rates
+    writable: true
+  - entry: $(inputs.input_2)
+    entryname: data/covid-19_jhu-csse
+    writable: false
+  - entry: $(inputs.input_4)
+    entryname: data/worldbank/SP.POP.TOTL.zip
+    writable: false
+  - entry: $(inputs.input_6)
+    entryname: data/geodata/geo_data.csv
+    writable: false
+  - entry: $(inputs.input_9)
+    entryname: notebooks/ToRates.ipynb
+    writable: false
+successCodes: []
+temporaryFailCodes: []
diff --git a/data/covid-19_rates/ts_rates_19-covid-confirmed.csv b/data/covid-19_rates/ts_rates_19-covid-confirmed.csv
index 41904d5c..5f016936 100644
--- a/data/covid-19_rates/ts_rates_19-covid-confirmed.csv
+++ b/data/covid-19_rates/ts_rates_19-covid-confirmed.csv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8db5a5e65fa9c9cdc76ee4e1a665d74972a0c85d0b4de74aacf7426d1337dc38
-size 52089
+oid sha256:28a118a3e91b2ae986de30fa0f5710868edafea68c2f4192e4cdb78b5ba191c6
+size 63322
diff --git a/data/covid-19_rates/ts_rates_19-covid-deaths.csv b/data/covid-19_rates/ts_rates_19-covid-deaths.csv
index 75a97a02..80e095d6 100644
--- a/data/covid-19_rates/ts_rates_19-covid-deaths.csv
+++ b/data/covid-19_rates/ts_rates_19-covid-deaths.csv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6151848c812f7e97885da2cf7bd8f506ef862f9e46183e4849147d8471924577
-size 26971
+oid sha256:0ca853e24dadb8159536248c56d5a2fa03f61c189c90baaf8b981a140941e58a
+size 33935
diff --git a/data/covid-19_rates/ts_rates_19-covid-recovered.csv b/data/covid-19_rates/ts_rates_19-covid-recovered.csv
index 3050ac8b..e972ff4b 100644
--- a/data/covid-19_rates/ts_rates_19-covid-recovered.csv
+++ b/data/covid-19_rates/ts_rates_19-covid-recovered.csv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb9740f1a3a3f0edb8466d11cca258d5b757b6f2638db80c9a53173607306549
-size 35550
+oid sha256:fe8e68a3ee46a5fb56080256c63795a686d06d731222fa07d54a3250df812f9e
+size 44554
diff --git a/runs/ToRates.run.ipynb b/runs/ToRates.run.ipynb
index e93912c7..62734fea 100644
--- a/runs/ToRates.run.ipynb
+++ b/runs/ToRates.run.ipynb
@@ -4,10 +4,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.031319,
-     "end_time": "2020-03-13T17:58:04.199968",
+     "duration": 0.02643,
+     "end_time": "2020-03-15T16:12:44.459053",
      "exception": false,
-     "start_time": "2020-03-13T17:58:04.168649",
+     "start_time": "2020-03-15T16:12:44.432623",
      "status": "completed"
     },
     "tags": []
@@ -21,10 +21,10 @@
    "execution_count": 1,
    "metadata": {
     "papermill": {
-     "duration": 0.365213,
-     "end_time": "2020-03-13T17:58:04.574239",
+     "duration": 0.285755,
+     "end_time": "2020-03-15T16:12:44.756979",
      "exception": false,
-     "start_time": "2020-03-13T17:58:04.209026",
+     "start_time": "2020-03-15T16:12:44.471224",
      "status": "completed"
     },
     "tags": []
@@ -40,10 +40,10 @@
    "execution_count": 2,
    "metadata": {
     "papermill": {
-     "duration": 0.018416,
-     "end_time": "2020-03-13T17:58:04.613366",
+     "duration": 0.026229,
+     "end_time": "2020-03-15T16:12:44.806173",
      "exception": false,
-     "start_time": "2020-03-13T17:58:04.594950",
+     "start_time": "2020-03-15T16:12:44.779944",
      "status": "completed"
     },
     "tags": [
@@ -54,6 +54,7 @@
    "source": [
     "ts_folder = \"../data/covid-19_jhu-csse/\"\n",
     "wb_path = \"../data/worldbank/SP.POP.TOTL.zip\"\n",
+    "geodata_path = \"../data/geodata/geo_data.csv\"\n",
     "out_folder = None\n",
     "PAPERMILL_OUTPUT_PATH = None"
    ]
@@ -63,10 +64,10 @@
    "execution_count": 3,
    "metadata": {
     "papermill": {
-     "duration": 0.018935,
-     "end_time": "2020-03-13T17:58:04.641455",
+     "duration": 0.024224,
+     "end_time": "2020-03-15T16:12:44.844892",
      "exception": false,
-     "start_time": "2020-03-13T17:58:04.622520",
+     "start_time": "2020-03-15T16:12:44.820668",
      "status": "completed"
     },
     "tags": [
@@ -76,21 +77,22 @@
    "outputs": [],
    "source": [
     "# Parameters\n",
-    "PAPERMILL_INPUT_PATH = \"/tmp/hmyw2rom/notebooks/ToRates.ipynb\"\n",
+    "PAPERMILL_INPUT_PATH = \"notebooks/ToRates.ipynb\"\n",
     "PAPERMILL_OUTPUT_PATH = \"runs/ToRates.run.ipynb\"\n",
-    "ts_folder = \"/tmp/hmyw2rom/data/covid-19_jhu-csse\"\n",
-    "wb_path = \"/tmp/hmyw2rom/data/worldbank/SP.POP.TOTL.zip\"\n",
-    "out_folder = \"data/covid-19_rates\"\n"
+    "ts_folder = \"./data/covid-19_jhu-csse/\"\n",
+    "wb_path = \"./data/worldbank/SP.POP.TOTL.zip\"\n",
+    "geodata_path = \"./data/geodata/geo_data.csv\"\n",
+    "out_folder = \"./data/covid-19_rates/\"\n"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.00793,
-     "end_time": "2020-03-13T17:58:04.657374",
+     "duration": 0.009881,
+     "end_time": "2020-03-15T16:12:44.866364",
      "exception": false,
-     "start_time": "2020-03-13T17:58:04.649444",
+     "start_time": "2020-03-15T16:12:44.856483",
      "status": "completed"
     },
     "tags": [
@@ -108,10 +110,10 @@
    "execution_count": 4,
    "metadata": {
     "papermill": {
-     "duration": 0.02791,
-     "end_time": "2020-03-13T17:58:04.692567",
+     "duration": 0.031271,
+     "end_time": "2020-03-15T16:12:44.906787",
      "exception": false,
-     "start_time": "2020-03-13T17:58:04.664657",
+     "start_time": "2020-03-15T16:12:44.875516",
      "status": "completed"
     },
     "tags": []
@@ -133,10 +135,10 @@
    "execution_count": 5,
    "metadata": {
     "papermill": {
-     "duration": 0.127122,
-     "end_time": "2020-03-13T17:58:04.831576",
+     "duration": 0.13379,
+     "end_time": "2020-03-15T16:12:45.055605",
      "exception": false,
-     "start_time": "2020-03-13T17:58:04.704454",
+     "start_time": "2020-03-15T16:12:44.921815",
      "status": "completed"
     },
     "tags": []
@@ -150,305 +152,14 @@
     "}"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "papermill": {
-     "duration": 0.052319,
-     "end_time": "2020-03-13T17:58:04.901105",
-     "exception": false,
-     "start_time": "2020-03-13T17:58:04.848786",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th>2020-01-22</th>\n",
-       "      <th>2020-01-23</th>\n",
-       "      <th>2020-01-24</th>\n",
-       "      <th>2020-01-25</th>\n",
-       "      <th>2020-01-26</th>\n",
-       "      <th>2020-01-27</th>\n",
-       "      <th>2020-01-28</th>\n",
-       "      <th>2020-01-29</th>\n",
-       "      <th>2020-01-30</th>\n",
-       "      <th>2020-01-31</th>\n",
-       "      <th>...</th>\n",
-       "      <th>2020-03-03</th>\n",
-       "      <th>2020-03-04</th>\n",
-       "      <th>2020-03-05</th>\n",
-       "      <th>2020-03-06</th>\n",
-       "      <th>2020-03-07</th>\n",
-       "      <th>2020-03-08</th>\n",
-       "      <th>2020-03-09</th>\n",
-       "      <th>2020-03-10</th>\n",
-       "      <th>2020-03-11</th>\n",
-       "      <th>2020-03-12</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>Country/Region</th>\n",
-       "      <th>Long</th>\n",
-       "      <th>Lat</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>China</th>\n",
-       "      <th>111.649082</th>\n",
-       "      <th>32.828385</th>\n",
-       "      <td>548</td>\n",
-       "      <td>643</td>\n",
-       "      <td>920</td>\n",
-       "      <td>1406</td>\n",
-       "      <td>2075</td>\n",
-       "      <td>2877</td>\n",
-       "      <td>5509</td>\n",
-       "      <td>6087</td>\n",
-       "      <td>8141</td>\n",
-       "      <td>9802</td>\n",
-       "      <td>...</td>\n",
-       "      <td>80261</td>\n",
-       "      <td>80386</td>\n",
-       "      <td>80537</td>\n",
-       "      <td>80690</td>\n",
-       "      <td>80770</td>\n",
-       "      <td>80823</td>\n",
-       "      <td>80860</td>\n",
-       "      <td>80887</td>\n",
-       "      <td>80921</td>\n",
-       "      <td>80932</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>Italy</th>\n",
-       "      <th>12.000000</th>\n",
-       "      <th>43.000000</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>2502</td>\n",
-       "      <td>3089</td>\n",
-       "      <td>3858</td>\n",
-       "      <td>4636</td>\n",
-       "      <td>5883</td>\n",
-       "      <td>7375</td>\n",
-       "      <td>9172</td>\n",
-       "      <td>10149</td>\n",
-       "      <td>12462</td>\n",
-       "      <td>12462</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>Iran</th>\n",
-       "      <th>53.000000</th>\n",
-       "      <th>32.000000</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>...</td>\n",
-       "      <td>2336</td>\n",
-       "      <td>2922</td>\n",
-       "      <td>3513</td>\n",
-       "      <td>4747</td>\n",
-       "      <td>5823</td>\n",
-       "      <td>6566</td>\n",
-       "      <td>7161</td>\n",
-       "      <td>8042</td>\n",
-       "      <td>9000</td>\n",
-       "      <td>10075</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>Korea, South</th>\n",
-       "      <th>128.000000</th>\n",
-       "      <th>36.000000</th>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "      <td>4</td>\n",
-       "      <td>4</td>\n",
-       "      <td>4</td>\n",
-       "      <td>11</td>\n",
-       "      <td>...</td>\n",
-       "      <td>5186</td>\n",
-       "      <td>5621</td>\n",
-       "      <td>6088</td>\n",
-       "      <td>6593</td>\n",
-       "      <td>7041</td>\n",
-       "      <td>7314</td>\n",
-       "      <td>7478</td>\n",
-       "      <td>7513</td>\n",
-       "      <td>7755</td>\n",
-       "      <td>7869</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>France</th>\n",
-       "      <th>-41.223233</th>\n",
-       "      <th>27.399467</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "      <td>5</td>\n",
-       "      <td>5</td>\n",
-       "      <td>5</td>\n",
-       "      <td>...</td>\n",
-       "      <td>204</td>\n",
-       "      <td>288</td>\n",
-       "      <td>380</td>\n",
-       "      <td>656</td>\n",
-       "      <td>952</td>\n",
-       "      <td>1129</td>\n",
-       "      <td>1212</td>\n",
-       "      <td>1787</td>\n",
-       "      <td>2284</td>\n",
-       "      <td>2284</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows × 51 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                      2020-01-22  2020-01-23  2020-01-24  \\\n",
-       "Country/Region Long        Lat                                             \n",
-       "China           111.649082 32.828385         548         643         920   \n",
-       "Italy           12.000000  43.000000           0           0           0   \n",
-       "Iran            53.000000  32.000000           0           0           0   \n",
-       "Korea, South    128.000000 36.000000           1           1           2   \n",
-       "France         -41.223233  27.399467           0           0           2   \n",
-       "\n",
-       "                                      2020-01-25  2020-01-26  2020-01-27  \\\n",
-       "Country/Region Long        Lat                                             \n",
-       "China           111.649082 32.828385        1406        2075        2877   \n",
-       "Italy           12.000000  43.000000           0           0           0   \n",
-       "Iran            53.000000  32.000000           0           0           0   \n",
-       "Korea, South    128.000000 36.000000           2           3           4   \n",
-       "France         -41.223233  27.399467           3           3           3   \n",
-       "\n",
-       "                                      2020-01-28  2020-01-29  2020-01-30  \\\n",
-       "Country/Region Long        Lat                                             \n",
-       "China           111.649082 32.828385        5509        6087        8141   \n",
-       "Italy           12.000000  43.000000           0           0           0   \n",
-       "Iran            53.000000  32.000000           0           0           0   \n",
-       "Korea, South    128.000000 36.000000           4           4           4   \n",
-       "France         -41.223233  27.399467           4           5           5   \n",
-       "\n",
-       "                                      2020-01-31  ...  2020-03-03  2020-03-04  \\\n",
-       "Country/Region Long        Lat                    ...                           \n",
-       "China           111.649082 32.828385        9802  ...       80261       80386   \n",
-       "Italy           12.000000  43.000000           2  ...        2502        3089   \n",
-       "Iran            53.000000  32.000000           0  ...        2336        2922   \n",
-       "Korea, South    128.000000 36.000000          11  ...        5186        5621   \n",
-       "France         -41.223233  27.399467           5  ...         204         288   \n",
-       "\n",
-       "                                      2020-03-05  2020-03-06  2020-03-07  \\\n",
-       "Country/Region Long        Lat                                             \n",
-       "China           111.649082 32.828385       80537       80690       80770   \n",
-       "Italy           12.000000  43.000000        3858        4636        5883   \n",
-       "Iran            53.000000  32.000000        3513        4747        5823   \n",
-       "Korea, South    128.000000 36.000000        6088        6593        7041   \n",
-       "France         -41.223233  27.399467         380         656         952   \n",
-       "\n",
-       "                                      2020-03-08  2020-03-09  2020-03-10  \\\n",
-       "Country/Region Long        Lat                                             \n",
-       "China           111.649082 32.828385       80823       80860       80887   \n",
-       "Italy           12.000000  43.000000        7375        9172       10149   \n",
-       "Iran            53.000000  32.000000        6566        7161        8042   \n",
-       "Korea, South    128.000000 36.000000        7314        7478        7513   \n",
-       "France         -41.223233  27.399467        1129        1212        1787   \n",
-       "\n",
-       "                                      2020-03-11  2020-03-12  \n",
-       "Country/Region Long        Lat                                \n",
-       "China           111.649082 32.828385       80921       80932  \n",
-       "Italy           12.000000  43.000000       12462       12462  \n",
-       "Iran            53.000000  32.000000        9000       10075  \n",
-       "Korea, South    128.000000 36.000000        7755        7869  \n",
-       "France         -41.223233  27.399467        2284        2284  \n",
-       "\n",
-       "[5 rows x 51 columns]"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "frames_map['confirmed'].sort_values(frames_map['confirmed'].columns[-1], ascending=False).head()"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.012028,
-     "end_time": "2020-03-13T17:58:04.932580",
+     "duration": 0.010981,
+     "end_time": "2020-03-15T16:12:45.086595",
      "exception": false,
-     "start_time": "2020-03-13T17:58:04.920552",
+     "start_time": "2020-03-15T16:12:45.075614",
      "status": "completed"
     },
     "tags": []
@@ -459,13 +170,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {
     "papermill": {
-     "duration": 0.044223,
-     "end_time": "2020-03-13T17:58:04.984764",
+     "duration": 0.046657,
+     "end_time": "2020-03-15T16:12:45.142594",
      "exception": false,
-     "start_time": "2020-03-13T17:58:04.940541",
+     "start_time": "2020-03-15T16:12:45.095937",
      "status": "completed"
     },
     "tags": []
@@ -481,10 +192,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.010228,
-     "end_time": "2020-03-13T17:58:05.011248",
+     "duration": 0.011166,
+     "end_time": "2020-03-15T16:12:45.175964",
      "exception": false,
-     "start_time": "2020-03-13T17:58:05.001020",
+     "start_time": "2020-03-15T16:12:45.164798",
      "status": "completed"
     },
     "tags": []
@@ -495,13 +206,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "metadata": {
     "papermill": {
-     "duration": 0.052242,
-     "end_time": "2020-03-13T17:58:05.071551",
+     "duration": 0.066877,
+     "end_time": "2020-03-15T16:12:45.252472",
      "exception": false,
-     "start_time": "2020-03-13T17:58:05.019309",
+     "start_time": "2020-03-15T16:12:45.185595",
      "status": "completed"
     },
     "tags": []
@@ -621,7 +332,7 @@
        "[2 rows x 65 columns]"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -634,10 +345,10 @@
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.009568,
-     "end_time": "2020-03-13T17:58:05.097349",
+     "duration": 0.010445,
+     "end_time": "2020-03-15T16:12:45.282710",
      "exception": false,
-     "start_time": "2020-03-13T17:58:05.087781",
+     "start_time": "2020-03-15T16:12:45.272265",
      "status": "completed"
     },
     "tags": []
@@ -648,13 +359,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "metadata": {
     "papermill": {
-     "duration": 0.038944,
-     "end_time": "2020-03-13T17:58:05.144958",
+     "duration": 0.039483,
+     "end_time": "2020-03-15T16:12:45.332210",
      "exception": false,
-     "start_time": "2020-03-13T17:58:05.106014",
+     "start_time": "2020-03-15T16:12:45.292727",
      "status": "completed"
     },
     "tags": []
@@ -662,32 +373,49 @@
    "outputs": [],
    "source": [
     "region_wb_jhu_map = {\n",
-    "    'China': 'Mainland China',\n",
-    "     'Iran, Islamic Rep.': 'Iran (Islamic Republic of)',\n",
-    "     'Korea, Rep.': 'Republic of Korea',\n",
-    "     'United States': 'US',\n",
-    "     'United Kingdom': 'UK',\n",
-    "     'Hong Kong SAR, China': 'Hong Kong SAR',\n",
+    "     'Brunei Darussalam': 'Brunei',\n",
+    "     'Czech Republic': 'Czechia',\n",
     "     'Egypt, Arab Rep.': 'Egypt',\n",
-    "     'Vietnam': 'Viet Nam',\n",
+    "     'Hong Kong SAR, China': 'Hong Kong SAR',\n",
+    "     'Iran, Islamic Rep.': 'Iran',\n",
+    "     'Korea, Rep.': 'Korea, South',\n",
     "     'Macao SAR, China': 'Macao SAR',\n",
+    "     'Russian Federation': 'Russia',\n",
     "     'Slovak Republic': 'Slovakia',\n",
-    "     'Moldova': 'Republic of Moldova',\n",
     "     'St. Martin (French part)': 'Saint Martin',\n",
-    "     'Brunei Darussalam': 'Brunei'\n",
+    "     'United States': 'US'\n",
     "}\n",
     "current_pop_ser = pop_df[['Country Name', '2018']].copy().replace(region_wb_jhu_map).set_index('Country Name')['2018']\n",
     "data_pop_ser = current_pop_ser[current_pop_ser.index.isin(frames_map['confirmed'].index.levels[0])]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "papermill": {
+     "duration": 0.021992,
+     "end_time": "2020-03-15T16:12:45.373057",
+     "exception": false,
+     "start_time": "2020-03-15T16:12:45.351065",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Use this to find the name in the series\n",
+    "# current_pop_ser[current_pop_ser.index.str.contains('Czech')]"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.010532,
-     "end_time": "2020-03-13T17:58:05.172203",
+     "duration": 0.009838,
+     "end_time": "2020-03-15T16:12:45.395040",
      "exception": false,
-     "start_time": "2020-03-13T17:58:05.161671",
+     "start_time": "2020-03-15T16:12:45.385202",
      "status": "completed"
     },
     "tags": []
@@ -696,14 +424,299 @@
     "There are some regions that we cannot resolve, but we will just ignore these."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "papermill": {
+     "duration": 0.043595,
+     "end_time": "2020-03-15T16:12:45.448638",
+     "exception": false,
+     "start_time": "2020-03-15T16:12:45.405043",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>2020-03-12 00:00:00</th>\n",
+       "      <th>2020-03-13 00:00:00</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Country/Region</th>\n",
+       "      <th>Long</th>\n",
+       "      <th>Lat</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Congo (Kinshasa)</th>\n",
+       "      <th>21.7587</th>\n",
+       "      <th>-4.0383</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Cruise Ship</th>\n",
+       "      <th>139.6380</th>\n",
+       "      <th>35.4437</th>\n",
+       "      <td>696</td>\n",
+       "      <td>696</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>French Guiana</th>\n",
+       "      <th>-53.1258</th>\n",
+       "      <th>3.9339</th>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Guadeloupe</th>\n",
+       "      <th>-61.5510</th>\n",
+       "      <th>16.2650</th>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Holy See</th>\n",
+       "      <th>12.4534</th>\n",
+       "      <th>41.9029</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Martinique</th>\n",
+       "      <th>-61.0242</th>\n",
+       "      <th>14.6415</th>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Reunion</th>\n",
+       "      <th>55.5364</th>\n",
+       "      <th>-21.1151</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Taiwan*</th>\n",
+       "      <th>121.0000</th>\n",
+       "      <th>23.7000</th>\n",
+       "      <td>49</td>\n",
+       "      <td>50</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                     2020-03-12 00:00:00  2020-03-13 00:00:00\n",
+       "Country/Region   Long      Lat                                               \n",
+       "Congo (Kinshasa)  21.7587  -4.0383                     1                    2\n",
+       "Cruise Ship       139.6380  35.4437                  696                  696\n",
+       "French Guiana    -53.1258   3.9339                     5                    5\n",
+       "Guadeloupe       -61.5510   16.2650                    0                    1\n",
+       "Holy See          12.4534   41.9029                    1                    1\n",
+       "Martinique       -61.0242   14.6415                    3                    3\n",
+       "Reunion           55.5364  -21.1151                    1                    5\n",
+       "Taiwan*           121.0000  23.7000                   49                   50"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "frames_map['confirmed'].loc[\n",
+    "    frames_map['confirmed'].index.levels[0].isin(data_pop_ser.index) == False\n",
+    "].iloc[:,-2:]"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {
     "papermill": {
-     "duration": 0.008912,
-     "end_time": "2020-03-13T17:58:05.189883",
+     "duration": 0.011436,
+     "end_time": "2020-03-15T16:12:45.477570",
      "exception": false,
-     "start_time": "2020-03-13T17:58:05.180971",
+     "start_time": "2020-03-15T16:12:45.466134",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# Read in geodata to get additional population numbers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "papermill": {
+     "duration": 0.038201,
+     "end_time": "2020-03-15T16:12:45.525965",
+     "exception": false,
+     "start_time": "2020-03-15T16:12:45.487764",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "geodata_df = pd.read_csv(geodata_path).drop('Unnamed: 0', axis=1).set_index('name_jhu')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "papermill": {
+     "duration": 0.011731,
+     "end_time": "2020-03-15T16:12:45.559855",
+     "exception": false,
+     "start_time": "2020-03-15T16:12:45.548124",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "Add in populations for missing countries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "papermill": {
+     "duration": 0.054112,
+     "end_time": "2020-03-15T16:12:45.625087",
+     "exception": false,
+     "start_time": "2020-03-15T16:12:45.570975",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>name_long</th>\n",
+       "      <th>region_un</th>\n",
+       "      <th>subregion</th>\n",
+       "      <th>region_wb</th>\n",
+       "      <th>pop_est</th>\n",
+       "      <th>gdp_md_est</th>\n",
+       "      <th>income_grp</th>\n",
+       "      <th>Longitude</th>\n",
+       "      <th>Latitude</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>name_jhu</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Taiwan*</th>\n",
+       "      <td>Taiwan</td>\n",
+       "      <td>Taiwan</td>\n",
+       "      <td>Asia</td>\n",
+       "      <td>Eastern Asia</td>\n",
+       "      <td>East Asia &amp; Pacific</td>\n",
+       "      <td>22974347</td>\n",
+       "      <td>712000.0</td>\n",
+       "      <td>2. High income: nonOECD</td>\n",
+       "      <td>120.954273</td>\n",
+       "      <td>23.753993</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            name name_long region_un     subregion            region_wb  \\\n",
+       "name_jhu                                                                  \n",
+       "Taiwan*   Taiwan    Taiwan      Asia  Eastern Asia  East Asia & Pacific   \n",
+       "\n",
+       "           pop_est  gdp_md_est               income_grp   Longitude   Latitude  \n",
+       "name_jhu                                                                        \n",
+       "Taiwan*   22974347    712000.0  2. High income: nonOECD  120.954273  23.753993  "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "missing_countries = frames_map['confirmed'].loc[\n",
+    "    frames_map['confirmed'].index.levels[0].isin(data_pop_ser.index) == False\n",
+    "].iloc[:,-2:].reset_index()['Country/Region']\n",
+    "\n",
+    "display(geodata_df.loc[geodata_df.index.isin(missing_countries)])\n",
+    "\n",
+    "data_pop_ser = data_pop_ser.append(geodata_df.loc[geodata_df.index.isin(missing_countries), 'pop_est'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "papermill": {
+     "duration": 0.011426,
+     "end_time": "2020-03-15T16:12:45.657075",
+     "exception": false,
+     "start_time": "2020-03-15T16:12:45.645649",
      "status": "completed"
     },
     "tags": []
@@ -714,13 +727,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 13,
    "metadata": {
     "papermill": {
-     "duration": 0.076031,
-     "end_time": "2020-03-13T17:58:05.275002",
+     "duration": 0.082905,
+     "end_time": "2020-03-15T16:12:45.750274",
      "exception": false,
-     "start_time": "2020-03-13T17:58:05.198971",
+     "start_time": "2020-03-15T16:12:45.667369",
      "status": "completed"
     },
     "tags": []
@@ -742,13 +755,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 14,
    "metadata": {
     "papermill": {
-     "duration": 0.062766,
-     "end_time": "2020-03-13T17:58:05.356294",
+     "duration": 0.057388,
+     "end_time": "2020-03-15T16:12:45.829536",
      "exception": false,
-     "start_time": "2020-03-13T17:58:05.293528",
+     "start_time": "2020-03-15T16:12:45.772148",
      "status": "completed"
     },
     "tags": []
@@ -781,20 +794,21 @@
    "version": "3.7.6"
   },
   "papermill": {
-   "duration": 2.356374,
-   "end_time": "2020-03-13T17:58:05.683344",
+   "duration": 2.578814,
+   "end_time": "2020-03-15T16:12:46.163138",
    "environment_variables": {},
    "exception": null,
-   "input_path": "/tmp/hmyw2rom/notebooks/ToRates.ipynb",
+   "input_path": "notebooks/ToRates.ipynb",
    "output_path": "runs/ToRates.run.ipynb",
    "parameters": {
-    "PAPERMILL_INPUT_PATH": "/tmp/hmyw2rom/notebooks/ToRates.ipynb",
+    "PAPERMILL_INPUT_PATH": "notebooks/ToRates.ipynb",
     "PAPERMILL_OUTPUT_PATH": "runs/ToRates.run.ipynb",
-    "out_folder": "data/covid-19_rates",
-    "ts_folder": "/tmp/hmyw2rom/data/covid-19_jhu-csse",
-    "wb_path": "/tmp/hmyw2rom/data/worldbank/SP.POP.TOTL.zip"
+    "geodata_path": "./data/geodata/geo_data.csv",
+    "out_folder": "./data/covid-19_rates/",
+    "ts_folder": "./data/covid-19_jhu-csse/",
+    "wb_path": "./data/worldbank/SP.POP.TOTL.zip"
    },
-   "start_time": "2020-03-13T17:58:03.326970",
+   "start_time": "2020-03-15T16:12:43.584324",
    "version": "1.1.0"
   }
  },
-- 
GitLab