diff --git a/notebooks/CompileGeoData.ipynb b/notebooks/CompileGeoData.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..afc2c2b8e9a087930bd0d941eea8e760566ff9f0
--- /dev/null
+++ b/notebooks/CompileGeoData.ipynb
@@ -0,0 +1,191 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Extract the Geographic Info\n",
+    "\n",
+    "Use the Harvard [country_centroids.csv](https://worldmap.harvard.edu/data/geonode:country_centroids_az8) data to extract the geographic info we need for the visualizations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ts_folder = \"../data/covid-19_jhu-csse/\"\n",
+    "worldmap_path = \"../data/worldmap/country_centroids.csv\"\n",
+    "out_folder = None\n",
+    "PAPERMILL_OUTPUT_PATH = None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "source": [
+    "## Read in JHU CSSE data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_jhu_covid_region_df(name):\n",
+    "    filename = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n",
+    "    df = pd.read_csv(filename)\n",
+    "    df = df.set_index(['Country/Region', 'Province/State', 'Lat', 'Long'])\n",
+    "    df.columns = pd.to_datetime(df.columns)\n",
+    "    region_df = df.groupby(level='Country/Region').sum()\n",
+    "    return region_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "confirmed_df = read_jhu_covid_region_df(\"Confirmed\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Read in Harvard country centroids"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "country_centroids_df = pd.read_csv(worldmap_path)\n",
+    "country_centroids_df = country_centroids_df[['name', 'name_long', 'region_un', 'subregion', 'region_wb', 'pop_est', 'gdp_md_est', 'income_grp', 'Longitude', 'Latitude']]\n",
+    "country_centroids_df['name_jhu'] = country_centroids_df['name_long'] "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "country_centroids_df.columns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Fix names that differ between JHU CSSE and Harvard data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "region_hd_jhu_map = {\n",
+    "     'Brunei Darussalam': 'Brunei',\n",
+    "     \"Côte d'Ivoire\": \"Cote d'Ivoire\",\n",
+    "     'Czech Republic': 'Czechia',\n",
+    "     'Hong Kong': 'Hong Kong SAR',\n",
+    "     'Republic of Korea': 'Korea, South',\n",
+    "     'Macao': 'Macao SAR',\n",
+    "     'Russian Federation': 'Russia',\n",
+    "     'Taiwan': 'Taiwan*',\n",
+    "     'United States': 'US'\n",
+    "}\n",
+    "country_centroids_df['name_jhu'] = country_centroids_df['name_jhu'].replace(region_hd_jhu_map)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use this to find the name in the series\n",
+    "# country_centroids_df[country_centroids_df['name'].str.contains('Macao')]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "There are some regions that we cannot resolve, but we will just ignore these."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "confirmed_df.loc[\n",
+    "    (confirmed_df.index.isin(country_centroids_df['name_jhu']) == False)\n",
+    "].iloc[:,-2:]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Save the result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if PAPERMILL_OUTPUT_PATH:\n",
+    "    out_path = os.path.join(out_folder, f\"geo_data.csv\")\n",
+    "    country_centroids_df.to_csv(out_path)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}