From e8d09f632b65608c404ed54785e86865da0e265d Mon Sep 17 00:00:00 2001
From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch>
Date: Wed, 11 Mar 2020 22:29:45 +0000
Subject: [PATCH] feat: notebook for playing with data

---
 notebooks/Play.ipynb | 109 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 notebooks/Play.ipynb

diff --git a/notebooks/Play.ipynb b/notebooks/Play.ipynb
new file mode 100644
index 00000000..fd34756b
--- /dev/null
+++ b/notebooks/Play.ipynb
@@ -0,0 +1,109 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Read in JHU CSSE data\n",
+    "\n",
+    "I plan to switch to [xarray](http://xarray.pydata.org/en/stable/), but for now it is easier to work with pandas DataFrames."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_jhu_covid_df(name):\n",
+    "    \"\"\"Load time_series_19-covid-{name}.csv indexed by its four location columns, with column labels parsed as datetimes.\"\"\"\n",
+    "    location_keys = ['Province/State', 'Country/Region', 'Lat', 'Long']\n",
+    "    frame = pd.read_csv(f\"../data/covid-19_jhu-csse/time_series_19-covid-{name}.csv\").set_index(location_keys)\n",
+    "    frame.columns = pd.to_datetime(frame.columns)\n",
+    "    return frame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "confirmed_df = read_jhu_covid_df(name=\"Confirmed\")\n",
+    "deaths_df = read_jhu_covid_df(name=\"Deaths\")\n",
+    "recovered_df = read_jhu_covid_df(name=\"Recovered\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def summarize_df(df, name):\n",
+    "    \"\"\"Sum `df` per 'Country/Region', keep the last column, sort descending, and name the Series 'Total {name}'.\"\"\"\n",
+    "    last_column_totals = df.groupby(level='Country/Region').sum().iloc[:, -1]\n",
+    "    return last_column_totals.sort_values(ascending=False).rename(f\"Total {name}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "confirmed_ser = summarize_df(df=confirmed_df, name=\"Confirmed\")\n",
+    "deaths_ser = summarize_df(df=deaths_df, name=\"Deaths\")\n",
+    "recovered_ser = summarize_df(df=recovered_df, name=\"Recovered\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Read in World Bank data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import zipfile\n",
+    "with zipfile.ZipFile(\"../data/worldbank/SP.POP.TOTL.zip\") as zf, zf.open(\"API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv\") as pop_csv:\n",
+    "    pop_df = pd.read_csv(pop_csv, skiprows=4)  # archive and member handles are closed when the with-block exits"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
-- 
GitLab