From e8d09f632b65608c404ed54785e86865da0e265d Mon Sep 17 00:00:00 2001
From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch>
Date: Wed, 11 Mar 2020 22:29:45 +0000
Subject: [PATCH] feat: notebook for playing with data

---
 notebooks/Play.ipynb | 135 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 notebooks/Play.ipynb

diff --git a/notebooks/Play.ipynb b/notebooks/Play.ipynb
new file mode 100644
index 00000000..fd34756b
--- /dev/null
+++ b/notebooks/Play.ipynb
@@ -0,0 +1,135 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Read in JHU CSSE data\n",
+    "\n",
+    "I will switch to [xarray](http://xarray.pydata.org/en/stable/), but at the moment, it's easier like this..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_jhu_covid_df(name: str) -> pd.DataFrame:\n",
+    "    \"\"\"Load one JHU CSSE COVID-19 time-series CSV as a DataFrame.\n",
+    "\n",
+    "    Args:\n",
+    "        name: Series name used in the file name, e.g. \"Confirmed\".\n",
+    "\n",
+    "    Returns:\n",
+    "        DataFrame indexed by (Province/State, Country/Region, Lat, Long);\n",
+    "        the remaining column labels are converted with pd.to_datetime.\n",
+    "    \"\"\"\n",
+    "    filename = f\"../data/covid-19_jhu-csse/time_series_19-covid-{name}.csv\"\n",
+    "    df = pd.read_csv(filename)\n",
+    "    df = df.set_index(['Province/State', 'Country/Region', 'Lat', 'Long'])\n",
+    "    # Convert every non-index column label to a timestamp (presumably one\n",
+    "    # column per reporting date; confirm against the CSV).\n",
+    "    df.columns = pd.to_datetime(df.columns)\n",
+    "    return df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "confirmed_df = read_jhu_covid_df(\"Confirmed\")\n",
+    "deaths_df = read_jhu_covid_df(\"Deaths\")\n",
+    "recovered_df = read_jhu_covid_df(\"Recovered\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def summarize_df(df: pd.DataFrame, name: str) -> pd.Series:\n",
+    "    \"\"\"Return per-country sums of the last column, largest first.\n",
+    "\n",
+    "    Args:\n",
+    "        df: Frame whose index has a 'Country/Region' level.\n",
+    "        name: Label for the result, which is named \"Total {name}\".\n",
+    "\n",
+    "    Returns:\n",
+    "        Series indexed by 'Country/Region', sorted in descending order.\n",
+    "    \"\"\"\n",
+    "    # Pick the last column before grouping so only that column is summed;\n",
+    "    # this equals summing every column and then taking the last one.\n",
+    "    latest = df.iloc[:, -1]\n",
+    "    ser = latest.groupby(level='Country/Region').sum().sort_values(ascending=False)\n",
+    "    ser.name = f\"Total {name}\"\n",
+    "    return ser"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "confirmed_ser = summarize_df(confirmed_df, \"Confirmed\")\n",
+    "deaths_ser = summarize_df(deaths_df, \"Deaths\")\n",
+    "recovered_ser = summarize_df(recovered_df, \"Recovered\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Read in World Bank data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import zipfile\n",
+    "\n",
+    "# Context managers close the archive and the member stream once the CSV is read.\n",
+    "with zipfile.ZipFile(\"../data/worldbank/SP.POP.TOTL.zip\") as zf:\n",
+    "    with zf.open(\"API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv\") as csv_file:\n",
+    "        pop_df = pd.read_csv(csv_file, skiprows=4)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
-- 
GitLab