diff --git a/notebooks/Dashboard.ipynb b/notebooks/Dashboard.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b7f64b2ba3651d24319f568fbe5b5810b04fd4ac --- /dev/null +++ b/notebooks/Dashboard.ipynb @@ -0,0 +1,257 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "from IPython.display import display, HTML, Markdown" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "ts_folder = \"../data/covid-19_jhu-csse/\"\n", + "rates_folder = \"../data/covid-19_rates/\"\n", + "out_folder = None\n", + "PAPERMILL_OUTPUT_PATH = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Read in the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def read_jhu_covid_df(name):\n", + " filename = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n", + " df = pd.read_csv(filename)\n", + " df = df.set_index(['Province/State', 'Country/Region', 'Lat', 'Long'])\n", + " df.columns = pd.to_datetime(df.columns)\n", + " return df\n", + "\n", + "\n", + "jhu_frames_map = {\n", + " \"confirmed\": read_jhu_covid_df(\"Confirmed\"),\n", + " \"deaths\": read_jhu_covid_df(\"Deaths\"),\n", + " \"recovered\": read_jhu_covid_df(\"Recovered\")\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def read_rates_covid_df(name):\n", + " filename = os.path.join(rates_folder, f\"ts_rates_19-covid-{name}.csv\")\n", + " df = pd.read_csv(filename).drop(\"Unnamed: 0\", axis=1)\n", + " df = df.set_index(['Country/Region'])\n", + " df.columns = pd.to_datetime(df.columns)\n", + " return df\n", + "\n", + "\n", + "rates_frames_map = {\n", + " \"confirmed\": 
read_rates_covid_df(\"confirmed\"),\n", + " \"deaths\": read_rates_covid_df(\"deaths\"),\n", + " \"recovered\": read_rates_covid_df(\"recovered\")\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compile data needed for the visualizations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compute geospatial coordinates\n", + "country_coords_df = jhu_frames_map['confirmed'].reset_index([2,3])[['Lat', 'Long']]\n", + "country_coords_df = country_coords_df.groupby(level='Country/Region').mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Identify countries with 100 or more cases\n", + "case_count_ser = jhu_frames_map['confirmed'].iloc[:,-1].groupby(level='Country/Region').sum()\n", + "countries_over_thresh = case_count_ser[case_count_ser > 99].index" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Questions About COVID-19 and Its Spread\n", + "\n", + "These plots should be taken with a large grain of salt. I am not an epidemiologist, so the analyses shown here are completely naive. There are large discrepancies in the data from different countries for a variety of reasons (rates of testing, demographics, etc.), so direct comparisons are inaccurate. Nonetheless, I think there is a lot of interesting information in this data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_ts = jhu_frames_map['confirmed'].iloc[:,-1].name.strftime(\"%b %d %Y\")\n", + "display(HTML(f\"<em>Data up to {data_ts}</em>\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How are cases per 100,000 distributed geographically?"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import altair as alt\n", + "from vega_datasets import data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compile the basic df\n", + "map_df = pd.concat([\n", + " rates_frames_map['confirmed'].iloc[:,-1],\n", + " rates_frames_map['deaths'].iloc[:,-1],\n", + " rates_frames_map['recovered'].iloc[:,-1],\n", + " country_coords_df], axis=1)\n", + "# Restrict to countries with 100 or more cases\n", + "map_df = map_df.loc[countries_over_thresh].dropna()\n", + "map_df = map_df.reset_index()\n", + "map_df.columns = ['Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Lat', 'Long']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def map_of_variable(map_df, variable):\n", + " # Data generators for the background\n", + " sphere = alt.sphere()\n", + " graticule = alt.graticule()\n", + "\n", + " # Source of land data\n", + " source = alt.topo_feature(data.world_110m.url, 'countries')\n", + "\n", + " # Layering and configuring the components\n", + " p = alt.layer(\n", + " alt.Chart(sphere).mark_geoshape(fill='#cae6ef'),\n", + " alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5),\n", + " alt.Chart(source).mark_geoshape(fill='#dddddd', stroke='#aaaaaa'),\n", + " alt.Chart(map_df).mark_circle(opacity=0.6).encode(\n", + " longitude='Long:Q',\n", + " latitude='Lat:Q',\n", + " size=alt.Size(f'{variable}:Q', title=\"Cases\"),\n", + " color=alt.value('steelblue'),\n", + " tooltip=[\"Country/Region:N\", \"Confirmed:Q\", \"Deaths:Q\", \"Recovered:Q\"]\n", + " )\n", + " ).project(\n", + " 'naturalEarth1'\n", + " ).properties(width=600, height=400, title=f\"{variable} cases per 100,000\"\n", + " ).configure_view(stroke=None)\n", + " return p" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, 
+ "outputs": [], + "source": [ + "display(map_of_variable(map_df, 'Confirmed'))\n", + "display(HTML('''\n", + "<p style=\"font-size: smaller\">Data Source: \n", + " <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a> and\n", + " <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>\n", + "</p>'''))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bars = alt.Chart(map_df).mark_bar().encode(\n", + " x='Confirmed:Q',\n", + " y=alt.Y(\"Country/Region:N\", sort='-x')\n", + ")\n", + "\n", + "text = bars.mark_text(\n", + " align='left',\n", + " baseline='middle',\n", + " dx=3 # Nudges text to right so it doesn't appear on top of the bar\n", + ").encode(\n", + " text=alt.Text('Confirmed:Q', format=\".3\")\n", + ")\n", + "\n", + "(bars + text).properties(height=900)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}