Skip to content
Snippets Groups Projects
Dashboard.ipynb 7.28 KiB
Newer Older
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "from IPython.display import display, HTML, Markdown"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "# Notebook parameters (this cell carries the \"parameters\" tag; presumably overridden by papermill)\n",
    "# Folder searched for the time_series_19-covid-*.csv files\n",
    "ts_folder = \"../data/covid-19_jhu-csse/\"\n",
    "# Folder searched for the ts_rates_19-covid-*.csv files\n",
    "rates_folder = \"../data/covid-19_rates/\"\n",
    "# NOTE(review): the two names below are not referenced by any other visible cell\n",
    "out_folder = None\n",
    "PAPERMILL_OUTPUT_PATH = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read in the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_jhu_covid_df(name):\n",
    "    \"\"\"Load one JHU CSSE time-series CSV into a date-columned frame.\n",
    "\n",
    "    `name` fills the `time_series_19-covid-{name}.csv` filename pattern\n",
    "    inside the notebook-level `ts_folder`. The four descriptive columns\n",
    "    become the row index and the remaining column labels are parsed as\n",
    "    timestamps.\n",
    "    \"\"\"\n",
    "    csv_path = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n",
    "    id_cols = ['Province/State', 'Country/Region', 'Lat', 'Long']\n",
    "    frame = pd.read_csv(csv_path).set_index(id_cols)\n",
    "    frame.columns = pd.to_datetime(frame.columns)\n",
    "    return frame\n",
    "\n",
    "\n",
    "# Keys are the lowercase series names; the file names use the capitalized form\n",
    "jhu_frames_map = {\n",
    "    series: read_jhu_covid_df(series.capitalize())\n",
    "    for series in (\"confirmed\", \"deaths\", \"recovered\")\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_rates_covid_df(name):\n",
    "    \"\"\"Load one per-country rates CSV into a date-columned frame.\n",
    "\n",
    "    `name` fills the `ts_rates_19-covid-{name}.csv` filename pattern inside\n",
    "    the notebook-level `rates_folder`. The `Unnamed: 0` column is discarded,\n",
    "    `Country/Region` becomes the row index, and the remaining column labels\n",
    "    are parsed as timestamps.\n",
    "    \"\"\"\n",
    "    csv_path = os.path.join(rates_folder, f\"ts_rates_19-covid-{name}.csv\")\n",
    "    frame = pd.read_csv(csv_path)\n",
    "    frame = frame.drop(columns=\"Unnamed: 0\").set_index(['Country/Region'])\n",
    "    frame.columns = pd.to_datetime(frame.columns)\n",
    "    return frame\n",
    "\n",
    "\n",
    "# Same three series as the JHU frames; file names use the lowercase keys as-is\n",
    "rates_frames_map = {\n",
    "    series: read_rates_covid_df(series)\n",
    "    for series in (\"confirmed\", \"deaths\", \"recovered\")\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compile data needed for the visualizations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One (Lat, Long) pair per country: the mean over that country's rows\n",
    "country_coords_df = (\n",
    "    jhu_frames_map['confirmed']\n",
    "    .reset_index(level=['Lat', 'Long'])[['Lat', 'Long']]\n",
    "    .groupby(level='Country/Region')\n",
    "    .mean()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sum the last date column per country and keep the countries above 99 cases\n",
    "latest_confirmed = jhu_frames_map['confirmed'].iloc[:, -1]\n",
    "case_count_ser = latest_confirmed.groupby(level='Country/Region').sum()\n",
    "countries_over_thresh = case_count_ser.loc[case_count_ser > 99].index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Questions About COVID-19 and Its Spread\n",
    "\n",
    "These plots should be taken with a large grain of salt. I am not an epidemiologist, so the analyses shown here are completely naive. There are large discrepancies in the data from different countries for a variety of reasons (rates of testing, demographics, etc.) that make direct comparisons inaccurate. Nonetheless, I think there is a lot of interesting information in this data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label of the final column of the confirmed frame, formatted for the caption\n",
    "last_column_label = jhu_frames_map['confirmed'].columns[-1]\n",
    "data_ts = last_column_label.strftime(\"%b %d %Y\")\n",
    "display(HTML(f\"<em>Data up to {data_ts}</em>\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## How are cases per 100,000 distributed geographically?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import altair as alt\n",
    "from vega_datasets import data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Last column of each rates frame, in display order, followed by the coordinates\n",
    "latest_rates = [\n",
    "    rates_frames_map[series].iloc[:, -1]\n",
    "    for series in ('confirmed', 'deaths', 'recovered')\n",
    "]\n",
    "map_df = pd.concat(latest_rates + [country_coords_df], axis=1)\n",
    "# Keep only the countries over the case threshold, dropping rows with any missing value\n",
    "map_df = map_df.loc[countries_over_thresh].dropna().reset_index()\n",
    "map_df.columns = ['Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Lat', 'Long']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def map_of_variable(map_df, variable):\n",
    "    \"\"\"Build a layered world map with one circle per row of `map_df`.\n",
    "\n",
    "    Circles are positioned by the `Long` and `Lat` columns and sized by the\n",
    "    `variable` column; the tooltip lists the country plus the `Confirmed`,\n",
    "    `Deaths` and `Recovered` columns. Returns the configured layered chart.\n",
    "    \"\"\"\n",
    "    # Background layers: sphere, graticule lines, then country shapes\n",
    "    ocean = alt.Chart(alt.sphere()).mark_geoshape(fill='#cae6ef')\n",
    "    grid = alt.Chart(alt.graticule()).mark_geoshape(stroke='white', strokeWidth=0.5)\n",
    "    countries = alt.topo_feature(data.world_110m.url, 'countries')\n",
    "    land = alt.Chart(countries).mark_geoshape(fill='#dddddd', stroke='#aaaaaa')\n",
    "\n",
    "    # Data layer drawn on top of the background\n",
    "    bubbles = alt.Chart(map_df).mark_circle(opacity=0.6).encode(\n",
    "        longitude='Long:Q',\n",
    "        latitude='Lat:Q',\n",
    "        size=alt.Size(f'{variable}:Q', title=\"Cases\"),\n",
    "        color=alt.value('steelblue'),\n",
    "        tooltip=[\"Country/Region:N\", \"Confirmed:Q\", \"Deaths:Q\", \"Recovered:Q\"]\n",
    "    )\n",
    "\n",
    "    layered = alt.layer(ocean, grid, land, bubbles).project('naturalEarth1')\n",
    "    layered = layered.properties(width=600, height=400, title=f\"{variable} cases per 100,000\")\n",
    "    return layered.configure_view(stroke=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the attribution note, then show the map followed by the note\n",
    "source_note = HTML('''\n",
    "<p style=\"font-size: smaller\">Data Source: \n",
    "  <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a> and\n",
    "  <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>\n",
    "</p>''')\n",
    "display(map_of_variable(map_df, 'Confirmed'))\n",
    "display(source_note)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bar layer: confirmed value on x, one country per y position\n",
    "bar_encoding = dict(x='Confirmed:Q', y=alt.Y(\"Country/Region:N\", sort='-x'))\n",
    "bars = alt.Chart(map_df).mark_bar().encode(**bar_encoding)\n",
    "\n",
    "# Label layer: dx nudges the text right so it doesn't appear on top of the bar\n",
    "label_style = dict(align='left', baseline='middle', dx=3)\n",
    "text = bars.mark_text(**label_style).encode(\n",
    "    text=alt.Text('Confirmed:Q', format=\".3\")\n",
    ")\n",
    "\n",
    "(bars + text).properties(height=900)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}