{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import altair as alt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from IPython.display import display, HTML, Markdown\n",
    "from covid_19_utils import helper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "# Default inputs. This cell carries the `parameters` tag (the tag papermill looks for),\n",
    "# so the values below act as defaults.\n",
    "\n",
    "# Input locations\n",
    "ts_folder = '../data/covid-19_jhu-csse/'       # read by converter.read_convert()\n",
    "rates_folder = '../data/covid-19_rates/'       # not referenced by the other cells of this notebook\n",
    "geodata_path = '../data/geodata/geo_data.csv'  # read by helper.read_geodata()\n",
    "atlas_path = '../data/atlas'                   # handed to CaseConverter()\n",
    "\n",
    "# Output settings; not referenced by the other cells of this notebook\n",
    "out_folder = None\n",
    "PAPERMILL_OUTPUT_PATH = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the case time series through the project's CaseConverter\n",
    "from covid_19_utils.converters import CaseConverter\n",
    "converter = CaseConverter(atlas_path)\n",
    "cases_df = converter.read_convert(ts_folder)\n",
    "\n",
    "# Load the geographic reference table\n",
    "geodata_df = helper.read_geodata(geodata_path)\n",
    "\n",
    "# Keep only the geo columns used downstream, with `name` renamed to `country_label`\n",
    "geo_columns = ['Latitude', 'Longitude', 'country_label', 'region_un']\n",
    "geo_subset = geodata_df.rename(columns={'name': 'country_label'})[geo_columns]\n",
    "\n",
    "# Merge on the columns both frames share (no explicit `on=`), then expose\n",
    "# the `region_un` column under the display name 'Geo Region'\n",
    "jhu_df = cases_df.merge(geo_subset).rename(columns={'region_un': 'Geo Region'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Minimum peak number of confirmed cases for a country to be included\n",
    "nthresh = 500\n",
    "\n",
    "# Countries left out because of their very high case/population ratio\n",
    "excluded_countries = {'Andorra', 'Iceland', 'San Marino'}\n",
    "\n",
    "# Highest `positive` value recorded for each `region_label`\n",
    "# NOTE(review): keyed by `region_label`, but later cells match the result against\n",
    "# `country_label` -- confirm both columns carry the same values.\n",
    "country_max_ser = jhu_df.groupby('region_label')['positive'].max()\n",
    "\n",
    "# Keep countries with {nthresh} or more cases. `>=` makes the cut-off inclusive,\n",
    "# matching the 'or more' wording of the caption displayed to the reader.\n",
    "countries_over_thresh = [\n",
    "    c for c in country_max_ser[country_max_ser >= nthresh].index\n",
    "    if c not in excluded_countries\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Questions About COVID-19 and Its Spread\n",
    "\n",
    "Understanding the spread, distribution, and deadliness of COVID-19 is difficult, despite the data available about it. Differences in rates of testing, quality of data, demographics, etc. make it difficult to compare data between countries. \n",
    "\n",
    "All this needs to be considered when looking at the plots below. But despite those caveats, I found it helpful to plot the raw data, even though direct comparisons between countries might not be accurate."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Most recent date in the data, formatted for the caption.\n",
    "# max() is used so the result does not depend on the row order of jhu_df.\n",
    "data_ts = jhu_df.date.max().strftime(\"%b %d %Y\")\n",
    "display(HTML(f\"<em>Data up to {data_ts}; countries with {nthresh} or more confirmed cases.</em>\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## How are cases per 100,000 distributed geographically?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows for the reporting date. `data_ts` is the formatted date string built earlier.\n",
    "# NOTE(review): this relies on pandas matching that string against the `date` column -- confirm the column dtype.\n",
    "is_latest = jhu_df.date == data_ts\n",
    "latest_df = jhu_df[is_latest]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Restrict the latest snapshot to the countries that passed the case threshold\n",
    "over_thresh_mask = latest_df.country_label.isin(countries_over_thresh)\n",
    "map_df = latest_df[over_thresh_mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart of `positive_100k` for the thresholded countries, built by the project helper\n",
    "cases_map = helper.map_of_variable(map_df, 'positive_100k', 'Positive')\n",
    "display(cases_map)\n",
    "\n",
    "# Attribution footer rendered below the chart\n",
    "sources_html = '''\n",
    "<p style=\"font-size: smaller\">Data Sources: \n",
    "  <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a>,\n",
    "  <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>,\n",
    "  <a href=\"https://worldmap.harvard.edu/data/geonode:country_centroids_az8\">Harvard Worldmap</a>\n",
    "</p>'''\n",
    "display(HTML(sources_html))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One bar per country, ordered by descending cases per 100k (sort='-x')\n",
    "bar_tooltip = [\n",
    "    'country_label:N',\n",
    "    'positive:Q', 'deceased:Q',\n",
    "    'positive_100k:Q', 'deceased_100k:Q',\n",
    "]\n",
    "bars = alt.Chart(map_df).mark_bar().encode(\n",
    "    x=alt.X('positive_100k:Q', title='Positive cases/100k'),\n",
    "    y=alt.Y('country_label:N', title='Country/Region', sort='-x'),\n",
    "    tooltip=bar_tooltip,\n",
    ")\n",
    "\n",
    "# Value labels next to each bar; dx nudges the text right so it does not sit on top of the bar\n",
    "label_style = dict(align='left', baseline='middle', dx=3)\n",
    "text = bars.mark_text(**label_style).encode(\n",
    "    text=alt.Text('positive_100k:Q', format='.3')\n",
    ")\n",
    "\n",
    "# Layer bars and labels into a single chart\n",
    "chart = (bars + text).properties(\n",
    "    height=900,\n",
    "    title='Confirmed cases per 100k inhabitants',\n",
    ")\n",
    "display(chart)\n",
    "\n",
    "# Attribution footer rendered below the chart\n",
    "sources_html = '''\n",
    "<p style=\"font-size: smaller\">Data Sources: \n",
    "  <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a>,\n",
    "  <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>,\n",
    "  <a href=\"https://worldmap.harvard.edu/data/geonode:country_centroids_az8\">Harvard Worldmap</a>\n",
    "</p>'''\n",
    "display(HTML(sources_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## How have cases been growing?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select countries whose latest cases per 100k reach the per-capita threshold\n",
    "per_capita_thresh = 50\n",
    "# Countries left out because of their very high case/population ratio\n",
    "excluded_countries = {'Andorra', 'Iceland', 'San Marino'}\n",
    "# `>=` keeps the cut-off inclusive, matching the 'or more' wording of the chart title below\n",
    "over_per_capita = latest_df[latest_df.positive_100k >= per_capita_thresh].country_label\n",
    "countries_over_thresh_per_capita = [c for c in over_per_capita if c not in excluded_countries]\n",
    "\n",
    "# Facet order: regions ranked by mean `positive`, highest first.\n",
    "# Only the `positive` column is averaged, so non-numeric columns never reach mean()\n",
    "# (pandas >= 2.0 raises a TypeError when asked to average them).\n",
    "sort_order = (\n",
    "    latest_df.groupby('Geo Region')['positive']\n",
    "    .mean()\n",
    "    .sort_values(ascending=False)\n",
    "    .index.tolist()\n",
    ")\n",
    "\n",
    "# Legend-bound selection: selected countries stay opaque, the others are faded\n",
    "# NOTE(review): selection_multi / add_selection are the Altair 4 names; Altair 5 deprecates\n",
    "# them in favour of selection_point / add_params.\n",
    "selection = alt.selection_multi(fields=['country_label'], bind='legend')\n",
    "opacity = alt.condition(selection, alt.value(1), alt.value(0.2))\n",
    "\n",
    "# Shared base chart: full time series of the selected countries, date on the x axis\n",
    "base = alt.Chart(\n",
    "    jhu_df[jhu_df.country_label.isin(countries_over_thresh_per_capita)]\n",
    ").encode(\n",
    "    alt.X('date', title='Date')\n",
    ").properties(\n",
    "    width=300,\n",
    "    height=200\n",
    ")\n",
    "\n",
    "\n",
    "def per_capita_lines(field, y_title):\n",
    "    \"\"\"Return a line chart of `field` over time, one line per country, faceted by 'Geo Region'.\n",
    "\n",
    "    field: name of the quantitative column plotted on the (symlog) y axis.\n",
    "    y_title: title shown on the y axis.\n",
    "    \"\"\"\n",
    "    return base.mark_line().encode(\n",
    "        alt.Y(field, scale=alt.Scale(type='symlog'), title=y_title),\n",
    "        color=alt.Color('country_label', title='Country'),\n",
    "        facet=alt.Facet('Geo Region:N', columns=1, sort=alt.SortArray(sort_order), title=''),\n",
    "        tooltip=['country_label:N', 'date:T', field + ':Q'],\n",
    "        opacity=opacity\n",
    "    ).add_selection(selection)\n",
    "\n",
    "\n",
    "cases = per_capita_lines('positive_100k', 'Cases per 100k population')\n",
    "deaths = per_capita_lines('deceased_100k', 'Deaths per 100k population')\n",
    "\n",
    "# Cases and deaths side by side under one centred title\n",
    "chart = alt.hconcat(\n",
    "    cases, deaths, title=f\"Countries with {per_capita_thresh} or more cases per 100k\"\n",
    ").configure_title(\n",
    "    anchor='middle'\n",
    ")\n",
    "\n",
    "display(chart)\n",
    "display(HTML('''\n",
    "<p style=\"font-size: smaller\">Data Sources: \n",
    "  <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a>,\n",
    "  <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>,\n",
    "  <a href=\"https://worldmap.harvard.edu/data/geonode:country_centroids_az8\">Harvard Worldmap</a>\n",
    "</p>'''))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Time series of the countries over the per-capita threshold, passed through helper.make_since_df\n",
    "# NOTE(review): presumably this adds the `sinceDay0` column used by the growth plots -- confirm in helper.\n",
    "selected_df = jhu_df[jhu_df.country_label.isin(countries_over_thresh_per_capita)]\n",
    "since_df = helper.make_since_df(selected_df, region_column='country_label')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rank countries by their peak `positive` count, largest first.\n",
    "# Only the `positive` column is aggregated; the other columns are not needed for the ranking.\n",
    "peak_positive = since_df.groupby('country_label')['positive'].max()\n",
    "# Exclude China in this plot because its numbers are far greater than everywhere else\n",
    "sort_order = [\n",
    "    o for o in peak_positive.sort_values(ascending=False).index\n",
    "    if o != 'China'\n",
    "]\n",
    "chart = helper.facetted_growth_plot(\n",
    "  since_df[since_df['country_label'] != 'China'],\n",
    "  'sinceDay0',\n",
    "  'positive_100k',\n",
    "  sort_order,\n",
    "  'Italy',\n",
    "  \"Growth of cases per 100k population from case 100, compared to Italy\",\n",
    "  \"Cases/100k\"\n",
    ")\n",
    "display(chart)\n",
    "# Attribution footer; the closing </p> of the second paragraph was missing and is now emitted\n",
    "display(HTML('''\n",
    "<p style=\"font-size: smaller\">Data Sources: \n",
    "  <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a>,\n",
    "  <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>,\n",
    "  <a href=\"https://worldmap.harvard.edu/data/geonode:country_centroids_az8\">Harvard Worldmap</a>\n",
    "</p>\n",
    "<p style=\"font-size: smaller\">Inspired by <a href=\"https://covid19dashboards.com/growth-analysis/\">Thomas Wiecki</a></p>'''))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Log-scale variant of the growth plot. It reuses `sort_order` from the previous cell\n",
    "# and, unlike the linear plot, passes `since_df` unfiltered.\n",
    "log_title = \"Growth of cases per 100k population from case 100, compared to Italy (log scale)\"\n",
    "chart = helper.facetted_growth_plot(\n",
    "    since_df, 'sinceDay0', 'positive_100k', sort_order, 'Italy',\n",
    "    log_title, \"Cases/100k\", 'log'\n",
    ")\n",
    "display(chart)\n",
    "\n",
    "# Attribution footer rendered below the chart\n",
    "sources_html = '''\n",
    "<p style=\"font-size: smaller\">Data Sources: \n",
    "  <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a>,\n",
    "  <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>,\n",
    "  <a href=\"https://worldmap.harvard.edu/data/geonode:country_centroids_az8\">Harvard Worldmap</a>\n",
    "</p>'''\n",
    "display(HTML(sources_html))"
   ]
  }
 ],
 "metadata": {
  "hide_input": true,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}