{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "from IPython.display import display, HTML, Markdown"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "ts_folder = \"../data/covid-19_jhu-csse/\"\n",
    "rates_folder = \"../data/covid-19_rates/\"\n",
    "out_folder = None\n",
    "PAPERMILL_OUTPUT_PATH = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read in the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_jhu_covid_df(name):\n",
    "    \"\"\"Load one JHU CSSE time-series CSV from `ts_folder`.\n",
    "\n",
    "    Reads `time_series_19-covid-{name}.csv`, indexes the rows by\n",
    "    Province/State, Country/Region, Lat and Long, and parses every\n",
    "    remaining column label as a datetime.\n",
    "    \"\"\"\n",
    "    csv_path = os.path.join(ts_folder, f\"time_series_19-covid-{name}.csv\")\n",
    "    frame = pd.read_csv(csv_path).set_index(\n",
    "        ['Province/State', 'Country/Region', 'Lat', 'Long'])\n",
    "    frame.columns = pd.to_datetime(frame.columns)\n",
    "    return frame\n",
    "\n",
    "\n",
    "# One frame per JHU series, keyed by the lower-cased series name\n",
    "jhu_frames_map = {\n",
    "    series.lower(): read_jhu_covid_df(series)\n",
    "    for series in (\"Confirmed\", \"Deaths\", \"Recovered\")\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_rates_covid_df(name):\n",
    "    \"\"\"Load one rates time-series CSV from `rates_folder`.\n",
    "\n",
    "    Reads `ts_rates_19-covid-{name}.csv`, drops the `Unnamed: 0` column,\n",
    "    indexes the rows by Country/Region, and parses every remaining column\n",
    "    label as a datetime.\n",
    "    \"\"\"\n",
    "    csv_path = os.path.join(rates_folder, f\"ts_rates_19-covid-{name}.csv\")\n",
    "    frame = (\n",
    "        pd.read_csv(csv_path)\n",
    "        .drop(\"Unnamed: 0\", axis=1)\n",
    "        .set_index(['Country/Region'])\n",
    "    )\n",
    "    frame.columns = pd.to_datetime(frame.columns)\n",
    "    return frame\n",
    "\n",
    "\n",
    "# One frame per rates series, keyed by the series name\n",
    "rates_frames_map = {\n",
    "    series: read_rates_covid_df(series)\n",
    "    for series in (\"confirmed\", \"deaths\", \"recovered\")\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compile data needed for the visualizations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 
Compute geospatial coordinates\n", "country_coords_df = jhu_frames_map['confirmed'].reset_index([2,3])[['Lat', 'Long']]\n", "country_coords_df = country_coords_df.groupby(level='Country/Region').mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Identify countries with 100 or more cases\n", "case_count_ser = jhu_frames_map['confirmed'].iloc[:,-1].groupby(level='Country/Region').sum()\n", "countries_over_thresh = case_count_ser[case_count_ser > 99].index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Questions About COVID-19 and Its Spread\n", "\n", "These plots should be taken with a large grain of salt. I am not an epidemiologist, so the analyses shown here are completely naive. There are large discrepancies in the data from different countries for a variety of reasons (rates of testing, demographics, etc.) that make direct comparisons inaccurate. Nonetheless, I think there is a lot of interesting information in this data." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data_ts = jhu_frames_map['confirmed'].iloc[:,-1].name.strftime(\"%b %d %Y\")\n", "display(HTML(f\"<em>Data up to {data_ts}</em>\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## How are cases per 100,000 distributed geographically?" 
]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import altair as alt\n",
    "from vega_datasets import data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Latest column of each rates frame, side by side with the country coordinates\n",
    "map_df = (\n",
    "    pd.concat(\n",
    "        [rates_frames_map[series].iloc[:, -1]\n",
    "         for series in ('confirmed', 'deaths', 'recovered')]\n",
    "        + [country_coords_df],\n",
    "        axis=1)\n",
    "    # Restrict to countries with 100 or more cases\n",
    "    .loc[countries_over_thresh]\n",
    "    .dropna()\n",
    "    .reset_index()\n",
    ")\n",
    "# Columns are renamed positionally, in the order they were concatenated\n",
    "map_df.columns = ['Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Lat', 'Long']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def map_of_variable(map_df, variable):\n",
    "    \"\"\"Build a layered world map with one circle per row of `map_df`.\n",
    "\n",
    "    Circles are placed at each row's `Long`/`Lat` and sized by the column\n",
    "    named by `variable`; the tooltip lists the country together with its\n",
    "    Confirmed, Deaths and Recovered values. Returns the configured chart.\n",
    "    \"\"\"\n",
    "    # Background generators, then the land outlines from vega_datasets\n",
    "    globe = alt.sphere()\n",
    "    grid_lines = alt.graticule()\n",
    "    land = alt.topo_feature(data.world_110m.url, 'countries')\n",
    "\n",
    "    # Layers are listed back to front\n",
    "    ocean_layer = alt.Chart(globe).mark_geoshape(fill='#cae6ef')\n",
    "    grid_layer = alt.Chart(grid_lines).mark_geoshape(stroke='white', strokeWidth=0.5)\n",
    "    land_layer = alt.Chart(land).mark_geoshape(fill='#dddddd', stroke='#aaaaaa')\n",
    "    bubble_layer = alt.Chart(map_df).mark_circle(opacity=0.6).encode(\n",
    "        longitude='Long:Q',\n",
    "        latitude='Lat:Q',\n",
    "        size=alt.Size(f'{variable}:Q', title=\"Cases\"),\n",
    "        color=alt.value('steelblue'),\n",
    "        tooltip=[\"Country/Region:N\", \"Confirmed:Q\", \"Deaths:Q\", \"Recovered:Q\"]\n",
    "    )\n",
    "\n",
    "    layered = alt.layer(ocean_layer, grid_layer, land_layer, bubble_layer)\n",
    "    projected = layered.project('naturalEarth1')\n",
    "    titled = projected.properties(\n",
    "        width=600, height=400, title=f\"{variable} cases per 100,000\")\n",
    "    return titled.configure_view(stroke=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(map_of_variable(map_df, 'Confirmed'))\n",
    "display(HTML('''\n",
    "<p style=\"font-size: 
smaller\">Data Source: \n",
    " <a href=\"https://github.com/CSSEGISandData/COVID-19\">JHU CSSE</a> and\n",
    " <a href=\"https://data.worldbank.org/indicator/SP.POP.TOTL\">World Bank</a>\n",
    "</p>'''))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Horizontal bars of the confirmed rate, sorted by descending x value.\n",
    "# map_df holds rates rather than raw counts, so the axis title says so.\n",
    "bars = alt.Chart(map_df).mark_bar().encode(\n",
    "    x=alt.X('Confirmed:Q', title='Confirmed cases per 100,000'),\n",
    "    y=alt.Y(\"Country/Region:N\", sort='-x')\n",
    ")\n",
    "\n",
    "# Value labels drawn just past the end of each bar\n",
    "text = bars.mark_text(\n",
    "    align='left',\n",
    "    baseline='middle',\n",
    "    dx=3  # Nudges text to right so it doesn't appear on top of the bar\n",
    ").encode(\n",
    "    text=alt.Text('Confirmed:Q', format=\".3\")\n",
    ")\n",
    "\n",
    "# Title the figure so it can be read on its own when the notebook is skimmed\n",
    "(bars + text).properties(\n",
    "    height=900,\n",
    "    title='Confirmed cases per 100,000 by country'\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}