diff --git a/Dockerfile b/Dockerfile index 2f3cb9d7a9f17d998496e5ac5d23511e2c208ed8..eb711f8747382e1fcb6f57f9239f4ee0d5d696c5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,6 +33,12 @@ RUN conda env update -q -f /tmp/environment.yml && \ conda clean -y --all && \ conda env export -n "root" && \ jupyter lab build + +RUN /opt/conda/bin/pip install ipywidgets voila + +RUN jupyter labextension install @jupyter-voila/jupyterlab-preview && \ + jupyter labextension install @jupyter-widgets/jupyterlab-manager + USER ${NB_USER} # install the R dependencies diff --git a/README.md b/README.md index 084e10a6e78372d80ec6fc8b76d79faff0fee151..7dd999bfff930471bb3ea92cbe0e3dd5d5691134 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,8 @@ The environment image allows you to work in Python or R in JupyterLab or RStudio <td><a href="https://github.com/pcm-dpc/COVID-19">Covid-19 data for Italy</a></td> <td><a href="https://renkulab.io/projects/covid-19/covid-19-public-data/datasets/286c58b1-dbbc-4caa-a23a-fcb001d5ac51/">covid-19-italy</a></td> <td><code>data/covid-19-italy</code></td> -<td>N/A</td> +<td><a href="https://renkulab.io/projects/covid-19/covid-19-public-data/files/blob/notebooks/examples/italy-examples/italy-notebook-example.ipynb">notebook</a>, + <a href="https://renkulab.io/projects/covid-19/covid-19-public-data/files/blob/notebooks/examples/italy-examples/italy-dashboard-example.ipynb">dashboard</a></td> </tr> <tr> <td><a href="https://github.com/echen102/COVID-19-TweetIDs">Covid-19 tweet IDs</a></td> diff --git a/notebooks/examples/italy-examples/italy-dashboard-example.ipynb b/notebooks/examples/italy-examples/italy-dashboard-example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..4dfaa3dff9d3cbfb84bfef4f38d689b2551b2111 --- /dev/null +++ b/notebooks/examples/italy-examples/italy-dashboard-example.ipynb @@ -0,0 +1,174 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ 
+ "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "\n", + "from italy_utils import * " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_folder = \"../../../data/covid-19-italy/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_provinces = prepare_dataframe(\n", + " data_folder, \n", + " \"dpc-covid19-ita-province.csv\", \n", + " \"dati-province-description.json\",\n", + " use_time_index=True\n", + ")\n", + "\n", + "province_dict = get_province_structure(df_provinces)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_growth_factor_series(province, df, N_min=1000):\n", + " ts = df.loc[\n", + " (df['province'] == province) & \\\n", + " (df['total_cases'] >= N_min)\n", + " ] \\\n", + " ['total_cases'] \\\n", + " .rolling('3d') \\\n", + " .mean() \\\n", + " .pct_change() \\\n", + " .add(1.0)\n", + " return ts.iloc[1:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_growth_factors(provinces, df, N_min=1000):\n", + " if len(provinces) == 0:\n", + " return \n", + " plt.figure(figsize=(9, 6))\n", + " datemin = datemax = df.index[-1]\n", + " for province in provinces:\n", + " data = get_growth_factor_series(province, df, N_min=N_min)\n", + " if len(data) >= 1:\n", + " data.plot(label=province)\n", + " datemin = min(data.index[0], datemin)\n", + "\n", + " ax = plt.gca()\n", + " plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1.0), frameon=False)\n", + " plt.plot([datemin, datemax], [1,1], color='gray', alpha=0.2)\n", + " plt.ylim(0.95, ax.get_ylim()[1])\n", + " plt.title('Daily 
growth rate of total cases per province')\n", + " plt.xlabel('');\n", + " \n", + "def plot_total_cases(provinces, df, N_min=500):\n", + " if len(provinces) == 0:\n", + " return \n", + " plt.figure(figsize=(9, 6))\n", + " for province in provinces:\n", + " data = df.loc[\n", + " (df['province'] == province) & \\\n", + " (df['total_cases'] >= N_min)\n", + " ] \\\n", + " ['total_cases'] \\\n", + " .rolling('1d') \\\n", + " .mean() \\\n", + " .add(1.0)\n", + " if len(data) >= 1:\n", + " data.plot(label=province, logy=True)\n", + "\n", + " ax = plt.gca()\n", + " plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1.0), frameon=False)\n", + " plt.title('Total cases per province')\n", + " plt.xlabel('');\n", + " \n", + "def make_plots(provinces, df):\n", + " plot_growth_factors(provinces, df)\n", + " plot_total_cases(provinces, df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_province_selector(region):\n", + " return widgets.SelectMultiple(\n", + " options=province_dict[region],\n", + " value=[],\n", + " description='Provinces:',\n", + " disabled=region_selector.value is None\n", + " )\n", + "def get_interactive_widgets(region):\n", + " widgets.interact(lambda prov: make_plots(list(prov), df_provinces), prov=get_province_selector(region_selector.value));" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "region_selector = widgets.Dropdown(\n", + " options=[key for key in province_dict],\n", + " value='Lombardia',\n", + " description='Region:',\n", + " disabled=False,\n", + ")\n", + "widgets.interact(lambda reg: get_interactive_widgets(reg), reg=region_selector);" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/examples/italy-examples/italy-notebook-example.ipynb b/notebooks/examples/italy-examples/italy-notebook-example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..95d52a1a79db4dc8c0d92e227900c1e1636bbf81 --- /dev/null +++ b/notebooks/examples/italy-examples/italy-notebook-example.ipynb @@ -0,0 +1,571 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "\n", + "from italy_utils import * " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "data_folder = \"../../../data/covid-19-italy/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>country_code</th>\n", + " <th>hospitalised_with_symptoms</th>\n", + " <th>intensive_care</th>\n", + " <th>total_hospitalised</th>\n", + " <th>home_confinement</th>\n", + " 
<th>total_current_positive</th>\n", + " <th>new_current_positive</th>\n", + " <th>recovered</th>\n", + " <th>dead</th>\n", + " <th>total_cases</th>\n", + " <th>tests</th>\n", + " </tr>\n", + " <tr>\n", + " <th>date</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2020-03-18</th>\n", + " <td>ITA</td>\n", + " <td>14363</td>\n", + " <td>2257</td>\n", + " <td>16620</td>\n", + " <td>12090</td>\n", + " <td>28710</td>\n", + " <td>2648</td>\n", + " <td>4025</td>\n", + " <td>2978</td>\n", + " <td>35713</td>\n", + " <td>165541</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-03-19</th>\n", + " <td>ITA</td>\n", + " <td>15757</td>\n", + " <td>2498</td>\n", + " <td>18255</td>\n", + " <td>14935</td>\n", + " <td>33190</td>\n", + " <td>4480</td>\n", + " <td>4440</td>\n", + " <td>3405</td>\n", + " <td>41035</td>\n", + " <td>182777</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-03-20</th>\n", + " <td>ITA</td>\n", + " <td>16020</td>\n", + " <td>2655</td>\n", + " <td>18675</td>\n", + " <td>19185</td>\n", + " <td>37860</td>\n", + " <td>4670</td>\n", + " <td>5129</td>\n", + " <td>4032</td>\n", + " <td>47021</td>\n", + " <td>206886</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-03-21</th>\n", + " <td>ITA</td>\n", + " <td>17708</td>\n", + " <td>2857</td>\n", + " <td>20565</td>\n", + " <td>22116</td>\n", + " <td>42681</td>\n", + " <td>4821</td>\n", + " <td>6072</td>\n", + " <td>4825</td>\n", + " <td>53578</td>\n", + " <td>233222</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-03-22</th>\n", + " <td>ITA</td>\n", + " <td>19846</td>\n", + " <td>3009</td>\n", + " <td>22855</td>\n", + " <td>23783</td>\n", + " <td>46638</td>\n", + " <td>3957</td>\n", + " <td>7024</td>\n", + " <td>5476</td>\n", + " <td>59138</td>\n", + " <td>258402</td>\n", + " </tr>\n", + " 
</tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " country_code hospitalised_with_symptoms intensive_care \\\n", + "date \n", + "2020-03-18 ITA 14363 2257 \n", + "2020-03-19 ITA 15757 2498 \n", + "2020-03-20 ITA 16020 2655 \n", + "2020-03-21 ITA 17708 2857 \n", + "2020-03-22 ITA 19846 3009 \n", + "\n", + " total_hospitalised home_confinement total_current_positive \\\n", + "date \n", + "2020-03-18 16620 12090 28710 \n", + "2020-03-19 18255 14935 33190 \n", + "2020-03-20 18675 19185 37860 \n", + "2020-03-21 20565 22116 42681 \n", + "2020-03-22 22855 23783 46638 \n", + "\n", + " new_current_positive recovered dead total_cases tests \n", + "date \n", + "2020-03-18 2648 4025 2978 35713 165541 \n", + "2020-03-19 4480 4440 3405 41035 182777 \n", + "2020-03-20 4670 5129 4032 47021 206886 \n", + "2020-03-21 4821 6072 4825 53578 233222 \n", + "2020-03-22 3957 7024 5476 59138 258402 " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_national_trend = prepare_dataframe(\n", + " data_folder, \n", + " \"dpc-covid19-ita-andamento-nazionale.csv\", \n", + " \"dati-andamento-nazionale-description.json\",\n", + " use_time_index=True\n", + ")\n", + "df_national_trend.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df_national_trend[\"New cases per day\"] = df_national_trend[\"total_cases\"].diff().rolling('3d').mean()\n", + "df_national_trend.plot(y=\"New cases per day\", kind=\"bar\", logy=True, legend=False);\n", + "plt.title('New cases per day in Italy, 3 day backwards mean');" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " 
.dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>country_code</th>\n", + " <th>region_code</th>\n", + " <th>region</th>\n", + " <th>province_code</th>\n", + " <th>province</th>\n", + " <th>province_short</th>\n", + " <th>latitude</th>\n", + " <th>longitude</th>\n", + " <th>total_cases</th>\n", + " </tr>\n", + " <tr>\n", + " <th>date</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2020-02-24</th>\n", + " <td>ITA</td>\n", + " <td>13</td>\n", + " <td>Abruzzo</td>\n", + " <td>69</td>\n", + " <td>Chieti</td>\n", + " <td>CH</td>\n", + " <td>42.351032</td>\n", + " <td>14.167546</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-02-24</th>\n", + " <td>ITA</td>\n", + " <td>13</td>\n", + " <td>Abruzzo</td>\n", + " <td>66</td>\n", + " <td>L'Aquila</td>\n", + " <td>AQ</td>\n", + " <td>42.351222</td>\n", + " <td>13.398438</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-02-24</th>\n", + " <td>ITA</td>\n", + " <td>13</td>\n", + " <td>Abruzzo</td>\n", + " <td>68</td>\n", + " <td>Pescara</td>\n", + " <td>PE</td>\n", + " <td>42.464584</td>\n", + " <td>14.213648</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-02-24</th>\n", + " <td>ITA</td>\n", + " <td>13</td>\n", + " <td>Abruzzo</td>\n", + " <td>67</td>\n", + " <td>Teramo</td>\n", + " <td>TE</td>\n", + " <td>42.658918</td>\n", + " <td>13.704400</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-02-24</th>\n", + " <td>ITA</td>\n", + " 
<td>13</td>\n", + " <td>Abruzzo</td>\n", + " <td>979</td>\n", + " <td>In fase di definizione/aggiornamento</td>\n", + " <td>NaN</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " country_code region_code region province_code \\\n", + "date \n", + "2020-02-24 ITA 13 Abruzzo 69 \n", + "2020-02-24 ITA 13 Abruzzo 66 \n", + "2020-02-24 ITA 13 Abruzzo 68 \n", + "2020-02-24 ITA 13 Abruzzo 67 \n", + "2020-02-24 ITA 13 Abruzzo 979 \n", + "\n", + " province province_short latitude \\\n", + "date \n", + "2020-02-24 Chieti CH 42.351032 \n", + "2020-02-24 L'Aquila AQ 42.351222 \n", + "2020-02-24 Pescara PE 42.464584 \n", + "2020-02-24 Teramo TE 42.658918 \n", + "2020-02-24 In fase di definizione/aggiornamento NaN 0.000000 \n", + "\n", + " longitude total_cases \n", + "date \n", + "2020-02-24 14.167546 0 \n", + "2020-02-24 13.398438 0 \n", + "2020-02-24 14.213648 0 \n", + "2020-02-24 13.704400 0 \n", + "2020-02-24 0.000000 0 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_provinces = prepare_dataframe(\n", + " data_folder, \n", + " \"dpc-covid19-ita-province.csv\", \n", + " \"dati-province-description.json\",\n", + " use_time_index=True\n", + ")\n", + "\n", + "df_provinces.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>country_code</th>\n", + " <th>region_code</th>\n", + " <th>region</th>\n", + 
" <th>province_code</th>\n", + " <th>province</th>\n", + " <th>province_short</th>\n", + " <th>latitude</th>\n", + " <th>longitude</th>\n", + " <th>total_cases</th>\n", + " </tr>\n", + " <tr>\n", + " <th>date</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2020-02-24</th>\n", + " <td>ITA</td>\n", + " <td>13</td>\n", + " <td>Abruzzo</td>\n", + " <td>69</td>\n", + " <td>Chieti</td>\n", + " <td>CH</td>\n", + " <td>42.351032</td>\n", + " <td>14.167546</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-02-24</th>\n", + " <td>ITA</td>\n", + " <td>13</td>\n", + " <td>Abruzzo</td>\n", + " <td>66</td>\n", + " <td>L'Aquila</td>\n", + " <td>AQ</td>\n", + " <td>42.351222</td>\n", + " <td>13.398438</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-02-24</th>\n", + " <td>ITA</td>\n", + " <td>13</td>\n", + " <td>Abruzzo</td>\n", + " <td>68</td>\n", + " <td>Pescara</td>\n", + " <td>PE</td>\n", + " <td>42.464584</td>\n", + " <td>14.213648</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-02-24</th>\n", + " <td>ITA</td>\n", + " <td>13</td>\n", + " <td>Abruzzo</td>\n", + " <td>67</td>\n", + " <td>Teramo</td>\n", + " <td>TE</td>\n", + " <td>42.658918</td>\n", + " <td>13.704400</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2020-02-24</th>\n", + " <td>ITA</td>\n", + " <td>13</td>\n", + " <td>Abruzzo</td>\n", + " <td>979</td>\n", + " <td>In fase di definizione/aggiornamento</td>\n", + " <td>NaN</td>\n", + " <td>0.000000</td>\n", + " <td>0.000000</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " country_code region_code region province_code \\\n", + "date \n", + "2020-02-24 ITA 13 Abruzzo 69 \n", + "2020-02-24 ITA 13 Abruzzo 66 \n", + "2020-02-24 ITA 13 Abruzzo 68 \n", + 
"2020-02-24 ITA 13 Abruzzo 67 \n", + "2020-02-24 ITA 13 Abruzzo 979 \n", + "\n", + " province province_short latitude \\\n", + "date \n", + "2020-02-24 Chieti CH 42.351032 \n", + "2020-02-24 L'Aquila AQ 42.351222 \n", + "2020-02-24 Pescara PE 42.464584 \n", + "2020-02-24 Teramo TE 42.658918 \n", + "2020-02-24 In fase di definizione/aggiornamento NaN 0.000000 \n", + "\n", + " longitude total_cases \n", + "date \n", + "2020-02-24 14.167546 0 \n", + "2020-02-24 13.398438 0 \n", + "2020-02-24 14.213648 0 \n", + "2020-02-24 13.704400 0 \n", + "2020-02-24 0.000000 0 " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_regions = prepare_dataframe(\n", + " data_folder, \n", + " \"dpc-covid19-ita-regioni.csv\", \n", + " \"dati-regioni-description.json\",\n", + " use_time_index=True\n", + ")\n", + "\n", + "df_provinces.head()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/examples/italy-examples/italy_utils.py b/notebooks/examples/italy-examples/italy_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0cf0b7272bdcf39f1f39b168e17d44bcaaaea60b --- /dev/null +++ b/notebooks/examples/italy-examples/italy_utils.py @@ -0,0 +1,46 @@ +import json +import os +import pandas as pd + + +def translate_columns(data_folder, df, description_filename): + description_file_path = os.path.join(data_folder, description_filename) + + with open(description_file_path, 'r') as description_file: + decoded_data = description_file.read().encode().decode('utf-8-sig') + descriptions = json.loads(decoded_data) + 
descriptions = { column_dict['Nome campo']: column_dict for column_dict in descriptions} + + df.rename(columns=lambda col: descriptions[col]['Field name'], inplace=True) + return df + +def set_time_index(df, drop_hour=True): + if drop_hour: + lambda_func = lambda x: x.split(' ')[0] + else: + lambda_func = lambda x: x + + timestamp = pd.DatetimeIndex(df['date'].apply(lambda_func)) + df.set_index(timestamp, inplace=True) + del df['date'] + return df + +def prepare_dataframe(data_folder, df_filename, description_filename, use_time_index=False): + data_file_path = os.path.join(data_folder, df_filename) + df = pd.read_csv(data_file_path) + df = translate_columns(data_folder, df, description_filename) + if use_time_index: + df = set_time_index(df) + return df + +def get_province_structure(df_provinces): + """Extract the province/region structure from the province dataframe.""" + + def get_province_list(region): + """Get list of provinces for a given region.""" + provinces = set(df_provinces.loc[df_provinces['region']==region]['province']) + provinces.discard('In fase di definizione/aggiornamento') + return list(provinces) + + regions = df_provinces['region'].unique() + return {region: get_province_list(region) for region in regions} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d05293f9bb3bbe4cccce6fece1282633e991d91a..2bc49958b6cd110a8a5284d2e4f6826a7e404c13 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,11 @@ +aiohttp==3.6.2 alembic==1.4.0 altair==4.0.1 ansiwrap==0.8.4 +appnope==0.1.0 argcomplete==1.11.1 async-generator==1.10 +async-timeout==3.0.1 attrs==19.3.0 backcall==0.1.0 bleach==3.1.1 @@ -15,6 +18,7 @@ Click==7.0 colorama==0.4.3 -e src/covid-19/covid_19_dashboard cryptography==2.8 +cycler==0.10.0 decorator==4.4.1 defusedxml==0.6.0 distro==1.4.0 @@ -34,14 +38,18 @@ jsonschema==3.2.0 jupyter-client==6.0.0 jupyter-core==4.6.3 jupyter-rsession-proxy==1.1 +jupyter-server-proxy==1.2.0 jupyter-telemetry==0.0.5 
jupyterhub==0.9.6 jupyterlab==1.2.5 jupyterlab-git==0.9.0 jupyterlab-server==1.0.6 +kiwisolver==1.1.0 Mako==1.1.0 MarkupSafe==1.1.1 +matplotlib==3.2.1 mistune==0.8.4 +multidict==4.7.5 nbconvert==5.6.1 nbdime==1.1.0 nbformat==5.0.4 @@ -66,6 +74,7 @@ pycurl==7.43.0.5 Pygments==2.5.2 PyJWT==1.7.1 pyOpenSSL==19.1.0 +pyparsing==2.4.6 pyrsistent==0.15.7 PySocks==1.7.1 python-dateutil==2.8.1 @@ -76,9 +85,10 @@ pytz==2019.3 PyYAML==5.3 pyzmq==19.0.0 requests==2.23.0 -ruamel-yaml==0.15.80 +ruamel.yaml==0.15.80 ruamel.yaml.clib==0.2.0 Send2Trash==1.5.0 +simpervisor==0.3 six==1.14.0 smmap==3.0.1 SQLAlchemy==1.3.13 @@ -95,4 +105,5 @@ userpath==1.3.0 vega-datasets==0.8.0 wcwidth==0.1.8 webencodings==0.5.1 +yarl==1.4.2 zipp==3.0.0