From 9e828e70580386119943cab25db488a2e26929a0 Mon Sep 17 00:00:00 2001
From: Chandrasekhar Ramakrishnan <cramakri@ethz.ch>
Date: Thu, 2 Apr 2020 20:28:17 +0000
Subject: [PATCH] feat: implement notebook for getting population data from Wikidata

---
 notebooks/process/wikidata-pop-data.ipynb | 136 ++++++++++++++++++++++
 1 file changed, 136 insertions(+)
 create mode 100644 notebooks/process/wikidata-pop-data.ipynb

diff --git a/notebooks/process/wikidata-pop-data.ipynb b/notebooks/process/wikidata-pop-data.ipynb
new file mode 100644
index 000000000..1eefbd38d
--- /dev/null
+++ b/notebooks/process/wikidata-pop-data.ipynb
@@ -0,0 +1,136 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Gather Population Data from Wikidata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import pandas as pd\n",
+    "\n",
+    "from covid_19_dashboard import helper"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "out_folder = '../../data/atlas/wikidata'\n",
+    "PAPERMILL_OUTPUT_PATH = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def write_population_data(iso_code, df):\n",
+    "    out_path = os.path.join(out_folder, f\"{iso_code.lower()}-population.csv\")\n",
+    "    print(f\"Writing {len(df)} rows to {out_path}\")\n",
+    "    if PAPERMILL_OUTPUT_PATH is None:\n",
+    "        return\n",
+    "    df.to_csv(out_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Italy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iso_code = \"ITA\"\n",
+    "pops = helper.get_region_populations(\n",
+    "    iso_code,\n",
+    "    additional_fields=\"?istatid\",\n",
+    "    additional_query=\"?region wdt:P635 ?istatid .\",\n",
+    ")\n",
+    "df = pd.DataFrame(pops)\n",
+    "write_population_data(iso_code, df)\n",
+    "df.head(2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Switzerland"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iso_code = \"CHE\"\n",
+    "pops = helper.get_region_populations(iso_code)\n",
+    "df = pd.DataFrame(pops)\n",
+    "write_population_data(iso_code, df)\n",
+    "df.head(2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## United States"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "iso_code = \"USA\"\n",
+    "pops = helper.get_region_populations(iso_code)\n",
+    "df = pd.DataFrame(pops)\n",
+    "write_population_data(iso_code, df)\n",
+    "df.head(2)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
-- 
GitLab