renku update --with-siblings

67cbdeaf · CI-bot · renku 0.10.4 · 77f2290c · 67cbdeaf · 67cbdeaf
Commit 67cbdeaf authored 4 years ago by CI-bot Committed by renku 0.10.4 4 years ago
--- a/.renku/workflow/3ea6e0eac80145d493c20e1210f5461d.cwl
+++ b/.renku/workflow/3ea6e0eac80145d493c20e1210f5461d.cwl
+class: Workflow
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../notebooks/covidtracking.ipynb
+    streamable: false
+    type: File
+  input_10:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/covid-19-ecdc
+    streamable: false
+    type: Directory
+  input_11:
+    default: runs/dataset_summary.run.ipynb
+    streamable: false
+    type: string
+  input_12:
+    default: atlas_path
+    streamable: false
+    type: string
+  input_13:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/atlas
+    streamable: false
+    type: Directory
+  input_14:
+    default: us_path
+    streamable: false
+    type: string
+  input_15:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/covid-19-us-nyt
+    streamable: false
+    type: Directory
+  input_16:
+    default: italy_path
+    streamable: false
+    type: string
+  input_17:
+    default:
+      class: File
+      path: ../../data/covid-19-italy/dpc-covid19-ita-regioni.csv
+    streamable: false
+    type: File
+  input_18:
+    default: spain_path
+    streamable: false
+    type: string
+  input_19:
+    default:
+      class: File
+      path: ../../notebooks/Dashboard.ipynb
+    streamable: false
+    type: File
+  input_2:
+    default: runs/covidtracking.run.ipynb
+    streamable: false
+    type: string
+  input_20:
+    default: runs/Dashboard.run.ipynb
+    streamable: false
+    type: string
+  input_21:
+    default: ts_folder
+    streamable: false
+    type: string
+  input_22:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/covid-19_jhu-csse
+    streamable: false
+    type: Directory
+  input_23:
+    default: rates_folder
+    streamable: false
+    type: string
+  input_24:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/covid-19_rates
+    streamable: false
+    type: Directory
+  input_25:
+    default: geodata_path
+    streamable: false
+    type: string
+  input_26:
+    default:
+      class: File
+      path: ../../data/geodata/geo_data.csv
+    streamable: false
+    type: File
+  input_27:
+    default: atlas_path
+    streamable: false
+    type: string
+  input_28:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/atlas
+    streamable: false
+    type: Directory
+  input_3:
+    default: data_path
+    streamable: false
+    type: string
+  input_4:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/covidtracking
+    streamable: false
+    type: Directory
+  input_5:
+    default: atlas_path
+    streamable: false
+    type: string
+  input_6:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/atlas
+    streamable: false
+    type: Directory
+  input_7:
+    default:
+      class: File
+      path: ../../notebooks/datasets_summary.ipynb
+    streamable: false
+    type: File
+  input_8:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/covid-19-spain
+    streamable: false
+    type: Directory
+  input_9:
+    default: ecdc_path
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputSource: step_1/output_0
+    streamable: false
+    type: File
+  output_1:
+    outputSource: step_3/output_0
+    streamable: false
+    type: File
+  output_2:
+    outputSource: step_2/output_0
+    streamable: false
+    type: File
+requirements: []
+steps:
+  step_1:
+    in:
+      input_1: input_1
+      input_2: input_2
+      input_3: input_3
+      input_4: input_4
+      input_5: input_5
+      input_6: input_6
+    out:
+    - output_0
+    run: 2fba4568d8784fb99872b6c8a35f66b9_papermill.cwl
+  step_2:
+    in:
+      input_1: input_7
+      input_10: input_8
+      input_11: input_9
+      input_12: input_10
+      input_2: input_11
+      input_3: input_12
+      input_4: input_13
+      input_5: input_14
+      input_6: input_15
+      input_7: input_16
+      input_8: input_17
+      input_9: input_18
+    out:
+    - output_0
+    run: e0c6511bd8234efe8a19405f1145e990_papermill.cwl
+  step_3:
+    in:
+      input_10: input_19
+      input_11: input_20
+      input_2: input_21
+      input_3: input_22
+      input_4: input_23
+      input_5: input_24
+      input_6: input_25
+      input_7: input_26
+      input_8: input_27
+      input_9: input_28
+    out:
+    - output_0
+    run: edeae7a3f9bd41579941a5f9b0eaf2aa_papermill.cwl
--- a/runs/Dashboard.run.ipynb
+++ b/runs/Dashboard.run.ipynb
--- a/runs/covidtracking.run.ipynb
+++ b/runs/covidtracking.run.ipynb
@@ -5,10 +5,10 @@
   "execution_count": 1,
   "metadata": {
    "papermill": {
-     "duration": 0.542403,
-     "end_time": "2020-11-23T00:57:30.878805",
+     "duration": 1.393966,
+     "end_time": "2020-11-23T08:05:47.549431",
     "exception": false,
-     "start_time": "2020-11-23T00:57:30.336402",
+     "start_time": "2020-11-23T08:05:46.155465",
     "status": "completed"
    },
    "tags": []
@@ -29,10 +29,10 @@
   "execution_count": 2,
   "metadata": {
    "papermill": {
-     "duration": 0.02682,
-     "end_time": "2020-11-23T00:57:30.927595",
+     "duration": 0.023512,
+     "end_time": "2020-11-23T08:05:47.590831",
     "exception": false,
-     "start_time": "2020-11-23T00:57:30.900775",
+     "start_time": "2020-11-23T08:05:47.567319",
     "status": "completed"
    },
    "tags": []
@@ -53,10 +53,10 @@
   "execution_count": 3,
   "metadata": {
    "papermill": {
-     "duration": 0.021969,
-     "end_time": "2020-11-23T00:57:30.965175",
+     "duration": 0.020404,
+     "end_time": "2020-11-23T08:05:47.622166",
     "exception": false,
-     "start_time": "2020-11-23T00:57:30.943206",
+     "start_time": "2020-11-23T08:05:47.601762",
     "status": "completed"
    },
    "tags": [
@@ -74,10 +74,10 @@
   "execution_count": 4,
   "metadata": {
    "papermill": {
-     "duration": 0.022562,
-     "end_time": "2020-11-23T00:57:30.998209",
+     "duration": 0.018243,
+     "end_time": "2020-11-23T08:05:47.649533",
     "exception": false,
-     "start_time": "2020-11-23T00:57:30.975647",
+     "start_time": "2020-11-23T08:05:47.631290",
     "status": "completed"
    },
    "tags": [
@@ -87,8 +87,8 @@
   "outputs": [],
   "source": [
    "# Parameters\n",
-    "data_path = \"/tmp/hc8z45_m/data/covidtracking\"\n",
-    "atlas_path = \"/tmp/hc8z45_m/data/atlas\"\n"
+    "data_path = \"/tmp/4vzuj7ki/data/covidtracking\"\n",
+    "atlas_path = \"/tmp/4vzuj7ki/data/atlas\"\n"
   ]
  },
  {
@@ -96,10 +96,10 @@
   "execution_count": 5,
   "metadata": {
    "papermill": {
-     "duration": 1.929566,
-     "end_time": "2020-11-23T00:57:32.940628",
+     "duration": 1.429481,
+     "end_time": "2020-11-23T08:05:49.085966",
     "exception": false,
-     "start_time": "2020-11-23T00:57:31.011062",
+     "start_time": "2020-11-23T08:05:47.656485",
     "status": "completed"
    },
    "tags": []
@@ -119,10 +119,10 @@
   "execution_count": 6,
   "metadata": {
    "papermill": {
-     "duration": 0.219997,
-     "end_time": "2020-11-23T00:57:33.180842",
+     "duration": 0.167569,
+     "end_time": "2020-11-23T08:05:49.272972",
     "exception": false,
-     "start_time": "2020-11-23T00:57:32.960845",
+     "start_time": "2020-11-23T08:05:49.105403",
     "status": "completed"
    },
    "tags": []
@@ -151,10 +151,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.012277,
-     "end_time": "2020-11-23T00:57:33.213378",
+     "duration": 0.011003,
+     "end_time": "2020-11-23T08:05:49.301429",
     "exception": false,
-     "start_time": "2020-11-23T00:57:33.201101",
+     "start_time": "2020-11-23T08:05:49.290426",
     "status": "completed"
    },
    "tags": []
@@ -169,10 +169,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.009506,
-     "end_time": "2020-11-23T00:57:33.232302",
+     "duration": 0.006607,
+     "end_time": "2020-11-23T08:05:49.315096",
     "exception": false,
-     "start_time": "2020-11-23T00:57:33.222796",
+     "start_time": "2020-11-23T08:05:49.308489",
     "status": "completed"
    },
    "tags": []
@@ -186,10 +186,10 @@
   "execution_count": 7,
   "metadata": {
    "papermill": {
-     "duration": 0.618127,
-     "end_time": "2020-11-23T00:57:33.860026",
+     "duration": 0.46049,
+     "end_time": "2020-11-23T08:05:49.782195",
     "exception": false,
-     "start_time": "2020-11-23T00:57:33.241899",
+     "start_time": "2020-11-23T08:05:49.321705",
     "status": "completed"
    },
    "tags": []
@@ -199,10 +199,10 @@
     "data": {
      "text/html": [
       "\n",
-       "<div id=\"altair-viz-40b798ab318e4af8a88704c9ce0ce657\"></div>\n",
+       "<div id=\"altair-viz-c8d68d64d5eb4258ae7c364f0b3e3c42\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  (function(spec, embedOpt){\n",
-       "    const outputDiv = document.getElementById(\"altair-viz-40b798ab318e4af8a88704c9ce0ce657\");\n",
+       "    const outputDiv = document.getElementById(\"altair-viz-c8d68d64d5eb4258ae7c364f0b3e3c42\");\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
@@ -392,10 +392,10 @@
   "execution_count": 8,
   "metadata": {
    "papermill": {
-     "duration": 0.538621,
-     "end_time": "2020-11-23T00:57:34.461262",
+     "duration": 0.412141,
+     "end_time": "2020-11-23T08:05:50.240691",
     "exception": false,
-     "start_time": "2020-11-23T00:57:33.922641",
+     "start_time": "2020-11-23T08:05:49.828550",
     "status": "completed"
    },
    "tags": []
@@ -405,10 +405,10 @@
     "data": {
      "text/html": [
       "\n",
-       "<div id=\"altair-viz-caea20a5bdd14dc68833cfbd7be87936\"></div>\n",
+       "<div id=\"altair-viz-5ef947181b4d44c394fd8a8e606233c1\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  (function(spec, embedOpt){\n",
-       "    const outputDiv = document.getElementById(\"altair-viz-caea20a5bdd14dc68833cfbd7be87936\");\n",
+       "    const outputDiv = document.getElementById(\"altair-viz-5ef947181b4d44c394fd8a8e606233c1\");\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
@@ -596,10 +596,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.101525,
-     "end_time": "2020-11-23T00:57:34.668565",
+     "duration": 0.074449,
+     "end_time": "2020-11-23T08:05:50.393745",
     "exception": false,
-     "start_time": "2020-11-23T00:57:34.567040",
+     "start_time": "2020-11-23T08:05:50.319296",
     "status": "completed"
    },
    "tags": []
@@ -615,10 +615,10 @@
   "execution_count": 9,
   "metadata": {
    "papermill": {
-     "duration": 0.212339,
-     "end_time": "2020-11-23T00:57:34.982120",
+     "duration": 0.168135,
+     "end_time": "2020-11-23T08:05:50.638324",
     "exception": false,
-     "start_time": "2020-11-23T00:57:34.769781",
+     "start_time": "2020-11-23T08:05:50.470189",
     "status": "completed"
    },
    "tags": []
@@ -628,10 +628,10 @@
     "data": {
      "text/html": [
       "\n",
-       "<div id=\"altair-viz-91e3e416ecce41f39f043e53cc2e87d4\"></div>\n",
+       "<div id=\"altair-viz-6a6f3124fee74cd1b73e59b155c0e5d5\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  (function(spec, embedOpt){\n",
-       "    const outputDiv = document.getElementById(\"altair-viz-91e3e416ecce41f39f043e53cc2e87d4\");\n",
+       "    const outputDiv = document.getElementById(\"altair-viz-6a6f3124fee74cd1b73e59b155c0e5d5\");\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
@@ -730,10 +730,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.114309,
-     "end_time": "2020-11-23T00:57:35.214983",
+     "duration": 0.07651,
+     "end_time": "2020-11-23T08:05:50.799503",
     "exception": false,
-     "start_time": "2020-11-23T00:57:35.100674",
+     "start_time": "2020-11-23T08:05:50.722993",
     "status": "completed"
    },
    "tags": []
@@ -747,10 +747,10 @@
   "execution_count": 10,
   "metadata": {
    "papermill": {
-     "duration": 0.157299,
-     "end_time": "2020-11-23T00:57:35.485956",
+     "duration": 0.10308,
+     "end_time": "2020-11-23T08:05:50.984868",
     "exception": false,
-     "start_time": "2020-11-23T00:57:35.328657",
+     "start_time": "2020-11-23T08:05:50.881788",
     "status": "completed"
    },
    "tags": []
@@ -777,10 +777,10 @@
   "execution_count": 11,
   "metadata": {
    "papermill": {
-     "duration": 0.19406,
-     "end_time": "2020-11-23T00:57:35.804439",
+     "duration": 0.160873,
+     "end_time": "2020-11-23T08:05:51.236422",
     "exception": false,
-     "start_time": "2020-11-23T00:57:35.610379",
+     "start_time": "2020-11-23T08:05:51.075549",
     "status": "completed"
    },
    "tags": []
@@ -790,10 +790,10 @@
     "data": {
      "text/html": [
       "\n",
-       "<div id=\"altair-viz-f8ff15af85c24f4f825ae5cfc05f92ff\"></div>\n",
+       "<div id=\"altair-viz-f4d64bf7a3df4b099d08bb181907e737\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  (function(spec, embedOpt){\n",
-       "    const outputDiv = document.getElementById(\"altair-viz-f8ff15af85c24f4f825ae5cfc05f92ff\");\n",
+       "    const outputDiv = document.getElementById(\"altair-viz-f4d64bf7a3df4b099d08bb181907e737\");\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
@@ -876,10 +876,10 @@
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
-     "duration": 0.115996,
-     "end_time": "2020-11-23T00:57:36.048635",
+     "duration": 0.080109,
+     "end_time": "2020-11-23T08:05:51.412825",
     "exception": false,
-     "start_time": "2020-11-23T00:57:35.932639",
+     "start_time": "2020-11-23T08:05:51.332716",
     "status": "completed"
    },
    "tags": []
@@ -895,10 +895,10 @@
   "execution_count": 12,
   "metadata": {
    "papermill": {
-     "duration": 0.397448,
-     "end_time": "2020-11-23T00:57:36.561719",
+     "duration": 0.282972,
+     "end_time": "2020-11-23T08:05:51.779745",
     "exception": false,
-     "start_time": "2020-11-23T00:57:36.164271",
+     "start_time": "2020-11-23T08:05:51.496773",
     "status": "completed"
    },
    "tags": []
@@ -908,10 +908,10 @@
     "data": {
      "text/html": [
       "\n",
-       "<div id=\"altair-viz-37301d5bd82c41c4a7469ff53f3dacb4\"></div>\n",
+       "<div id=\"altair-viz-0dfc9c61932949b0b6e82f0709dd5fe4\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  (function(spec, embedOpt){\n",
-       "    const outputDiv = document.getElementById(\"altair-viz-37301d5bd82c41c4a7469ff53f3dacb4\");\n",
+       "    const outputDiv = document.getElementById(\"altair-viz-0dfc9c61932949b0b6e82f0709dd5fe4\");\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
@@ -1008,10 +1008,10 @@
   "execution_count": null,
   "metadata": {
    "papermill": {
-     "duration": 0.122625,
-     "end_time": "2020-11-23T00:57:36.813870",
+     "duration": 0.115096,
+     "end_time": "2020-11-23T08:05:51.989119",
     "exception": false,
-     "start_time": "2020-11-23T00:57:36.691245",
+     "start_time": "2020-11-23T08:05:51.874023",
     "status": "completed"
    },
    "tags": []
@@ -1040,17 +1040,17 @@
   "version": "3.7.3"
  },
  "papermill": {
-   "duration": 7.863069,
-   "end_time": "2020-11-23T00:57:37.354262",
+   "duration": 7.18608,
+   "end_time": "2020-11-23T08:05:52.403494",
   "environment_variables": {},
   "exception": null,
-   "input_path": "/tmp/hc8z45_m/notebooks/covidtracking.ipynb",
+   "input_path": "/tmp/4vzuj7ki/notebooks/covidtracking.ipynb",
   "output_path": "runs/covidtracking.run.ipynb",
   "parameters": {
-    "atlas_path": "/tmp/hc8z45_m/data/atlas",
-    "data_path": "/tmp/hc8z45_m/data/covidtracking"
+    "atlas_path": "/tmp/4vzuj7ki/data/atlas",
+    "data_path": "/tmp/4vzuj7ki/data/covidtracking"
   },
-   "start_time": "2020-11-23T00:57:29.491193",
+   "start_time": "2020-11-23T08:05:45.217414",
   "version": "1.1.0"
  }
 },
 %% Cell type:code id: tags:
  
 ``` python
 from pathlib import Path
  
 import pandas as pd
 import altair as alt
 from IPython.display import display, HTML
  
 from covid_19_utils.converters import CaseConverter
 ```
  
 %% Cell type:code id: tags:
  
 ``` python
 # Attribution footer (data source + analysis project links) as an IPython HTML
 # object; later cells display it right after each chart.
 html_credits=HTML('''
 <p style="font-size: smaller">Data Sources:
  <a href="https://covidtracking.com">The COVID Tracking Project</a>
 <br>
 Analysis and Visualization:
  <a href="https://renkulab.io/projects/covid-19/covid-19-public-data">Covid-19 Public Data Collaboration Project</a>
 </p>''')
 ```
  
 %% Cell type:code id: tags:parameters
  
 ``` python
 # Default input locations (cell is tagged "parameters"); the following
 # "injected-parameters" cell reassigns both names for the actual run.
 data_path = '../data/covidtracking'
 atlas_path = '../data/atlas'
 ```
  
 %% Cell type:code id: tags:injected-parameters
  
 ``` python
 # Parameters
-data_path = "/tmp/hc8z45_m/data/covidtracking"
-atlas_path = "/tmp/hc8z45_m/data/atlas"
+data_path = "/tmp/4vzuj7ki/data/covidtracking"
+atlas_path = "/tmp/4vzuj7ki/data/atlas"
 ```
  
 %% Cell type:code id: tags:
  
 ``` python
 # Read in the data: build a CaseConverter from atlas_path and let it read and
 # convert whatever is stored under data_path.
 # NOTE(review): CaseConverter comes from covid_19_utils; the exact columns of
 # the returned frame are not visible here — later cells rely on 'date',
 # 'region_label', 'positive', 'deceased', 'tested' and the '*_100k' columns.
 converter = CaseConverter(atlas_path)
 data_df = converter.read_convert(data_path)
  
 # Referring to "state" makes more sense in this notebook than "region_label".
 data_df = data_df.rename(columns={"region_label": "state"})
 ```
  
 %% Cell type:code id: tags:
  
 ``` python
 # Compute daily differences.
 # Rows are ordered by state (ascending) and date (descending), so within each
 # state diff(periods=-1) subtracts the following row (the next-older date) from
 # the current one. Rows whose differences are all NaN are dropped.
 tdf = data_df.sort_values(['state', 'date'], ascending=[True, False]).set_index(['state', 'date'])
 diffs_df = tdf[['positive', 'deceased', 'positive_100k', 'deceased_100k']].groupby(level='state').diff(periods=-1).dropna(how='all')
 # Joined difference columns get the '_diff' suffix (e.g. 'positive_diff').
 tdf_diff=tdf.join(diffs_df, rsuffix='_diff').reset_index()
  
 # "Normalizing" the total tests: one tenth of 'tested', stored as 'total_10'.
 tdf_diff['total_10'] = tdf_diff['tested']/10.
  
 # Daily totals: sum of every state's row per date, with 'date' moved back
 # from the index into a regular column.
 daily_totals = tdf_diff.groupby('date').sum()
 daily_totals.reset_index(level=0, inplace=True)
  
 # National daily totals, labelled 'All US' so they can be plotted alongside
 # the individual states.
 nation_df = data_df.groupby('date').sum()
 nation_df['state']='All US'
 nation_df = nation_df.reset_index()
 ```
  
 %% Cell type:markdown id: tags:
  
 # Covid-19 Cases in U.S.
  
 The case data from the U.S. is obtained from https://covidtracking.com, a public crowd-sourced covid-19 dataset.
  
 %% Cell type:markdown id: tags:
  
 ### Growth trends
  
 %% Cell type:code id: tags:
  
 ``` python
 # Growth-trend chart for cumulative deaths, per state plus the national total.
 #
 # Make dataframe for text labels on chart - hand edit these label locations.
 textLabels_df = pd.DataFrame(
    [[10,6000,'doubles every day'],
     [36,50000,'doubles every 3 days'],
     [34,100, 'doubles every week']],
    columns =['labelX', 'labelY','labelText']
 )
  
 # Threshold: only rows with at least this many cumulative deaths are plotted.
 startCase = 2000
  
 # Keep only the rows with at least startCase deaths.
 deceased10_df = data_df.loc[data_df['deceased']>=startCase]
  
 # Order the filtered rows by state and then by increasing date.
 deceased10_df = deceased10_df.sort_values(by=['state','date'])
  
 # Add the national ('All US') rows, filtered with the same threshold.
 nationdeceased10_df = nation_df.loc[nation_df['deceased']>=startCase]
 deceased10_df= pd.concat ([deceased10_df,nationdeceased10_df])
  
 deceased10_df = deceased10_df.reset_index()
  
 # List of the states that reached the threshold.
 # NOTE(review): state_list is not used again in this cell.
 state_list = list(set(deceased10_df['state']))
  
 # Add a per-state running day counter: 0 for the first row at or above the
 # threshold, 1 for the next row, and so on.
 for state, df in deceased10_df.groupby('state'):
    deceased10_df.loc[df.index,'sinceDay0'] = range(0, len(df))
 deceased10_df = deceased10_df.astype({'sinceDay0': 'int32'})
  
 # Now create one line per state, on a log-scaled y axis.
 lineChart = alt.Chart(deceased10_df,title=f'US States: Cumulative Deaths Since {startCase}th Death').mark_line(interpolate='basis').encode(
    alt.X('sinceDay0:Q', axis=alt.Axis(title=f'Days Since {startCase}th Death')),
    alt.Y('deceased:Q',
         axis = alt.Axis(title='Cumulative Deaths'),
         scale=alt.Scale(type='log')),
    tooltip=['state', 'sinceDay0', 'deceased', 'positive'],
    color = 'state'
 ).properties(width=800,height=400)
  
 ## Create a layer with the reference lines for doubling every day, every
 ## three days and every week.
 ## NOTE(review): ruleChart and labelChart are built below but are not part of
 ## the layered chart that is finally displayed.
  
 # Compute theoretical trends of doubling every day, 3 days, week.
 days = {'day':[1,2,3,4,5,10,15,20, max(deceased10_df.sinceDay0)+5]}
 logRuleDay_df = pd.DataFrame(days, columns=['day'])
 logRuleDay_df['case']= startCase * pow(2,logRuleDay_df['day'])
 logRuleDay_df['doubling period']='every day'
  
 logRule3Days_df = pd.DataFrame(days, columns=['day'])
 logRule3Days_df['case']= startCase * pow(2,(logRule3Days_df['day'])/3)
 logRule3Days_df['doubling period']='three days'
  
 logRuleWeek_df = pd.DataFrame(days, columns=['day'])
 logRuleWeek_df['case']= startCase * pow(2,(logRuleWeek_df['day'])/7)
 logRuleWeek_df['doubling period']='every week'
  
 logRules_df = pd.concat([logRuleDay_df, logRule3Days_df, logRuleWeek_df])
 logRules_df = logRules_df.reset_index()
  
  
 ruleChart = alt.Chart(logRules_df).mark_line(opacity=0.2,clip=True).encode(
    alt.X('day:Q',
            scale=alt.Scale(domain=[1,max(deceased10_df.sinceDay0)+5])),
    alt.Y('case', scale=alt.Scale(type='log',domain=[startCase,150000]),
         ),
    color = 'doubling period',
    tooltip = ['doubling period'])
  
 # Create a layer for the state labels:
 # 1) make dataframe with each state's last (maximum sinceDay0) row
 # 2) make a chart layer with text of state name to right of each state's rightmost point
 stateLabels_df = deceased10_df[deceased10_df['sinceDay0'] == deceased10_df.groupby(['state'])['sinceDay0'].transform(max)]
 labelChart = alt.Chart(stateLabels_df).mark_text(align='left', baseline='middle', dx=10).encode(
    x='sinceDay0',
    y='deceased',
    text='state',
    color='state')
  
 # Now put the doubling-rate text labels layer on top of the state labels chart.
 labelChart = labelChart + alt.Chart(textLabels_df).mark_text(align='right', baseline='bottom', dx=0, size=18,opacity=0.5).encode(
    x='labelX',
    y='labelY',
    text='labelText')
  
  
 ## Create some tooltip behavior - show Y values on mouseover
 # Step 1: Selection that chooses nearest point based on value on x-axis
 nearest = alt.selection(type='single', nearest=True, on='mouseover',
                            fields=['sinceDay0'])
  
 # Step 2: Transparent selectors across the chart. This is what tells us
 # the x-value of the cursor
 selectors = alt.Chart().mark_point().encode(
    x="sinceDay0:Q",
    opacity=alt.value(0),
 ).add_selection(
    nearest
 )
  
 # Step 3: Add text, show the 'deceased' value when it's the nearest point to
 # mouseover, else show blank
 text = lineChart.mark_text(align='center', dx=3, dy=-20).encode(
    text=alt.condition(nearest, 'deceased', alt.value(' '))
 )
  
  
 # Finally, show the chart: state lines, invisible selectors and hover text.
  
 chart = alt.layer(lineChart, selectors, text, data=deceased10_df)
  
 display(chart)
 display(html_credits)
 ```
  
 %% Output
  
  
  
 %% Cell type:code id: tags:
  
 ``` python
 # Growth-trend chart for cumulative positive cases, per state plus the
 # national total.
 #
 # Make dataframe for text labels on chart - hand edit these label locations.
 textLabels_df = pd.DataFrame(
    [[9,30000,'doubles every day'],
     [28,31000,'doubles every 3 days'],
     [32,1000, 'doubles every week']],
    columns =['labelX', 'labelY','labelText']
 )
  
 # Threshold: only rows with at least this many cumulative positives are plotted.
 startCase = 100000
  
 # Keep only the rows with at least startCase positives.
 positive100_df = data_df.loc[data_df['positive']>=startCase]
  
 ## Add the national ('All US') rows, filtered with the same threshold.
 nationpos100_df = nation_df.loc[nation_df['positive']>=startCase]
 positive100_df= pd.concat ([positive100_df,nationpos100_df])
  
 # Order the filtered rows by state and then by increasing date.
 positive100_df = positive100_df.sort_values(by=['state','date'])
 positive100_df = positive100_df.reset_index()
  
 # List of the states that reached the threshold (not needed here):
 # state_list = list(set(positive100_df['state']))
  
 # Add a per-state running day counter: 0 for the first row at or above the
 # threshold, 1 for the next row, and so on.
 for state, df in positive100_df.groupby('state'):
    positive100_df.loc[df.index,'sinceDay0'] = range(0, len(df))
 positive100_df = positive100_df.astype({'sinceDay0': 'int32'})
  
  
 # Now create one line per state, on a log-scaled y axis.
 lineChart = alt.Chart(positive100_df, title=f"US States: total cases since {startCase}th case").mark_line(interpolate='basis').encode(
    alt.X('sinceDay0:Q', axis=alt.Axis(title=f'Days since {startCase}th case')),
    alt.Y('positive:Q',
          axis = alt.Axis(title='Cumulative positive cases'),
          scale=alt.Scale(type='log')),
    tooltip=['state', 'sinceDay0', 'deceased', 'positive'],
    color = 'state'
 ).properties(width=800,height=400)
  
 ## Create a layer with the reference lines for doubling every day, every
 ## three days and every week.
 ## NOTE(review): ruleChart and labelChart are built below but are not part of
 ## the layered chart that is finally displayed.
  
 days = {'day':[1,2,3,4,5,10,15,20, max(positive100_df.sinceDay0)+5]}
  
 logRuleDay_df = pd.DataFrame (days, columns=['day'])
 logRuleDay_df['case']= startCase * pow(2,logRuleDay_df['day'])
 logRuleDay_df['doubling period']='every day'
  
 logRule3Days_df = pd.DataFrame (days, columns=['day'])
 logRule3Days_df['case']= startCase * pow(2,(logRule3Days_df['day'])/3)
 logRule3Days_df['doubling period']='three days'
  
 logRuleWeek_df = pd.DataFrame (days, columns=['day'])
 logRuleWeek_df['case']= startCase * pow(2,(logRuleWeek_df['day'])/7)
 logRuleWeek_df['doubling period']='every week'
  
 logRules_df = pd.concat([logRuleDay_df, logRule3Days_df, logRuleWeek_df])
 logRules_df = logRules_df.reset_index()
  
 ruleChart = alt.Chart(logRules_df).mark_line(opacity=0.2,clip=True).encode(
    alt.X('day:Q',
            scale=alt.Scale(domain=[1, max(positive100_df.sinceDay0)+5])),
    alt.Y('case', scale=alt.Scale(domain=[startCase,2000000], type='log'),
         ),
    color = 'doubling period')
  
 # Create a layer for the state labels:
 # 1) make dataframe with each state's last (maximum sinceDay0) row
 # 2) make a chart layer with text of state name to right of each state's rightmost point
 stateLabels_df = positive100_df[positive100_df['sinceDay0'] == positive100_df.groupby(['state'])['sinceDay0'].transform(max)]
 labelChart = alt.Chart(stateLabels_df).mark_text(align='left', baseline='middle', dx=10).encode(
    x='sinceDay0',
    y='positive',
    text='state',
    color='state')
  
 # Now put the doubling-rate text labels layer on top of the state labels chart.
 labelChart = labelChart + alt.Chart(textLabels_df).mark_text(align='right', baseline='bottom', dx=0, size=18,opacity=0.5).encode(
    x='labelX',
    y='labelY',
    text='labelText')
  
 # Create some tooltip behavior
 # Step 1: Selection that chooses nearest point based on value on x-axis
 nearest = alt.selection(type='single', nearest=True, on='mouseover',
                            fields=['sinceDay0'])
  
 # Step 2: Transparent selectors across the chart. This is what tells us
 # the x-value of the cursor
 selectors = alt.Chart().mark_point().encode(
    x="sinceDay0:Q",
    opacity=alt.value(0),
 ).add_selection(
    nearest
 )
  
 # Step 3: Add text, show the 'positive' value when it's the nearest point to
 # mouseover, else show blank
 text = lineChart.mark_text(align='center', dx=3, dy=-20).encode(
    text=alt.condition(nearest, 'positive', alt.value(' '))
 )
  
  
 # Finally, show the chart: state lines, invisible selectors and hover text.
  
 chart = alt.layer(lineChart, selectors, text, data=positive100_df)
 #chart = alt.layer(lineChart, ruleChart, labelChart)
 # NOTE(review): the result of .properties() is discarded here; presumably it
 # returns a new chart rather than modifying `chart`, in which case this line
 # has no effect on what is displayed — confirm and assign or remove.
 chart.properties (width=400,height=800)
 display(chart)
 display(html_credits)
 ```
  
 %% Output
  
  
  
 %% Cell type:markdown id: tags:
  
 ### Daily Cumulative Totals
  
 Cumulative reported totals of positive cases and deaths.
  
 %% Cell type:code id: tags:
  
 ``` python
 # Shared base: thin bars over 'date' from the per-date sums in daily_totals,
 # with an untitled x axis; each chart below only adds its own y encoding.
 base = alt.Chart(
    daily_totals
 ).mark_bar(size=2).encode(
    alt.X('date', axis=alt.Axis(title='')
    )
 ).properties(
    height=200,
    width=400
 )
  
 # Cumulative columns and their '_diff' (daily change) counterparts.
 cumulative = base.encode(alt.Y('positive', title = 'Cumulative cases'))
 cumulative_deaths = base.encode(alt.Y('deceased', title = 'Cumulative deaths'))
 rates = base.encode(alt.Y('positive_diff', title='Daily cases'))
 rates_deaths = base.encode(alt.Y('deceased_diff', title='Daily deaths'))
 # 2x2 grid: cases on the top row, deaths on the bottom row;
 # cumulative on the left, daily on the right.
 chart = alt.vconcat(
    cumulative | rates, cumulative_deaths | rates_deaths,
    title='Cumulative Covid-19 cases and deaths in the U.S.'
 ).configure_title(
    anchor='middle'
 )
 display(chart)
 display(html_credits)
 ```
  
 %% Output
  
  
  
 %% Cell type:markdown id: tags:
  
 ### Total tests and positives per 100k population
  
 %% Cell type:code id: tags:
  
 ``` python
 # Select the rows for the latest date present in the data and report how
 # many rows (states/territories) that date has.
 most_recent_test_date = data_df['date'].max()
 most_recent_df = data_df[data_df['date'] == most_recent_test_date]
 print("Most recent test date", most_recent_test_date)
 print(len(most_recent_df), "states/territories have data on this date.")
 ```
  
 %% Output
  
    Most recent test date 2020-11-22 00:00:00
    50 states/territories have data on this date.
  
 %% Cell type:code id: tags:
  
 ``` python
 # Order states by tests per 100k (highest first); sort=None on the x encoding
 # keeps that dataframe order instead of re-sorting the state names.
 viz_df = most_recent_df.sort_values('tested_100k', ascending=False)
 chart = alt.Chart(viz_df, title="Cases (orange points) and tests(blue bars) per 100k").encode(alt.X('state', sort=None))
 # Bars: tests per 100k. Points: positives per 100k, layered onto the same chart.
 tests = chart.mark_bar().encode(alt.Y('tested_100k', axis=alt.Axis(title='COVID-19 Tests/100k, Positive Cases/100k')))
 positives = chart.mark_point(color='orange', filled=True, size=100, opacity=1).encode(alt.Y('positive_100k'))
 display(alt.layer(tests, positives))
 display(html_credits)
 ```
  
 %% Output
  
  
  
 %% Cell type:markdown id: tags:
  
 ## Counts and rates by state
  
 Taking a look at the three states with the highest number of tests per 100k population. The red and orange curves represent the total tests (divided by 10) and the total positive tests respectively.
  
 %% Cell type:code id: tags:
  
 ``` python
 # produce the charts for a few states
  
 # Produce the charts for a few states: the three with the highest
 # 'tested_100k' value on the most recent date.
 charts=[]
 for state in most_recent_df.sort_values('tested_100k', ascending=False)['state'].to_list()[:3]:
    state_df = tdf_diff[tdf_diff['state'] == state].copy()
  
    base = alt.Chart(state_df, title=state).encode(alt.X('date', axis=alt.Axis(title='Date'))).properties(width=250, height=150)
    # Bars: daily change in positives.
    dailies = base.mark_bar(size=6).encode(alt.Y('positive_diff', axis=alt.Axis(title='Daily positive')))
  
    # Lines: total tests divided by 10 (red) and cumulative positives (orange).
    totals = base.mark_line(color='red').encode(alt.Y('total_10', axis=alt.Axis(title='Total/10')))
    positives = totals.mark_line(color='orange').encode(alt.Y('positive', axis=alt.Axis(title='Positive')))
    cumulative = totals + positives
  
    # NOTE(review): `ratio` is never added to a layer below, and no visible
    # cell creates a 'ratio' column in tdf_diff — confirm whether this chart
    # is still wanted.
    ratio = base.mark_line(color='red').encode(alt.Y('ratio', axis=alt.Axis(title='Positive/Total'), scale=alt.Scale(domain=(0,1))))
  
    # Bars and lines get independent y scales within each state's chart.
    charts.append(alt.layer(dailies, cumulative).resolve_scale(y='independent'))
  
 # Show the per-state charts side by side.
 display(alt.hconcat(*charts))
 display(html_credits)
 ```
  
 %% Output
  
  
  
 %% Cell type:code id: tags:
  
 ``` python
 ```
--- a/runs/dataset_summary.run.ipynb
+++ b/runs/dataset_summary.run.ipynb