diff --git a/CHANGELOG.md b/CHANGELOG.md
index 77855346b29aa68da8099325e91c7d24d10fe18e..8fe0ddc95c99a98f9936cf66d8ab01f7e70c0a7e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,28 +1,42 @@
 # CHANGELOG
 
+### unreleased
+
+- Add notebooks showcasing accessing output of different models from different sources: (!2, [Aaron Spring](https://renkulab.io/gitlab/aaron.spring))
+    - S2S-Project models:
+        - from European Weather Cloud:
+            - [`climetlab-s2s-ai-challenge`](https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge/) [recommended], see [`climetlab-s2s-ai-challenge` notebooks](https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge/tree/main/notebooks)
+            - `curl` & `wget`, see [wget_curl.ipynb](https://renkulab.io/gitlab/aaron.spring/s2s-ai-challenge-template/-/blob/master/notebooks/data_access/wget_curl.ipynb)
+            - `intake`, see [intake.ipynb](https://renkulab.io/gitlab/aaron.spring/s2s-ai-challenge-template/-/blob/master/notebooks/data_access/intake.ipynb)
+        - `IRIDL` including overview, see [IRIDL.ipynb](https://renkulab.io/gitlab/aaron.spring/s2s-ai-challenge-template/-/blob/master/notebooks/data_access/IRIDL.ipynb)
+    - SubX-Project models: `IRIDL` including overview, see [IRIDL.ipynb](https://renkulab.io/gitlab/aaron.spring/s2s-ai-challenge-template/-/blob/master/notebooks/data_access/IRIDL.ipynb)
+    - How to access password-protected S2S-Project output from IRIDL with xarray? see [IRIDL.ipynb](https://renkulab.io/gitlab/aaron.spring/s2s-ai-challenge-template/-/blob/master/notebooks/data_access/IRIDL.ipynb)
+- fix `netcdf4` version to `1.5.4` for `opendap` to work lazily with `xarray` (!2, [Aaron Spring](https://renkulab.io/gitlab/aaron.spring))
+
+
 ### 2021-05-31: `v0.2` *release*
 
 After this `v0.2` release, this CHANGELOG.md will describe all changes made in this template repository.
 
 - update `README` how to join competition, please `git pull` if you forked before
 - find status of your submission in `s2s-ai-competition-scoring-image` https://renkulab.io/gitlab/tasko.olevski/s2s-ai-competition-scoring-image/-/blob/master/README.md 
-- calculate `RPSS` with respect to climatology (not ECMWF anymore)
+- calculate `RPSS` with respect to climatology (not ECMWF anymore) ([Aaron Spring](https://renkulab.io/gitlab/aaron.spring))
     - update `RPSS_verification.ipynb`
-    - update `scorer`: https://renkulab.io/gitlab/tasko.olevski/s2s-ai-competition-scoring-image
+    - update `scorer`: https://renkulab.io/gitlab/tasko.olevski/s2s-ai-competition-scoring-image ([Tasko Olevski](https://renkulab.io/gitlab/tasko.olevski), [Aaron Spring](https://renkulab.io/gitlab/aaron.spring))
     - Averaged ECMWF RPSS skill value to beat at least: -0.0070
 
 
 ### 2021-05-26: `v0.1` *pre-release*
 
-- update `README` how to join competition !4
-- git lfs track zarr: `git lfs track "**/*.zarr/**"` !4
-- add notebooks: !4
+- update `README` how to join competition (!4, [Aaron Spring](https://renkulab.io/gitlab/aaron.spring))
+- git lfs track zarr: `git lfs track "**/*.zarr/**"` (!4, [Aaron Spring](https://renkulab.io/gitlab/aaron.spring))
+- add notebooks: (!4, [Aaron Spring](https://renkulab.io/gitlab/aaron.spring))
     - create renku datasets: `renku_datasets_biweekly.ipynb`
     - RPSS verification: `RPSS_verification.ipynb`
     - ML train and predict based on weatherbench: `ML_train_and_predict.ipynb`
     - mean bias reduction: `mean_bias_reduction.ipynb`
     - template for training and predictions: `ML_forecast_template.ipynb`
-- add renku dataset `s2s-ai-challenge` with files: !4
+- add renku dataset `s2s-ai-challenge` with files: (!4, [Aaron Spring](https://renkulab.io/gitlab/aaron.spring))
     - `hindcast-like-observations_2000-2019_biweekly_deterministic.zarr`
     - `forecast-like-observations_2020_biweekly_deterministic.zarr`
     - `hindcast-like-observations_2000-2019_biweekly_tercile-edges.nc`
@@ -31,10 +45,10 @@ After this `v0.2` release, this CHANGELOG.md will describe all changes made in t
     - `ecmwf_forecast-input_2020_biweekly_deterministic.zarr`
     - `ecmwf_hindcast-input_2000-2019_biweekly_deterministic.zarr`
     - `ecmwf_recalibrated_benchmark_2020_biweekly_terciled.nc`
-- add reproducibility section below in training !4
+- add reproducibility section below in training (!4, [Aaron Spring](https://renkulab.io/gitlab/aaron.spring))
 - how to deal with this dry mask? provide as renku dataset? now implicitly masked in categorized observations `obs_p`
-- justify if training takes more than a week !4
-- show RPS for all years. ToDo: take RPSS #4
+- justify if training takes more than a week (!4, [Aaron Spring](https://renkulab.io/gitlab/aaron.spring))
+- show RPS for all years. ~~ToDo: take RPSS~~ (!4, [Aaron Spring](https://renkulab.io/gitlab/aaron.spring))
 
 
 
diff --git a/environment.yml b/environment.yml
index b7585bc272b108595fce878b73d7c076b3312b7f..8d5ba046c10503b88d338ebe1f1b1af6334c35ae 100644
--- a/environment.yml
+++ b/environment.yml
@@ -5,27 +5,27 @@ dependencies:
   - xarray
   # ML
   - tensorflow
-  #- pytorch
+  - pytorch
   # viz
   - matplotlib-base
   # - cartopy
   # scoring
-  - xskillscore  # includes sklearn
+  - xskillscore>=0.0.20  # includes sklearn
   # data access
-  #- intake
-  #- fsspec
+  - intake
+  - fsspec
   - zarr
   - s3fs
-  #- intake-xarray
+  - intake-xarray
   - cfgrib
-  #- pydap
-  #- h5netcdf
-  # - netcdf4#==1.5.1  # see https://github.com/pydata/xarray/issues/4925
+  - nc-time-axis
+  - pydap
+  - h5netcdf
+  - netcdf4==1.5.3  # presumably pinned because of https://github.com/pydata/xarray/issues/4925; NOTE(review): the pip section below pins netcdf4==1.5.4 and CHANGELOG says 1.5.4 - confirm which version is intended
   - pip
   - pip:
     - climetlab >= 0.7.0
     - climetlab_s2s_ai_challenge >= 0.6.3
     - configargparse # for weatherbench
-    - netcdf4 # ==1.5.1  # see https://github.com/pydata/xarray/issues/4925
-    - git+https://github.com/phausamann/sklearn-xarray.git@develop
+    - netcdf4==1.5.4  # NOTE(review): the conda section above pins netcdf4==1.5.3 - confirm the two pins are meant to differ
 prefix: "/opt/conda"
diff --git a/notebooks/data_access/EWC_catalog.yml b/notebooks/data_access/EWC_catalog.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a3af4252c9bbe49a559dbfcaf337ae6f048b1aad
--- /dev/null
+++ b/notebooks/data_access/EWC_catalog.yml
@@ -0,0 +1,122 @@
+plugins:
+  source:
+    - module: intake_xarray
+
+sources:
+  training-input:
+    description: climetlab name in AI/ML community naming for hindcasts as input to the ML-model in training period
+    driver: netcdf
+    parameters:
+      model:
+        description: name of the S2S model
+        type: str
+        default: ecmwf
+        allowed: [ecmwf, eccc, ncep]
+      param:
+        description: variable name
+        type: str
+        default: tp
+        allowed: [t2m, ci, gh, lsm, msl, q, rsn, sm100, sm20, sp, sst, st100, st20, t, tcc, tcw, ttr, tp, v, u]
+      date:
+        description: initialization weekly thursdays
+        type: datetime
+        default: 2020.01.02
+        min: 2020.01.02
+        max: 2020.12.31
+      version:
+        description: versioning of the data
+        type: str
+        default: 0.3.0
+      format:
+        description: data type
+        type: str
+        default: netcdf
+        allowed: [netcdf, grib]
+      ending:
+        description: data format compatible with format; netcdf -> nc, grib -> grib
+        type: str
+        default: nc
+        allowed: [nc, grib]
+    xarray_kwargs:
+        engine: h5netcdf
+    args: # add simplecache:: for caching: https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally
+      urlpath: https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/training-input/{{version}}/{{format}}/{{model}}-hindcast-{{param}}-{{date.strftime("%Y%m%d")}}.{{ending}}
+
+  test-input:
+    description: climetlab name in AI/ML community naming for 2020 forecasts as input to ML model in test period 2020
+    driver: netcdf
+    parameters:
+      model:
+        description: name of the S2S model
+        type: str
+        default: ecmwf
+        allowed: [ecmwf, eccc, ncep]
+      param:
+        description: variable name
+        type: str
+        default: tp
+        allowed: [t2m, ci, gh, lsm, msl, q, rsn, sm100, sm20, sp, sst, st100, st20, t, tcc, tcw, ttr, tp, v, u]
+      date:
+        description: initialization weekly thursdays
+        type: datetime
+        default: 2020.01.02
+        min: 2020.01.02
+        max: 2020.12.31
+      version:
+        description: versioning of the data
+        type: str
+        default: 0.3.0
+      format:
+        description: data type
+        type: str
+        default: netcdf
+        allowed: [netcdf, grib]
+      ending:
+        description: data format compatible with format; netcdf -> nc, grib -> grib
+        type: str
+        default: nc
+        allowed: [nc, grib]
+    xarray_kwargs:
+        engine: h5netcdf
+    args: # add simplecache:: for caching: https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally
+      urlpath: https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-input/{{version}}/{{format}}/{{model}}-forecast-{{param}}-{{date.strftime("%Y%m%d")}}.{{ending}}
+
+  training-output-reference:  # reference output for the training period; the test-period counterpart is test-output-reference
+    description: climetlab name in AI/ML community naming for 2020 forecasts as output reference to compare to ML model output to in training period
+    driver: netcdf
+    parameters:
+      param:
+        description: variable name
+        type: str
+        default: tp
+        allowed: [t2m, tp]
+      date:
+        description: initialization weekly thursdays
+        type: datetime
+        default: 2020.01.02
+        min: 2020.01.02
+        max: 2020.12.31
+    xarray_kwargs:
+        engine: h5netcdf
+    args: # add simplecache:: for caching: https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally
+      urlpath: https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/training-output-reference/{{param}}-{{date.strftime("%Y%m%d")}}.nc
+            
+  test-output-reference:
+    description: climetlab name in AI/ML community naming for 2020 forecasts as output reference to compare to ML model output to in test period 2020
+    driver: netcdf
+    parameters:
+      param:
+        description: variable name
+        type: str
+        default: tp
+        allowed: [t2m, tp]
+      date:
+        description: initialization weekly thursdays
+        type: datetime
+        default: 2020.01.02
+        min: 2020.01.02
+        max: 2020.12.31
+    xarray_kwargs:
+        engine: h5netcdf
+    args: # add simplecache:: for caching: https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally
+      urlpath: https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-output-reference/{{param}}-{{date.strftime("%Y%m%d")}}.nc
diff --git a/notebooks/data_access/IRIDL.ipynb b/notebooks/data_access/IRIDL.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..f1e11dfd484a232188b6ea8696652d5d4f04d8b7
--- /dev/null
+++ b/notebooks/data_access/IRIDL.ipynb
@@ -0,0 +1,855 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Access from `iridl.ldeo.columbia.edu`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "IRI Data Library (IRIDL) hosts various subseasonal initialized forecast and hindcast simulations:\n",
+    "- `S2S project`:\n",
+    "    - http://iridl.ldeo.columbia.edu/SOURCES/.ECMWF/.S2S/\n",
+    "        - hindcast/reforecast: one variable, one model:\n",
+    "        - login required\n",
+    "- `SubX project`:\n",
+    "    - http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/\n",
+    "        - hindcast/reforecast: one variable, one model:\n",
+    "        - login not required\n",
+    "- Notes:\n",
+    "    - Output on IRIDL is not always on the 1.5 degree grid requested for the competition. Also dimension names and coordinates differ.\n",
+    "    - Beware that most models are not only initialized on Thursdays. It is not forbidden to use simulations which are started on other weekdays, but please pay attention that you may only use information available on `forecast_time`, i.e. if the model is initialized on Mondays, you have to use the day 14+3=17 to day 27+3=30 forecast for week 3-4.\n",
+    "---\n",
+    "This notebook also provides opendap magic, i.e. commands added to the opendap URL which preprocess data server-side. (not implemented)\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here are instructions for configuring xarray to open protected Data Library datasets, after you have created a Data Library account and accepted the terms and conditions for the dataset.\n",
+    "1. Visit https://iridl.ldeo.columbia.edu/auth/genkey . Log in to the Data Library. Copy the key from the response.\n",
+    "\n",
+    "2. Create a file with the following content, substituting the key from step 1 for `\"xxxx\"`:\n",
+    "`Set-Cookie: __dlauth_id=xxxx; domain=.iridl.ldeo.columbia.edu`\n",
+    "\n",
+    "3. Put the following in `~/.daprc`, which is `/home/jovyan/.daprc` on renku, substituting the path to the above file for `/path/to/cookie/file`:\n",
+    "`HTTP.COOKIEJAR=/path/to/cookie/file`. You may need to copy `.daprc` to `/home/jovyan` on renku, because `/home/jovyan` is not tracked by `git`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Writing /work/s2s-ai-challenge-template/.daprc\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile /work/s2s-ai-challenge-template/.daprc\n",
+    "HTTP.COOKIEJAR=/work/s2s-ai-challenge-template/.cookie_iridl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!cp /work/s2s-ai-challenge-template/.daprc /home/jovyan"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#%writefile /work/s2s-ai-challenge-template/.cookie_iridl\n",
+    "#Set-Cookie: __dlauth_id=xxxx; domain=.iridl.ldeo.columbia.edu"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Writing /work/s2s-ai-challenge-template/.cookie_iridl\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile /work/s2s-ai-challenge-template/.cookie_iridl\n",
+    "Set-Cookie: __dlauth_id=xxxx; domain=.iridl.ldeo.columbia.edu"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.8/site-packages/xarray/backends/cfgrib_.py:27: UserWarning: Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. Try `import cfgrib` to get the full error message\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<xarray.core.options.set_options at 0x7efe3cb51fd0>"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import xarray as xr\n",
+    "xr.set_options(display_style='text')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Please be aware that most models are not only initialized on Thursdays.\n",
+    "It is not forbidden to use simulations which are started on other weekdays,\n",
+    "but please pay attention that you may only use information available on `forecast_time`,\n",
+    "i.e. if the model is initialized on Mondays, you have to use the day 14+3=17 to day 27+3=30 forecast for week 3-4."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.8/site-packages/xarray/backends/plugins.py:61: RuntimeWarning: Engine 'cfgrib' loading failed:\n",
+      "/opt/conda/lib/python3.8/site-packages/gribapi/_bindings.cpython-38-x86_64-linux-gnu.so: undefined symbol: codes_bufr_key_is_header\n",
+      "  warnings.warn(f\"Engine {name!r} loading failed:\\n{ex}\", RuntimeWarning)\n"
+     ]
+    }
+   ],
+   "source": [
+    "ds = xr.open_dataset('https://iridl.ldeo.columbia.edu/SOURCES/.ECMWF/.S2S/.ECMF/.reforecast/.control/.2m_above_ground/.2t/dods',\n",
+    "                     chunks='auto', decode_times=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# calendar '360' not recognized, but '360_day'\n",
+    "if ds.hdate.attrs['calendar'] == '360':\n",
+    "    ds.hdate.attrs['calendar'] = '360_day'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre>&lt;xarray.DataArray &#x27;t2m&#x27; (hdate: 26, forecast_time: 637, lead_time: 46, latitude: 121, longitude: 240)&gt;\n",
+       "dask.array&lt;open_dataset-f89df07098f6ce22c120a08e3f3f29a52t, shape=(26, 637, 46, 121, 240), dtype=float32, chunksize=(8, 91, 15, 46, 60), chunktype=numpy.ndarray&gt;\n",
+       "Coordinates:\n",
+       "  * latitude       (latitude) float32 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 45 days 12...\n",
+       "  * hdate          (hdate) object 1995-07-01 00:00:00 ... 2020-07-01 00:00:00\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 2015-05-14 ... 2021-06-17\n",
+       "  * longitude      (longitude) float32 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+       "Attributes:\n",
+       "    pointwidth:      0\n",
+       "    gribPDSpattern:  04XXXX003D0000\n",
+       "    long_name:       2-meter Temperature\n",
+       "    units:           K\n",
+       "    standard_name:   air_temperature</pre>"
+      ],
+      "text/plain": [
+       "<xarray.DataArray 't2m' (hdate: 26, forecast_time: 637, lead_time: 46, latitude: 121, longitude: 240)>\n",
+       "dask.array<open_dataset-f89df07098f6ce22c120a08e3f3f29a52t, shape=(26, 637, 46, 121, 240), dtype=float32, chunksize=(8, 91, 15, 46, 60), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * latitude       (latitude) float32 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 45 days 12...\n",
+       "  * hdate          (hdate) object 1995-07-01 00:00:00 ... 2020-07-01 00:00:00\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 2015-05-14 ... 2021-06-17\n",
+       "  * longitude      (longitude) float32 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+       "Attributes:\n",
+       "    pointwidth:      0\n",
+       "    gribPDSpattern:  04XXXX003D0000\n",
+       "    long_name:       2-meter Temperature\n",
+       "    units:           K\n",
+       "    standard_name:   air_temperature"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds = xr.decode_cf(ds).rename({'X':'longitude', 'Y':'latitude', 'S':'forecast_time', 'LA': 'lead_time', '2t':'t2m'})\n",
+    "ds['t2m']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(88.496735436, 'GB')"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds.nbytes/1e9,'GB'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# hdate gets the previous years' reforecasts for that dayofyear"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hindcast Availability\n",
+    "\n",
+    "- BOM:\tBoM POAMA Ensemble.\n",
+    "- CMA:\tBeijing Climate Center (BCC) Climate Prediction System for S2S.\n",
+    "- CNRM:\tCNRM Ensemble Prediction System.\n",
+    "- ECCC:\tECCC Ensemble Prediction System.\n",
+    "- ECMF:\tECMWF Ensemble.\n",
+    "- HMCR:\tHMCR Ensemble.\n",
+    "- ISAC:\tISAC-CNR Ensemble.\n",
+    "- JMA:\tJMA Ensemble System.\n",
+    "- KMA:\tKMA Seasonal Prediction System.\n",
+    "- NCEP:\tNCEP CFSv2 Ensemble.\n",
+    "- UKMO:\tUKMO Ensemble Prediction System."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "BOM not on-the-fly forecast_time freq not found \n",
+      " Coordinates:\n",
+      "  * latitude       (latitude) float32 88.1 85.64 83.16 ... -83.16 -85.64 -88.1\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 61 days 12...\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1981-01-01 ... 2013-12-26\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0 4.0 ... 29.0 30.0 31.0 32.0\n",
+      "  * longitude      (longitude) float32 0.0 2.507 5.014 ... 353.5 356.0 358.5 \n",
+      " Frozen(SortedKeysDict({'latitude': 72, 'lead_time': 62, 'forecast_time': 2376, 'realization': 32, 'longitude': 144})) 195.498364944 GB \n",
+      "\n",
+      "CNRM not on-the-fly forecast_time freq not found \n",
+      " Coordinates:\n",
+      "  * latitude       (latitude) float32 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 60 days 12...\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1993-01-01 ... 2014-12-15\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0 4.0 ... 11.0 12.0 13.0 14.0\n",
+      "  * longitude      (longitude) float32 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5 \n",
+      " Frozen(SortedKeysDict({'latitude': 121, 'lead_time': 61, 'forecast_time': 528, 'realization': 14, 'longitude': 240})) 52.377944132 GB \n",
+      "\n",
+      "ECCC on-the-fly forecast_time freq:W-THU \n",
+      " Coordinates:\n",
+      "  * latitude       (latitude) float32 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+      "  * hdate          (hdate) object 1995-07-01 00:00:00 ... 2017-07-01 00:00:00\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 2016-01-07 ... 2021-06-03\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0\n",
+      "  * longitude      (longitude) float32 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 31 days 12... \n",
+      " Frozen(SortedKeysDict({'latitude': 121, 'hdate': 23, 'forecast_time': 283, 'realization': 3, 'longitude': 240, 'lead_time': 32})) 72.5842064 GB \n",
+      "\n",
+      "ECMF on-the-fly forecast_time freq not found \n",
+      " Coordinates:\n",
+      "  * latitude       (latitude) float32 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 45 days 12...\n",
+      "  * hdate          (hdate) object 1995-07-01 00:00:00 ... 2020-07-01 00:00:00\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 2015-05-14 ... 2021-06-17\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0 4.0 ... 7.0 8.0 9.0 10.0\n",
+      "  * longitude      (longitude) float32 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5 \n",
+      " Frozen(SortedKeysDict({'latitude': 121, 'lead_time': 46, 'hdate': 26, 'forecast_time': 637, 'realization': 10, 'longitude': 240})) 884.967290356 GB \n",
+      "\n",
+      "HMCR on-the-fly forecast_time freq not found \n",
+      " Coordinates:\n",
+      "  * latitude       (latitude) float32 90.0 87.5 85.0 82.5 ... -85.0 -87.5 -90.0\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 60 days 12...\n",
+      "  * hdate          (hdate) object 1985-07-01 00:00:00 ... 2010-07-01 00:00:00\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 2015-01-07 ... 2021-06-03\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0\n",
+      "  * longitude      (longitude) float32 0.0 2.507 5.014 ... 353.5 356.0 358.5 \n",
+      " Frozen(SortedKeysDict({'latitude': 73, 'lead_time': 61, 'hdate': 26, 'forecast_time': 335, 'realization': 9, 'longitude': 144})) 201.0647102 GB \n",
+      "\n",
+      "model=ISAC failed due to OSError: [Errno -90] NetCDF: file not found: b'https://iridl.ldeo.columbia.edu/SOURCES/.ECMWF/.S2S/.ISAC/.reforecast/.perturbed/.2m_above_ground/.2t/dods' \n",
+      "\n",
+      "JMA not on-the-fly forecast_time freq:D \n",
+      " Coordinates:\n",
+      "  * latitude       (latitude) float32 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1981-01-10T12:00:00 ... 201...\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0 4.0\n",
+      "  * longitude      (longitude) float32 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 1 days 2 days ... 32 days 33 days \n",
+      " Frozen(SortedKeysDict({'latitude': 121, 'forecast_time': 10948, 'realization': 4, 'longitude': 240, 'lead_time': 33})) 167.867087068 GB \n",
+      "\n",
+      "KMA on-the-fly forecast_time freq:D \n",
+      " Coordinates:\n",
+      "  * latitude       (latitude) float32 90.0 87.5 85.0 82.5 ... -85.0 -87.5 -90.0\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 59 days 12...\n",
+      "  * hdate          (hdate) object 1991-07-01 00:00:00 ... 2010-07-01 00:00:00\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 2016-11-01 ... 2021-06-01\n",
+      "  * realization    (realization) float32 1.0 2.0\n",
+      "  * longitude      (longitude) float32 0.0 2.507 5.014 ... 353.5 356.0 358.5 \n",
+      " Frozen(SortedKeysDict({'latitude': 73, 'lead_time': 60, 'hdate': 20, 'forecast_time': 1674, 'realization': 2, 'longitude': 144})) 168.932059708 GB \n",
+      "\n",
+      "NCEP not on-the-fly forecast_time freq:D \n",
+      " Coordinates:\n",
+      "  * latitude       (latitude) float32 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 43 days 12...\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1999-01-01 ... 2010-12-31\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0\n",
+      "  * longitude      (longitude) float32 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5 \n",
+      " Frozen(SortedKeysDict({'latitude': 121, 'lead_time': 44, 'forecast_time': 4383, 'realization': 3, 'longitude': 240})) 67.205101832 GB \n",
+      "\n",
+      "UKMO on-the-fly forecast_time freq not found \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 59 days 12...\n",
+      "  * latitude       (latitude) float32 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 2016-01-01 ... 2019-05-09\n",
+      "  * realization    (realization) float32 1.0 2.0\n",
+      "  * longitude      (longitude) float32 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+      "  * hdate          (hdate) object 1993-07-01 00:00:00 ... 2015-07-01 00:00:00 \n",
+      " Frozen(SortedKeysDict({'lead_time': 60, 'latitude': 121, 'forecast_time': 162, 'realization': 2, 'longitude': 240, 'hdate': 23})) 51.937462612 GB \n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "models = ['BOM','CNRM','ECCC','ECMF','HMCR','ISAC','JMA','KMA','NCEP','UKMO']\n",
+    "for model in models:\n",
+    "    try:\n",
+    "        ds = xr.open_dataset(f'https://iridl.ldeo.columbia.edu/SOURCES/.ECMWF/.S2S/.{model}/.reforecast/.perturbed/.2m_above_ground/.2t/dods',\n",
+    "                             chunks='auto', decode_times=False).rename({'S':'forecast_time', 'LA':'lead_time','M':'realization', 'X':'longitude', 'Y':'latitude'})\n",
+    "        # calendar '360' not recognized, but '360_day'\n",
+    "        for c in ['hdate','forecast_time']:\n",
+    "            if c in ds.coords:\n",
+    "                if ds[c].attrs['calendar'] == '360':\n",
+    "                    ds[c].attrs['calendar'] = '360_day'\n",
+    "        ds = xr.decode_cf(ds)\n",
+    "        onthefly = True if 'hdate' in ds.coords else False\n",
+    "        forecast_time_freq = xr.infer_freq(ds.forecast_time)\n",
+    "        print(model, 'on-the-fly' if onthefly else 'not on-the-fly',\n",
+    "              'forecast_time freq:'+forecast_time_freq if forecast_time_freq else 'forecast_time freq not found',\n",
+    "              '\\n',ds.coords,'\\n',ds.sizes,ds.nbytes/1e9,'GB','\\n')\n",
+    "    except Exception as e:\n",
+    "        print(f'model={model} failed due to {type(e).__name__}: {e} \\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# SubX"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The access to output from the SubX project does not require login information via cookie."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds = xr.open_dataset('http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/.CESM/.30LCESM1/.hindcast/.tas/dods',\n",
+    "                     chunks='auto', decode_times=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# calendar '360' not recognized, but '360_day'\n",
+    "if ds.S.attrs['calendar'] == '360':\n",
+    "    ds.S.attrs['calendar'] = '360_day'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre>&lt;xarray.DataArray &#x27;t2m&#x27; (forecast_time: 887, realization: 10, lead_time: 45, latitude: 181, longitude: 360)&gt;\n",
+       "dask.array&lt;open_dataset-1bd5755a82e148fd83330ea4db46cbb8tas, shape=(887, 10, 45, 181, 360), dtype=float32, chunksize=(335, 2, 9, 61, 90), chunktype=numpy.ndarray&gt;\n",
+       "Coordinates:\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 44 days 12...\n",
+       "  * latitude       (latitude) float32 -90.0 -89.0 -88.0 -87.0 ... 88.0 89.0 90.0\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 1999-01-06 ... 2015-12-30\n",
+       "  * realization    (realization) float32 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0\n",
+       "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0\n",
+       "Attributes:\n",
+       "    pointwidth:     0.0\n",
+       "    standard_name:  air_temperature\n",
+       "    long_name:      2-meter Air Temperature\n",
+       "    level_type:     2 meters above ground\n",
+       "    cell_methods:   time: mean\n",
+       "    units:          Kelvin_scale</pre>"
+      ],
+      "text/plain": [
+       "<xarray.DataArray 't2m' (forecast_time: 887, realization: 10, lead_time: 45, latitude: 181, longitude: 360)>\n",
+       "dask.array<open_dataset-1bd5755a82e148fd83330ea4db46cbb8tas, shape=(887, 10, 45, 181, 360), dtype=float32, chunksize=(335, 2, 9, 61, 90), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 44 days 12...\n",
+       "  * latitude       (latitude) float32 -90.0 -89.0 -88.0 -87.0 ... 88.0 89.0 90.0\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 1999-01-06 ... 2015-12-30\n",
+       "  * realization    (realization) float32 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0\n",
+       "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0\n",
+       "Attributes:\n",
+       "    pointwidth:     0.0\n",
+       "    standard_name:  air_temperature\n",
+       "    long_name:      2-meter Air Temperature\n",
+       "    level_type:     2 meters above ground\n",
+       "    cell_methods:   time: mean\n",
+       "    units:          Kelvin_scale"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds = xr.decode_cf(ds).rename({'X':'longitude', 'Y':'latitude', 'S':'forecast_time', 'L': 'lead_time', 'M':'realization', 'tas':'t2m'})\n",
+    "ds['t2m']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(104.03446566, 'GB')"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds.nbytes/1e9,'GB'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Hindcast Availability"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- center: model\n",
+    "- CESM:\t30LCESM1 46LCESM1\n",
+    "- ECCC:\tGEM GEPS6 GEPS5\n",
+    "- EMC:\tGEFS GEFSv12\n",
+    "- ESRL:\tFIMr1p1\n",
+    "- GMAO:\tGEOS_V2p1\n",
+    "- NCEP:\tCFSv2\n",
+    "- NRL:\tNESM\n",
+    "- RSMAS:\tCCSM4"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "30LCESM1 not on-the-fly forecast_time freq:W-WED \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 44 days 12...\n",
+      "  * latitude       (latitude) float32 -90.0 -89.0 -88.0 -87.0 ... 88.0 89.0 90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1999-01-06 ... 2015-12-30\n",
+      "  * realization    (realization) float32 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0\n",
+      "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0 \n",
+      " Frozen(SortedKeysDict({'lead_time': 45, 'latitude': 181, 'forecast_time': 887, 'realization': 10, 'longitude': 360})) 104.03446566 GB \n",
+      "\n",
+      "46LCESM1 not on-the-fly forecast_time freq:W-WED \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 44 days 12...\n",
+      "  * latitude       (latitude) float32 -90.0 -89.0 -88.0 -87.0 ... 88.0 89.0 90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1999-01-06 ... 2015-12-30\n",
+      "  * realization    (realization) float32 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0\n",
+      "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0 \n",
+      " Frozen(SortedKeysDict({'lead_time': 45, 'latitude': 181, 'forecast_time': 887, 'realization': 10, 'longitude': 360})) 104.03446566 GB \n",
+      "\n",
+      "GEM not on-the-fly forecast_time freq:D \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 31 days 12...\n",
+      "  * latitude       (latitude) float32 -90.0 -89.0 -88.0 -87.0 ... 88.0 89.0 90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1995-01-04 ... 2014-12-28\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0 4.0\n",
+      "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0 \n",
+      " Frozen(SortedKeysDict({'lead_time': 32, 'latitude': 181, 'forecast_time': 7299, 'realization': 4, 'longitude': 360})) 243.508714908 GB \n",
+      "\n",
+      "GEPS6 not on-the-fly forecast_time freq:D \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 31 days 12...\n",
+      "  * latitude       (latitude) float32 -90.0 -89.0 -88.0 -87.0 ... 88.0 89.0 90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1998-01-03 ... 2017-12-27\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0 4.0\n",
+      "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0 \n",
+      " Frozen(SortedKeysDict({'lead_time': 32, 'latitude': 181, 'forecast_time': 7299, 'realization': 4, 'longitude': 360})) 243.508714908 GB \n",
+      "\n",
+      "GEPS5 not on-the-fly forecast_time freq:D \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 31 days 12...\n",
+      "  * latitude       (latitude) float32 -90.0 -89.0 -88.0 -87.0 ... 88.0 89.0 90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1998-01-03 ... 2017-12-27\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0 4.0\n",
+      "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0 \n",
+      " Frozen(SortedKeysDict({'lead_time': 32, 'latitude': 181, 'forecast_time': 7299, 'realization': 4, 'longitude': 360})) 243.508714908 GB \n",
+      "\n",
+      "GEFS not on-the-fly forecast_time freq:W-WED \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 34 days 12...\n",
+      "  * latitude       (latitude) float32 90.0 89.0 88.0 87.0 ... -88.0 -89.0 -90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1999-01-06 ... 2016-12-28\n",
+      "  * realization    (realization) float32 0.0 1.0 2.0 3.0 ... 7.0 8.0 9.0 10.0\n",
+      "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0 \n",
+      " Frozen(SortedKeysDict({'lead_time': 35, 'latitude': 181, 'forecast_time': 939, 'realization': 11, 'longitude': 360})) 94.2252796 GB \n",
+      "\n",
+      "center=EMC model=GEFSv12 failed due to OSError: [Errno -90] NetCDF: file not found: b'https://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/.EMC/.GEFSv12/.hindcast/.tas/dods' \n",
+      "\n",
+      "FIMr1p1 not on-the-fly forecast_time freq:W-WED \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 31 days 12...\n",
+      "  * latitude       (latitude) float32 -90.0 -89.0 -88.0 -87.0 ... 88.0 89.0 90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1999-01-06 ... 2017-06-28\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0 4.0\n",
+      "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0 \n",
+      " Frozen(SortedKeysDict({'lead_time': 32, 'latitude': 181, 'forecast_time': 965, 'realization': 4, 'longitude': 360})) 32.194262956 GB \n",
+      "\n",
+      "GEOS_V2p1 not on-the-fly forecast_time freq:D \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 44 days 12...\n",
+      "  * latitude       (latitude) float32 -90.0 -89.0 -88.0 -87.0 ... 88.0 89.0 90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1999-01-01 ... 2016-12-27\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0 4.0\n",
+      "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0 \n",
+      " Frozen(SortedKeysDict({'lead_time': 45, 'latitude': 181, 'forecast_time': 6571, 'realization': 4, 'longitude': 360})) 308.279834308 GB \n",
+      "\n",
+      "CFSv2 not on-the-fly forecast_time freq:6H \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 43 days 12...\n",
+      "  * latitude       (latitude) float32 90.0 89.0 88.0 87.0 ... -88.0 -89.0 -90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1999-01-01 ... 2017-09-30\n",
+      "  * realization    (realization) int32 1\n",
+      "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0 \n",
+      " Frozen(SortedKeysDict({'lead_time': 44, 'latitude': 181, 'forecast_time': 27389, 'realization': 1, 'longitude': 360})) 314.101655872 GB \n",
+      "\n",
+      "NESM not on-the-fly forecast_time freq:D \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 44 days 12...\n",
+      "  * latitude       (latitude) float32 -90.0 -89.0 -88.0 -87.0 ... 88.0 89.0 90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1999-01-02T12:00:00 ... 201...\n",
+      "  * realization    (realization) int32 1\n",
+      "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0 \n",
+      " Frozen(SortedKeysDict({'lead_time': 45, 'latitude': 181, 'forecast_time': 6574, 'realization': 1, 'longitude': 360})) 77.10518632 GB \n",
+      "\n",
+      "CCSM4 not on-the-fly forecast_time freq:D \n",
+      " Coordinates:\n",
+      "  * lead_time      (lead_time) timedelta64[ns] 0 days 12:00:00 ... 44 days 12...\n",
+      "  * latitude       (latitude) float32 -90.0 -89.0 -88.0 -87.0 ... 88.0 89.0 90.0\n",
+      "  * forecast_time  (forecast_time) datetime64[ns] 1999-01-07 ... 2016-12-31\n",
+      "  * realization    (realization) float32 1.0 2.0 3.0\n",
+      "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0 \n",
+      " Frozen(SortedKeysDict({'lead_time': 45, 'latitude': 181, 'forecast_time': 6569, 'realization': 3, 'longitude': 360})) 231.139516688 GB \n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "centers = ['CESM',   'CESM',  'ECCC', 'ECCC', 'ECCC', 'EMC', 'EMC',   'ESRL',    'GMAO'    , 'NCEP', 'NRL','RSMAS']\n",
+    "models = ['30LCESM1','46LCESM1','GEM','GEPS6','GEPS5','GEFS','GEFSv12','FIMr1p1','GEOS_V2p1','CFSv2','NESM','CCSM4']\n",
+    "for center,model in zip(centers,models):\n",
+    "    try:\n",
+    "        ds = xr.open_dataset(f'https://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/.{center}/.{model}/.hindcast/.tas/dods',\n",
+    "                             chunks='auto', decode_times=False).rename({'S':'forecast_time', 'L':'lead_time','M':'realization', 'X':'longitude', 'Y':'latitude'})\n",
+    "        # calendar name '360' is not recognized when decoding times; rename it to '360_day'\n",
+    "        for c in ['hdate','forecast_time']:\n",
+    "            if c in ds.coords:\n",
+    "                if ds[c].attrs['calendar'] == '360':\n",
+    "                    ds[c].attrs['calendar'] = '360_day'\n",
+    "        ds = xr.decode_cf(ds)\n",
+    "        onthefly = True if 'hdate' in ds.coords else False\n",
+    "        forecast_time_freq = xr.infer_freq(ds.forecast_time)\n",
+    "        print(model, 'on-the-fly' if onthefly else 'not on-the-fly',\n",
+    "              'forecast_time freq:'+forecast_time_freq if forecast_time_freq else 'forecast_time freq not found',\n",
+    "              '\\n',ds.coords,'\\n',ds.sizes,ds.nbytes/1e9,'GB','\\n')\n",
+    "    except Exception as e:\n",
+    "        print(f'center={center} model={model} failed due to {type(e).__name__}: {e} \\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Opendap magic\n",
+    "\n",
+    "Commands can be appended to Opendap URLs for server-side preprocessing.\n",
+    "\n",
+    "- https://www.opendap.org/support\n",
+    "- http://iridl.ldeo.columbia.edu/dochelp/topics/DODS/fnlist.html\n",
+    "- https://iridl.ldeo.columbia.edu/dochelp/Documentation/funcindex.html?Set-Language=en"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from subprocess import call\n",
+    "fname = 'GEFS_pra_hc.nc'\n",
+    "# endless magic commands selecting week 3-4 and aggregating pr to tp with unit conversion\n",
+    "dset_url = 'http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/.EMC/.GEFS/.hindcast/.pr/S/(0000%206%20Jan%201999)/(0000%2028%20Dec%202015)/RANGEEDGES/S/(days%20since%201999-01-01)/streamgridunitconvert/Y/1/20/RANGE/X/-20/10/RANGE/L/(14)/(28)/RANGEEDGES/%5BL%5Daverage/S/(Jun-Aug)/VALUES/SOURCES/.Models/.SubX/.EMC/.GEFS/.hindcast/.dc9915/.pr/Y/1/20/RANGE/X/-20/10/RANGE/L/(14)/(28)/RANGEEDGES/%5BL%5Daverage/S/to366daysample/%5BYR%5Daverage/S/sampleDOY/sub/c%3A/0.001/(m3%20kg-1)/%3Ac/mul/c%3A/1000/(mm%20m-1)/%3Ac/mul/c%3A/86400/(s%20day-1)/%3Ac/mul/c%3A/7.0//units//days/def/%3Ac/mul/data.nc'\n",
+    "# download data with curl\n",
+    "call(['curl','-k',dset_url, '-o',fname])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre>&lt;xarray.Dataset&gt;\n",
+       "Dimensions:        (forecast_time: 226, latitude: 20, longitude: 31, realization: 11)\n",
+       "Coordinates:\n",
+       "  * latitude       (latitude) float32 1.0 2.0 3.0 4.0 ... 17.0 18.0 19.0 20.0\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 1999-06-02 ... 2015-08-26\n",
+       "  * realization    (realization) float32 0.0 1.0 2.0 3.0 ... 7.0 8.0 9.0 10.0\n",
+       "  * longitude      (longitude) float32 -20.0 -19.0 -18.0 -17.0 ... 8.0 9.0 10.0\n",
+       "    lead_time      timedelta64[ns] 14 days\n",
+       "Data variables:\n",
+       "    tp             (realization, forecast_time, latitude, longitude) float64 ...</pre>"
+      ],
+      "text/plain": [
+       "<xarray.Dataset>\n",
+       "Dimensions:        (forecast_time: 226, latitude: 20, longitude: 31, realization: 11)\n",
+       "Coordinates:\n",
+       "  * latitude       (latitude) float32 1.0 2.0 3.0 4.0 ... 17.0 18.0 19.0 20.0\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 1999-06-02 ... 2015-08-26\n",
+       "  * realization    (realization) float32 0.0 1.0 2.0 3.0 ... 7.0 8.0 9.0 10.0\n",
+       "  * longitude      (longitude) float32 -20.0 -19.0 -18.0 -17.0 ... 8.0 9.0 10.0\n",
+       "    lead_time      timedelta64[ns] 14 days\n",
+       "Data variables:\n",
+       "    tp             (realization, forecast_time, latitude, longitude) float64 ..."
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "ds = xr.open_dataset(fname).rename({'X':'longitude', 'Y':'latitude', 'S':'forecast_time', 'M':'realization', 'aprod':'tp'}).assign_coords(lead_time=pd.Timedelta('14 d'))\n",
+    "ds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "5.080404184 GB\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre>&lt;xarray.DataArray &#x27;tp&#x27; (forecast_time: 886, realization: 11, latitude: 181, longitude: 360)&gt;\n",
+       "dask.array&lt;open_dataset-187ec1ad5a15edadd11711d7cbe1f114pr, shape=(886, 11, 181, 360), dtype=float64, chunksize=(423, 4, 82, 120), chunktype=numpy.ndarray&gt;\n",
+       "Coordinates:\n",
+       "  * latitude       (latitude) float32 90.0 89.0 88.0 87.0 ... -88.0 -89.0 -90.0\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 2000-01-12 ... 2016-12-28\n",
+       "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0\n",
+       "  * realization    (realization) float32 0.0 1.0 2.0 3.0 ... 7.0 8.0 9.0 10.0\n",
+       "    lead_time      timedelta64[ns] 14 days\n",
+       "Attributes: (12/13)\n",
+       "    pointwidth:                                     0.0\n",
+       "    standard_name:                                  precipitation_flux\n",
+       "    parameter_template_discipline_category_number:  1\\n0\\n1\\n8\n",
+       "    parameter_discipline_and_category:              Meteorological products, ...\n",
+       "    long_name:                                      Total Precipitation\n",
+       "    grid_type:                                      Latitude/longitude\n",
+       "    ...                                             ...\n",
+       "    level:                                          0.0\n",
+       "    center:                                         US National Weather Servi...\n",
+       "    production_status:                              Operational products\n",
+       "    level_type:                                     surface\n",
+       "    file_missing_value:                             1e+20\n",
+       "    history:                                        Averaged over L[14.5 days...</pre>"
+      ],
+      "text/plain": [
+       "<xarray.DataArray 'tp' (forecast_time: 886, realization: 11, latitude: 181, longitude: 360)>\n",
+       "dask.array<open_dataset-187ec1ad5a15edadd11711d7cbe1f114pr, shape=(886, 11, 181, 360), dtype=float64, chunksize=(423, 4, 82, 120), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * latitude       (latitude) float32 90.0 89.0 88.0 87.0 ... -88.0 -89.0 -90.0\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 2000-01-12 ... 2016-12-28\n",
+       "  * longitude      (longitude) float32 0.0 1.0 2.0 3.0 ... 357.0 358.0 359.0\n",
+       "  * realization    (realization) float32 0.0 1.0 2.0 3.0 ... 7.0 8.0 9.0 10.0\n",
+       "    lead_time      timedelta64[ns] 14 days\n",
+       "Attributes: (12/13)\n",
+       "    pointwidth:                                     0.0\n",
+       "    standard_name:                                  precipitation_flux\n",
+       "    parameter_template_discipline_category_number:  1\\n0\\n1\\n8\n",
+       "    parameter_discipline_and_category:              Meteorological products, ...\n",
+       "    long_name:                                      Total Precipitation\n",
+       "    grid_type:                                      Latitude/longitude\n",
+       "    ...                                             ...\n",
+       "    level:                                          0.0\n",
+       "    center:                                         US National Weather Servi...\n",
+       "    production_status:                              Operational products\n",
+       "    level_type:                                     surface\n",
+       "    file_missing_value:                             1e+20\n",
+       "    history:                                        Averaged over L[14.5 days..."
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# aggregate week 3-4 (lead days 14-28) precipitation `pr` to total precipitation `tp`\n",
+    "ds = xr.open_dataset('http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/.EMC/.GEFS/.hindcast/.pr/S/(0000%206%20Jan%202000)/(0000%2031%20Dec%202019)/RANGEEDGES/L/(14)/(28)/RANGEEDGES/[L]sum/dods',\n",
+    "                     chunks='auto').rename({'X':'longitude', 'Y':'latitude', 'S':'forecast_time', 'M':'realization', 'pr':'tp'}).assign_coords(lead_time=pd.Timedelta('14 d'))\n",
+    "print(ds.nbytes/1e9,'GB')\n",
+    "ds.tp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.6"
+  },
+  "toc-autonumbering": true
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/data_access/README.md b/notebooks/data_access/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..764b6d4c9cac786d14a620b599edf956671a7d9b
--- /dev/null
+++ b/notebooks/data_access/README.md
@@ -0,0 +1,13 @@
+# Data Access
+
+- European Weather Cloud:
+    - [`climetlab-s2s-ai-challenge`](https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge)
+    - `wget`: wget_curl.ipynb
+    - `curl`: wget_curl.ipynb
+    - `mouse`: wget_curl.ipynb
+    - `intake`: intake.ipynb
+- [IRI Data Library](https://iridl.ldeo.columbia.edu/): IRIDL.ipynb
+    - S2S: http://iridl.ldeo.columbia.edu/SOURCES/.ECMWF/.S2S/ (restricted access explained in IRIDL.ipynb)
+    - SubX: http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/
+    - NMME: http://iridl.ldeo.columbia.edu/SOURCES/.Models/.NMME/
+- s2sprediction.net
diff --git a/notebooks/data_access/intake.ipynb b/notebooks/data_access/intake.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..1fa6e099171f84a2a3521758fd374eb92b120edd
--- /dev/null
+++ b/notebooks/data_access/intake.ipynb
@@ -0,0 +1,565 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Data Access from EWC via `intake`\n",
+    "\n",
+    "Data easily available via `climetlab`: https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge\n",
+    "Data holdings listed: https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-input/0.3.0/netcdf/index.html\n",
+    "\n",
+    "Therefore, the S3 data is also accessible with `intake-xarray` and cacheable with `fsspec`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.8/site-packages/xarray/backends/cfgrib_.py:27: UserWarning: Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. Try `import cfgrib` to get the full error message\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<xarray.core.options.set_options at 0x7fa0100dcdc0>"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import intake\n",
+    "import fsspec\n",
+    "import xarray as xr\n",
+    "import os, glob\n",
+    "import pandas as pd\n",
+    "xr.set_options(display_style='text')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# prevent aiohttp timeout errors\n",
+    "\n",
+    "from aiohttp import ClientSession, ClientTimeout\n",
+    "timeout = ClientTimeout(total=600)\n",
+    "fsspec.config.conf['https'] = dict(client_kwargs={'timeout': timeout})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# intake\n",
+    "\n",
+    "https://github.com/intake/intake-xarray can read and cache `grib` and `netcdf` from catalogs.\n",
+    "\n",
+    "Caching via `fsspec`: https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import intake_xarray\n",
+    "cache_path = '/work/s2s-ai-challenge-template/data/cache'\n",
+    "fsspec.config.conf['simplecache'] = {'cache_storage': cache_path, 'same_names':True}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Writing EWC_catalog.yml\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile EWC_catalog.yml\n",
+    "plugins:\n",
+    "  source:\n",
+    "    - module: intake_xarray\n",
+    "\n",
+    "sources:\n",
+    "  training-input:\n",
+    "    description: climetlab name in AI/ML community naming for hindcasts as input to the ML-model in training period\n",
+    "    driver: netcdf\n",
+    "    parameters:\n",
+    "      model:\n",
+    "        description: name of the S2S model\n",
+    "        type: str\n",
+    "        default: ecmwf\n",
+    "        allowed: [ecmwf, eccc, ncep]\n",
+    "      param:\n",
+    "        description: variable name\n",
+    "        type: str\n",
+    "        default: tp\n",
+    "        allowed: [t2m, ci, gh, lsm, msl, q, rsn, sm100, sm20, sp, sst, st100, st20, t, tcc, tcw, ttr, tp, v, u]\n",
+    "      date:\n",
+    "        description: initialization weekly thursdays\n",
+    "        type: datetime\n",
+    "        default: 2020.01.02\n",
+    "        min: 2020.01.02\n",
+    "        max: 2020.12.31\n",
+    "      version:\n",
+    "        description: versioning of the data\n",
+    "        type: str\n",
+    "        default: 0.3.0\n",
+    "      format:\n",
+    "        description: data type\n",
+    "        type: str\n",
+    "        default: netcdf\n",
+    "        allowed: [netcdf, grib]\n",
+    "      ending:\n",
+    "        description: data format compatible with format; netcdf -> nc, grib -> grib\n",
+    "        type: str\n",
+    "        default: nc\n",
+    "        allowed: [nc, grib]\n",
+    "    xarray_kwargs:\n",
+    "        engine: h5netcdf\n",
+    "    args: # add simplecache:: for caching: https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally\n",
+    "      urlpath: https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/training-input/{{version}}/{{format}}/{{model}}-hindcast-{{param}}-{{date.strftime(\"%Y%m%d\")}}.{{ending}}\n",
+    "\n",
+    "  test-input:\n",
+    "    description: climetlab name in AI/ML community naming for 2020 forecasts as input to ML model in test period 2020\n",
+    "    driver: netcdf\n",
+    "    parameters:\n",
+    "      model:\n",
+    "        description: name of the S2S model\n",
+    "        type: str\n",
+    "        default: ecmwf\n",
+    "        allowed: [ecmwf, eccc, ncep]\n",
+    "      param:\n",
+    "        description: variable name\n",
+    "        type: str\n",
+    "        default: tp\n",
+    "        allowed: [t2m, ci, gh, lsm, msl, q, rsn, sm100, sm20, sp, sst, st100, st20, t, tcc, tcw, ttr, tp, v, u]\n",
+    "      date:\n",
+    "        description: initialization weekly thursdays\n",
+    "        type: datetime\n",
+    "        default: 2020.01.02\n",
+    "        min: 2020.01.02\n",
+    "        max: 2020.12.31\n",
+    "      version:\n",
+    "        description: versioning of the data\n",
+    "        type: str\n",
+    "        default: 0.3.0\n",
+    "      format:\n",
+    "        description: data type\n",
+    "        type: str\n",
+    "        default: netcdf\n",
+    "        allowed: [netcdf, grib]\n",
+    "      ending:\n",
+    "        description: data format compatible with format; netcdf -> nc, grib -> grib\n",
+    "        type: str\n",
+    "        default: nc\n",
+    "        allowed: [nc, grib]\n",
+    "    xarray_kwargs:\n",
+    "        engine: h5netcdf\n",
+    "    args: # add simplecache:: for caching: https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally\n",
+    "      urlpath: https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-input/{{version}}/{{format}}/{{model}}-forecast-{{param}}-{{date.strftime(\"%Y%m%d\")}}.{{ending}}\n",
+    "\n",
+    "  training-output-reference:\n",
+    "    description: climetlab name in AI/ML community naming for 2020 forecasts as output reference to compare to ML model output to in training period\n",
+    "    driver: netcdf\n",
+    "    parameters:\n",
+    "      param:\n",
+    "        description: variable name\n",
+    "        type: str\n",
+    "        default: tp\n",
+    "        allowed: [t2m, ci, gh, lsm, msl, q, rsn, sm100, sm20, sp, sst, st100, st20, t, tcc, tcw, ttr, tp, v, u]\n",
+    "      date:\n",
+    "        description: initialization weekly thursdays\n",
+    "        type: datetime\n",
+    "        default: 2020.01.02\n",
+    "        min: 2020.01.02\n",
+    "        max: 2020.12.31\n",
+    "    xarray_kwargs:\n",
+    "        engine: h5netcdf\n",
+    "    args: # add simplecache:: for caching: https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally\n",
+    "      urlpath: https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-output-reference/{{param}}-{{date.strftime(\"%Y%m%d\")}}.nc\n",
+    "            \n",
+    "  test-output-reference:\n",
+    "    description: climetlab name in AI/ML community naming for 2020 forecasts as output reference to compare to ML model output to in test period 2020\n",
+    "    driver: netcdf\n",
+    "    parameters:\n",
+    "      param:\n",
+    "        description: variable name\n",
+    "        type: str\n",
+    "        default: tp\n",
+    "        allowed: [t2m, ci, gh, lsm, msl, q, rsn, sm100, sm20, sp, sst, st100, st20, t, tcc, tcw, ttr, tp, v, u]\n",
+    "      date:\n",
+    "        description: initialization weekly thursdays\n",
+    "        type: datetime\n",
+    "        default: 2020.01.02\n",
+    "        min: 2020.01.02\n",
+    "        max: 2020.12.31\n",
+    "    xarray_kwargs:\n",
+    "        engine: h5netcdf\n",
+    "    args: # add simplecache:: for caching: https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally\n",
+    "      urlpath: https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-output-reference/{{param}}-{{date.strftime(\"%Y%m%d\")}}.nc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cat = intake.open_catalog('EWC_catalog.yml')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "DatetimeIndex(['2020-01-02', '2020-01-09', '2020-01-16', '2020-01-23',\n",
+       "               '2020-01-30', '2020-02-06', '2020-02-13', '2020-02-20',\n",
+       "               '2020-02-27', '2020-03-05', '2020-03-12', '2020-03-19',\n",
+       "               '2020-03-26', '2020-04-02', '2020-04-09', '2020-04-16',\n",
+       "               '2020-04-23', '2020-04-30', '2020-05-07', '2020-05-14',\n",
+       "               '2020-05-21', '2020-05-28', '2020-06-04', '2020-06-11',\n",
+       "               '2020-06-18', '2020-06-25', '2020-07-02', '2020-07-09',\n",
+       "               '2020-07-16', '2020-07-23', '2020-07-30', '2020-08-06',\n",
+       "               '2020-08-13', '2020-08-20', '2020-08-27', '2020-09-03',\n",
+       "               '2020-09-10', '2020-09-17', '2020-09-24', '2020-10-01',\n",
+       "               '2020-10-08', '2020-10-15', '2020-10-22', '2020-10-29',\n",
+       "               '2020-11-05', '2020-11-12', '2020-11-19', '2020-11-26',\n",
+       "               '2020-12-03', '2020-12-10', '2020-12-17', '2020-12-24',\n",
+       "               '2020-12-31'],\n",
+       "              dtype='datetime64[ns]', freq='7D')"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# dates for 2020 forecasts and their on-the-fly reforecasts\n",
+    "dates=pd.date_range(start='2020-01-02',freq='7D',end='2020-12-31')\n",
+    "dates"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# `hindcast-input`\n",
+    "\n",
+    "on-the-fly hindcasts corresponding to the 2020 forecasts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.8/site-packages/xarray/backends/plugins.py:61: RuntimeWarning: Engine 'cfgrib' loading failed:\n",
+      "/opt/conda/lib/python3.8/site-packages/gribapi/_bindings.cpython-38-x86_64-linux-gnu.so: undefined symbol: codes_bufr_key_is_header\n",
+      "  warnings.warn(f\"Engine {name!r} loading failed:\\n{ex}\", RuntimeWarning)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre>&lt;xarray.Dataset&gt;\n",
+       "Dimensions:        (forecast_time: 20, latitude: 121, lead_time: 32, longitude: 240, realization: 4)\n",
+       "Coordinates:\n",
+       "  * realization    (realization) int64 0 1 2 3\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 1998-03-12 ... 2017-03-12\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 1 days 2 days ... 31 days 32 days\n",
+       "  * latitude       (latitude) float64 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+       "  * longitude      (longitude) float64 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+       "    valid_time     (forecast_time, lead_time) datetime64[ns] ...\n",
+       "Data variables:\n",
+       "    tp             (realization, forecast_time, lead_time, latitude, longitude) float32 ...\n",
+       "Attributes:\n",
+       "    GRIB_edition:            [2]\n",
+       "    GRIB_centre:             cwao\n",
+       "    GRIB_centreDescription:  Canadian Meteorological Service - Montreal \n",
+       "    GRIB_subCentre:          [0]\n",
+       "    Conventions:             CF-1.7\n",
+       "    institution:             Canadian Meteorological Service - Montreal \n",
+       "    history:                 2021-05-11T10:03 GRIB to CDM+CF via cfgrib-0.9.9...</pre>"
+      ],
+      "text/plain": [
+       "<xarray.Dataset>\n",
+       "Dimensions:        (forecast_time: 20, latitude: 121, lead_time: 32, longitude: 240, realization: 4)\n",
+       "Coordinates:\n",
+       "  * realization    (realization) int64 0 1 2 3\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 1998-03-12 ... 2017-03-12\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 1 days 2 days ... 31 days 32 days\n",
+       "  * latitude       (latitude) float64 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+       "  * longitude      (longitude) float64 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+       "    valid_time     (forecast_time, lead_time) datetime64[ns] ...\n",
+       "Data variables:\n",
+       "    tp             (realization, forecast_time, lead_time, latitude, longitude) float32 ...\n",
+       "Attributes:\n",
+       "    GRIB_edition:            [2]\n",
+       "    GRIB_centre:             cwao\n",
+       "    GRIB_centreDescription:  Canadian Meteorological Service - Montreal \n",
+       "    GRIB_subCentre:          [0]\n",
+       "    Conventions:             CF-1.7\n",
+       "    institution:             Canadian Meteorological Service - Montreal \n",
+       "    history:                 2021-05-11T10:03 GRIB to CDM+CF via cfgrib-0.9.9..."
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cat['training-input'](date=dates[10], param='tp', model='eccc').to_dask()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# `forecast-input`\n",
+    "\n",
+    "2020"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre>&lt;xarray.Dataset&gt;\n",
+       "Dimensions:        (forecast_time: 1, latitude: 121, lead_time: 46, longitude: 240, realization: 51)\n",
+       "Coordinates:\n",
+       "  * realization    (realization) int64 0 1 2 3 4 5 6 7 ... 44 45 46 47 48 49 50\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 2020-03-12\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 1 days 2 days ... 45 days 46 days\n",
+       "  * latitude       (latitude) float64 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+       "  * longitude      (longitude) float64 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+       "    valid_time     (forecast_time, lead_time) datetime64[ns] ...\n",
+       "Data variables:\n",
+       "    t2m            (realization, forecast_time, lead_time, latitude, longitude) float32 ...\n",
+       "Attributes:\n",
+       "    GRIB_edition:            [2]\n",
+       "    GRIB_centre:             ecmf\n",
+       "    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts\n",
+       "    GRIB_subCentre:          [0]\n",
+       "    Conventions:             CF-1.7\n",
+       "    institution:             European Centre for Medium-Range Weather Forecasts\n",
+       "    history:                 2021-05-10T16:14:36 GRIB to CDM+CF via cfgrib-0....</pre>"
+      ],
+      "text/plain": [
+       "<xarray.Dataset>\n",
+       "Dimensions:        (forecast_time: 1, latitude: 121, lead_time: 46, longitude: 240, realization: 51)\n",
+       "Coordinates:\n",
+       "  * realization    (realization) int64 0 1 2 3 4 5 6 7 ... 44 45 46 47 48 49 50\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 2020-03-12\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 1 days 2 days ... 45 days 46 days\n",
+       "  * latitude       (latitude) float64 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+       "  * longitude      (longitude) float64 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+       "    valid_time     (forecast_time, lead_time) datetime64[ns] ...\n",
+       "Data variables:\n",
+       "    t2m            (realization, forecast_time, lead_time, latitude, longitude) float32 ...\n",
+       "Attributes:\n",
+       "    GRIB_edition:            [2]\n",
+       "    GRIB_centre:             ecmf\n",
+       "    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts\n",
+       "    GRIB_subCentre:          [0]\n",
+       "    Conventions:             CF-1.7\n",
+       "    institution:             European Centre for Medium-Range Weather Forecasts\n",
+       "    history:                 2021-05-10T16:14:36 GRIB to CDM+CF via cfgrib-0...."
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cat['test-input'](date=dates[10], param='t2m', model='ecmwf').to_dask()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# `hindcast-like-observations`\n",
+    "\n",
+    "observations matching hindcasts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre>&lt;xarray.Dataset&gt;\n",
+       "Dimensions:        (forecast_time: 1, latitude: 121, lead_time: 47, longitude: 240)\n",
+       "Coordinates:\n",
+       "    valid_time     (lead_time, forecast_time) datetime64[ns] ...\n",
+       "  * latitude       (latitude) float64 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+       "  * longitude      (longitude) float64 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 2020-03-12\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 0 days 1 days ... 45 days 46 days\n",
+       "Data variables:\n",
+       "    t2m            (lead_time, forecast_time, latitude, longitude) float32 ...\n",
+       "Attributes:\n",
+       "    source_dataset_name:  temperature daily from NOAA NCEP CPC: Climate Predi...\n",
+       "    source_hosting:       IRIDL\n",
+       "    source_url:           http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/...\n",
+       "    created_by_software:  climetlab-s2s-ai-challenge\n",
+       "    created_by_script:    tools/observations/makefile</pre>"
+      ],
+      "text/plain": [
+       "<xarray.Dataset>\n",
+       "Dimensions:        (forecast_time: 1, latitude: 121, lead_time: 47, longitude: 240)\n",
+       "Coordinates:\n",
+       "    valid_time     (lead_time, forecast_time) datetime64[ns] ...\n",
+       "  * latitude       (latitude) float64 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+       "  * longitude      (longitude) float64 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 2020-03-12\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 0 days 1 days ... 45 days 46 days\n",
+       "Data variables:\n",
+       "    t2m            (lead_time, forecast_time, latitude, longitude) float32 ...\n",
+       "Attributes:\n",
+       "    source_dataset_name:  temperature daily from NOAA NCEP CPC: Climate Predi...\n",
+       "    source_hosting:       IRIDL\n",
+       "    source_url:           http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/...\n",
+       "    created_by_software:  climetlab-s2s-ai-challenge\n",
+       "    created_by_script:    tools/observations/makefile"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cat['training-output-reference'](date=dates[10], param='t2m').to_dask()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# `forecast-like-observations`\n",
+    "\n",
+    "observations matching 2020 forecasts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre>&lt;xarray.Dataset&gt;\n",
+       "Dimensions:        (forecast_time: 1, latitude: 121, lead_time: 47, longitude: 240)\n",
+       "Coordinates:\n",
+       "    valid_time     (lead_time, forecast_time) datetime64[ns] ...\n",
+       "  * latitude       (latitude) float64 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+       "  * longitude      (longitude) float64 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 2020-03-12\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 0 days 1 days ... 45 days 46 days\n",
+       "Data variables:\n",
+       "    t2m            (lead_time, forecast_time, latitude, longitude) float32 ...\n",
+       "Attributes:\n",
+       "    source_dataset_name:  temperature daily from NOAA NCEP CPC: Climate Predi...\n",
+       "    source_hosting:       IRIDL\n",
+       "    source_url:           http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/...\n",
+       "    created_by_software:  climetlab-s2s-ai-challenge\n",
+       "    created_by_script:    tools/observations/makefile</pre>"
+      ],
+      "text/plain": [
+       "<xarray.Dataset>\n",
+       "Dimensions:        (forecast_time: 1, latitude: 121, lead_time: 47, longitude: 240)\n",
+       "Coordinates:\n",
+       "    valid_time     (lead_time, forecast_time) datetime64[ns] ...\n",
+       "  * latitude       (latitude) float64 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
+       "  * longitude      (longitude) float64 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
+       "  * forecast_time  (forecast_time) datetime64[ns] 2020-03-12\n",
+       "  * lead_time      (lead_time) timedelta64[ns] 0 days 1 days ... 45 days 46 days\n",
+       "Data variables:\n",
+       "    t2m            (lead_time, forecast_time, latitude, longitude) float32 ...\n",
+       "Attributes:\n",
+       "    source_dataset_name:  temperature daily from NOAA NCEP CPC: Climate Predi...\n",
+       "    source_hosting:       IRIDL\n",
+       "    source_url:           http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/...\n",
+       "    created_by_software:  climetlab-s2s-ai-challenge\n",
+       "    created_by_script:    tools/observations/makefile"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cat['test-output-reference'](date=dates[10], param='t2m').to_dask()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/data_access/wget_curl.ipynb b/notebooks/data_access/wget_curl.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..0f9e43ede3251f64ce112eb150cb7f33ad057b62
--- /dev/null
+++ b/notebooks/data_access/wget_curl.ipynb
@@ -0,0 +1,186 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Data Access via `curl` or `wget`\n",
+    "\n",
+    "Data is easily available via `climetlab`: https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge\n",
+    "\n",
+    "The data holdings are listed at:\n",
+    "\n",
+    "- https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-input/0.3.0/netcdf/index.html\n",
+    "- https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/training-input/0.3.0/netcdf/index.html\n",
+    "- https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-output-reference/index.html\n",
+    "- https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/training-output-reference/index.html\n",
+    "\n",
+    "Therefore, the S3 data is also accessible with `curl` or `wget`. Alternatively, you can open the HTML links above and download files with a mouse click."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.8/site-packages/xarray/backends/cfgrib_.py:27: UserWarning: Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. Try `import cfgrib` to get the full error message\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<xarray.core.options.set_options at 0x7f5170570520>"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import xarray as xr\n",
+    "import os\n",
+    "from subprocess import call\n",
+    "xr.set_options(display_style='text')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# version of the EWC data\n",
+    "version = '0.3.0'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# `hindcast-input`\n",
+    "\n",
+    "on-the-fly hindcasts corresponding to the 2020 forecasts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "parameter = 't2m'\n",
+    "date = '20200102'\n",
+    "model = 'ecmwf'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = f'https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/training-input/{version}/netcdf/{model}-hindcast-{parameter}-{date}.nc'\n",
+    "os.system(f'wget {url}')\n",
+    "\n",
+    "assert os.path.exists(f'{model}-hindcast-{parameter}-{date}.nc')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# `forecast-input`\n",
+    "\n",
+    "2020"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = f'https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-input/{version}/netcdf/{model}-forecast-{parameter}-{date}.nc'\n",
+    "os.system(f'wget {url}')\n",
+    "\n",
+    "assert os.path.exists(f'{model}-forecast-{parameter}-{date}.nc')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# `hindcast-like-observations`\n",
+    "\n",
+    "CPC observations formatted like training period hindcasts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = f'https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/training-output-reference/{parameter}-{date}.nc'\n",
+    "os.system(f'wget {url}')\n",
+    "\n",
+    "assert os.path.exists(f'{parameter}-{date}.nc')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# `forecast-like-observations`\n",
+    "\n",
+    "CPC observations formatted like test period 2020 forecasts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = f'https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-output-reference/{parameter}-{date}.nc'\n",
+    "os.system(f'wget {url}')\n",
+    "\n",
+    "assert os.path.exists(f'{parameter}-{date}.nc')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}