From 34d00106a413fe97e695bc32db61f258f148e8f5 Mon Sep 17 00:00:00 2001 From: Aaron Spring <aaron.spring@mpimet.mpg.de> Date: Wed, 2 Jun 2021 16:13:32 +0000 Subject: [PATCH] Auto-saving for aaron.spring on branch master from commit a9710f0 --- notebooks/ML_train_and_predict.ipynb | 241 ++++++++++++++++++--------- notebooks/WeatherBench | 1 + 2 files changed, 167 insertions(+), 75 deletions(-) create mode 160000 notebooks/WeatherBench diff --git a/notebooks/ML_train_and_predict.ipynb b/notebooks/ML_train_and_predict.ipynb index 9c23068..6d4c129 100644 --- a/notebooks/ML_train_and_predict.ipynb +++ b/notebooks/ML_train_and_predict.ipynb @@ -126,7 +126,16 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.8/site-packages/xarray/backends/cfgrib_.py:27: UserWarning: Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. Try `import cfgrib` to get the full error message\n", + " warnings.warn(\n" + ] + } + ], "source": [ "from tensorflow.keras.layers import Input, Dense, Flatten\n", "from tensorflow.keras.models import Sequential\n", @@ -170,9 +179,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33m\u001b[1mWarning: \u001b[0mRun CLI commands only from project's root directory.\n", + "\u001b[0m\n" + ] + } + ], "source": [ "# preprocessed as renku dataset\n", "!renku storage pull ../data/ecmwf_hindcast-input_2000-2019_biweekly_deterministic.zarr" @@ -180,18 +198,37 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.8/site-packages/xarray/backends/plugins.py:61: RuntimeWarning: Engine 'cfgrib' loading failed:\n", + "/opt/conda/lib/python3.8/site-packages/gribapi/_bindings.cpython-38-x86_64-linux-gnu.so: undefined symbol: codes_bufr_key_is_header\n", + " warnings.warn(f\"Engine {name!r} loading failed:\\n{ex}\", RuntimeWarning)\n" + ] + } + ], "source": [ "hind_2000_2019 = xr.open_zarr(\"../data/ecmwf_hindcast-input_2000-2019_biweekly_deterministic.zarr\", consolidated=True)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33m\u001b[1mWarning: \u001b[0mRun CLI commands only from project's root directory.\n", + "\u001b[0m\n" + ] + } + ], "source": [ "# preprocessed as renku dataset\n", "!renku storage pull ../data/ecmwf_forecast-input_2020_biweekly_deterministic.zarr" @@ -199,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -216,9 +253,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33m\u001b[1mWarning: \u001b[0mRun CLI commands only from project's root directory.\n", + "\u001b[0m\n" + ] + } + ], "source": [ "# preprocessed as renku dataset\n", "!renku storage pull ../data/hindcast-like-observations_2000-2019_biweekly_deterministic.zarr" @@ -226,7 +272,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -235,9 +281,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33m\u001b[1mWarning: \u001b[0mRun CLI commands only from project's root directory.\n", + "\u001b[0m\n" + ] + } + ], "source": [ "# preprocessed as renku dataset\n", "!renku storage pull ../data/forecast-like-observations_2020_biweekly_deterministic.zarr" @@ -245,7 +300,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -268,9 +323,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into 'WeatherBench'...\n", + "remote: Enumerating objects: 718, done.\u001b[K\n", + "remote: Counting objects: 100% (3/3), done.\u001b[K\n", + "remote: Compressing objects: 100% (3/3), done.\u001b[K\n", + "remote: Total 718 (delta 0), reused 0 (delta 0), pack-reused 715\u001b[K\n", + "Receiving objects: 100% (718/718), 17.77 MiB | 28.08 MiB/s, done.\n", + "Resolving deltas: 100% (424/424), done.\n" + ] + } + ], "source": [ "# run once only and dont commit\n", "!git clone https://github.com/pangeo-data/WeatherBench/" @@ -278,7 +347,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -290,7 +359,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -384,7 +453,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -406,7 +475,7 @@ " comment: lead_time describes bi-weekly aggregates. The pd.Timedelta corr..." ] }, - "execution_count": 9, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -420,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -439,7 +508,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -451,22 +520,22 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/work/mh0727/m300524/conda-envs/s2s-ai/lib/python3.7/site-packages/dask/array/numpy_compat.py:40: RuntimeWarning: invalid value encountered in true_divide\n", + "/opt/conda/lib/python3.8/site-packages/dask/array/numpy_compat.py:39: RuntimeWarning: invalid value encountered in true_divide\n", " x = np.divide(x1, x2, out)\n", - "/work/mh0727/m300524/conda-envs/s2s-ai/lib/python3.7/site-packages/dask/array/numpy_compat.py:40: RuntimeWarning: invalid value encountered in true_divide\n", + "/opt/conda/lib/python3.8/site-packages/dask/array/numpy_compat.py:39: RuntimeWarning: invalid value encountered in true_divide\n", " x = np.divide(x1, x2, out)\n", - "/work/mh0727/m300524/conda-envs/s2s-ai/lib/python3.7/site-packages/dask/array/numpy_compat.py:40: RuntimeWarning: invalid value encountered in true_divide\n", + "/opt/conda/lib/python3.8/site-packages/dask/array/numpy_compat.py:39: RuntimeWarning: invalid value encountered in true_divide\n", " x = np.divide(x1, x2, out)\n", - "/work/mh0727/m300524/conda-envs/s2s-ai/lib/python3.7/site-packages/dask/array/numpy_compat.py:40: RuntimeWarning: invalid value encountered in true_divide\n", + "/opt/conda/lib/python3.8/site-packages/dask/array/numpy_compat.py:39: RuntimeWarning: invalid value encountered in true_divide\n", " x = np.divide(x1, x2, out)\n", - "/work/mh0727/m300524/conda-envs/s2s-ai/lib/python3.7/site-packages/dask/array/numpy_compat.py:40: RuntimeWarning: invalid value encountered in true_divide\n", + "/opt/conda/lib/python3.8/site-packages/dask/array/numpy_compat.py:39: RuntimeWarning: invalid value encountered in true_divide\n", " x = np.divide(x1, x2, out)\n" ] } @@ -480,22 +549,22 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/work/mh0727/m300524/conda-envs/s2s-ai/lib/python3.7/site-packages/dask/array/numpy_compat.py:40: RuntimeWarning: invalid value encountered in true_divide\n", + "/opt/conda/lib/python3.8/site-packages/dask/array/numpy_compat.py:39: RuntimeWarning: invalid value encountered in true_divide\n", " x = np.divide(x1, x2, out)\n", - "/work/mh0727/m300524/conda-envs/s2s-ai/lib/python3.7/site-packages/dask/array/numpy_compat.py:40: RuntimeWarning: invalid value encountered in true_divide\n", + "/opt/conda/lib/python3.8/site-packages/dask/array/numpy_compat.py:39: RuntimeWarning: invalid value encountered in true_divide\n", " x = np.divide(x1, x2, out)\n", - "/work/mh0727/m300524/conda-envs/s2s-ai/lib/python3.7/site-packages/dask/array/numpy_compat.py:40: RuntimeWarning: invalid value encountered in true_divide\n", + "/opt/conda/lib/python3.8/site-packages/dask/array/numpy_compat.py:39: RuntimeWarning: invalid value encountered in true_divide\n", " x = np.divide(x1, x2, out)\n", - "/work/mh0727/m300524/conda-envs/s2s-ai/lib/python3.7/site-packages/dask/array/numpy_compat.py:40: RuntimeWarning: invalid value encountered in true_divide\n", + "/opt/conda/lib/python3.8/site-packages/dask/array/numpy_compat.py:39: RuntimeWarning: invalid value encountered in true_divide\n", " x = np.divide(x1, x2, out)\n", - "/work/mh0727/m300524/conda-envs/s2s-ai/lib/python3.7/site-packages/dask/array/numpy_compat.py:40: RuntimeWarning: invalid value encountered in true_divide\n", + "/opt/conda/lib/python3.8/site-packages/dask/array/numpy_compat.py:39: RuntimeWarning: invalid value encountered in true_divide\n", " x = np.divide(x1, x2, out)\n" ] } @@ -509,7 +578,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -522,7 +591,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -531,7 +600,7 @@ "((32, 121, 240), (32, 121, 240))" ] }, - "execution_count": 15, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -543,16 +612,16 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "<matplotlib.collections.QuadMesh at 0x2b55b159ba90>" + "<matplotlib.collections.QuadMesh at 0x7f6bee949160>" ] }, - "execution_count": 16, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, @@ -585,18 +654,18 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:AutoGraph could not transform <bound method PeriodicPadding2D.call of <WeatherBench.src.train_nn.PeriodicPadding2D object at 0x2b5766ccf450>> and will run it as-is.\n", + "WARNING:tensorflow:AutoGraph could not transform <bound method PeriodicPadding2D.call of <WeatherBench.src.train_nn.PeriodicPadding2D object at 0x7f6beeaa3460>> and will run it as-is.\n", "Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", "Cause: module 'gast' has no attribute 'Index'\n", "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n", - "WARNING: AutoGraph could not transform <bound method PeriodicPadding2D.call of <WeatherBench.src.train_nn.PeriodicPadding2D object at 0x2b5766ccf450>> and will run it as-is.\n", + "WARNING: AutoGraph could not transform <bound method PeriodicPadding2D.call of <WeatherBench.src.train_nn.PeriodicPadding2D object at 0x7f6beeaa3460>> and will run it as-is.\n", "Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", "Cause: module 'gast' has no attribute 'Index'\n", "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n" @@ -612,7 +681,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -640,7 +709,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -649,7 +718,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -659,34 +728,29 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Epoch 1/3\n", - "30/30 [==============================] - 16s 505ms/step - loss: 0.1810 - val_loss: 0.0963\n", - "Epoch 2/3\n", - "30/30 [==============================] - 15s 495ms/step - loss: 0.0917 - val_loss: 0.0657\n", - "Epoch 3/3\n", - "30/30 [==============================] - 14s 462ms/step - loss: 0.0663 - val_loss: 0.0601\n" + "30/30 [==============================] - 22s 641ms/step - loss: 0.2095 - val_loss: 0.1098\n" ] }, { "data": { "text/plain": [ - "<tensorflow.python.keras.callbacks.History at 0x2b55b1642e90>" + "<tensorflow.python.keras.callbacks.History at 0x7f6bee9eeeb0>" ] }, - "execution_count": 21, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "cnn.fit(dg_train, epochs=3, validation_data=dg_valid)" + "cnn.fit(dg_train, epochs=1, validation_data=dg_valid)" ] }, { @@ -700,7 +764,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -732,7 +796,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -742,7 +806,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -751,16 +815,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33m\u001b[1mWarning: \u001b[0mRun CLI commands only from project's root directory.\n", + "\u001b[0m\n" + ] + } + ], "source": [ "!renku storage pull ../data/hindcast-like-observations_2000-2019_biweekly_tercile-edges.nc" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -771,7 +844,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -806,25 +879,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33m\u001b[1mWarning: \u001b[0mRun CLI commands only from project's root directory.\n", + "\u001b[0m\n" + ] + } + ], "source": [ "!renku storage pull ../data/forecast-like-observations_2020_biweekly_terciled.nc" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33m\u001b[1mWarning: \u001b[0mRun CLI commands only from project's root directory.\n", + "\u001b[0m\n" + ] + } + ], "source": [ "!renku storage pull ../data/hindcast-like-observations_2000-2019_biweekly_terciled.zarr" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -838,10 +929,10 @@ "outputs": [], "source": [ "#on renku with small memory\n", - "#step = 3\n", - "#for year in np.arange(int(time_train_start), int(time_train_end) -1, step): # loop over years to consume less memory on renku\n", - "# preds_is = create_predictions(cnn, hind_2000_2019, obs_2000_2019, time=slice(str(year), str(year+step-1))).compute()\n", - "# print(skill_by_year(preds_is))" + "step = 3\n", + "for year in np.arange(int(time_train_start), int(time_train_end) -1, step): # loop over years to consume less memory on renku\n", + " preds_is = create_predictions(cnn, hind_2000_2019, obs_2000_2019, time=slice(str(year), str(year+step-1))).compute()\n", + " print(skill_by_year(preds_is))" ] }, { @@ -1620,9 +1711,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:s2s-ai]", + "display_name": "Python 3", "language": "python", - "name": "conda-env-s2s-ai-py" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1634,7 +1725,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.8.6" }, "toc-autonumbering": true }, diff --git a/notebooks/WeatherBench b/notebooks/WeatherBench new file mode 160000 index 0000000..11cfbff --- /dev/null +++ b/notebooks/WeatherBench @@ -0,0 +1 @@ +Subproject commit 11cfbffd92a3413be561ac328dcb13ac712a25c2 -- GitLab