Update scoring_script.py

4afed1df · Aaron Spring · ac035bcb · 4afed1df
Commit 4afed1df authored 3 years ago by Aaron Spring
--- a/scoring/scoring_script.py
+++ b/scoring/scoring_script.py
@@ -13,20 +13,19 @@ if __name__ == "__main__":
    cache_path = "scoring"

    observations_terciled_fin = Path(f'{cache_path}/forecast-like-observations_2020_biweekly_terciled.nc')
-    benchmark_forecasts_terciled_fin = Path(f"{cache_path}/ecmwf_recalibrated_benchmark_2020_biweekly_terciled.nc")

    obs_p = xr.open_dataset(observations_terciled_fin)

    fct_p = xr.open_dataset(args.prediction)

-    bench_p = xr.open_dataset(benchmark_forecasts_terciled_fin)
-
+    clim_p = xr.DataArray([1/3, 1/3, 1/3], dims='category', coords={'category':['below normal', 'near normal', 'above normal']}).to_dataset(name='tp')
+    clim_p['t2m'] = clim_p['tp']

    rps_ML = xs.rps(obs_p, fct_p, category_edges=None, dim='forecast_time', input_distributions='p').compute()

-    rps_bench = xs.rps(obs_p, bench_p, category_edges=None, dim='forecast_time', input_distributions='p').compute()
+    rps_clim = xs.rps(obs_p, clim_p, category_edges=None, dim='forecast_time', input_distributions='p').compute()

-    rpss = (1 - rps_ML / rps_bench)
+    rpss = (1 - rps_ML / rps_clim)

    # check for -inf grid cells
    if (rpss==-np.inf).to_array().any():
@@ -35,7 +34,7 @@ if __name__ == "__main__":
        # dirty fix
        rpss = rpss.clip(-1, 1)

-    # what to do with requested grid cells where NaN is submitted? also penalize
+    # How to handle requested grid cells where NaN is submitted? Also penalize them; TODO: see https://renkulab.io/gitlab/aaron.spring/s2s-ai-challenge-template/-/issues/7
    mask = xr.ones_like(rpss.isel(lead_time=0, drop=True)).reset_coords(drop=True).t2m
    boundary_tropics = 30
    mask = xr.concat([mask.where(mask.latitude > boundary_tropics),
@@ -48,13 +47,8 @@ if __name__ == "__main__":

    # weighted area mean
    weights = np.cos(np.deg2rad(np.abs(mask.latitude)))
-    scores = (rpss*mask).weighted(weights).mean('latitude').mean('longitude')
-    pd_scores = scores.reset_coords(drop=True).to_dataframe().unstack(0).round(2)
-
-    # final score
-    scores = rpss.weighted(weights).mean('latitude').mean('longitude')
    # spatially weighted score averaged over lead_times and variables to one single value
-
-    # score transfered to leaderboard
+    scores = (rpss * mask).weighted(weights).mean('latitude').mean('longitude')
    scores = scores.to_array().mean().reset_coords(drop=True)
+    # score transferred to leaderboard
    print(scores.item())