diff --git a/scoring/scoring_script.py b/scoring/scoring_script.py
index 1306d14dd632fe4c774e160cd3159b0c0da59559..ad8bfb10e22940645d4bd2ca17de4a22e3c44236 100644
--- a/scoring/scoring_script.py
+++ b/scoring/scoring_script.py
@@ -15,27 +15,28 @@ if __name__ == "__main__":
     observations_terciled = xr.open_dataset(observations_terciled_fin, engine='zarr').sel(forecast_reference_time='2020')
     a=observations_terciled['2t'].sel(time=(observations_terciled.tp.forecast_reference_time + observations_terciled.tp.lead_time[0]))
     b=observations_terciled['2t'].sel(time=(observations_terciled.tp.forecast_reference_time + observations_terciled.tp.lead_time[1]))
-    observations_terciled['2t']=xr.concat([a.drop('time'),b.drop('time')],'lead_time')
+    observations_terciled['2t']=xr.concat([a.drop('time'),b.drop('time')],'lead_time').rename({'category_edge':'category'})
     observations_terciled = observations_terciled.drop('time')
 
     benchmark_forecasts_terciled = xr.open_dataset(benchmark_forecasts_terciled_fin, chunks={})
-    benchmark_forecasts_terciled = benchmark_forecasts_terciled.rename({'category':'category_edge'})
+    # benchmark_forecasts_terciled = benchmark_forecasts_terciled.rename({'category':'category_edge'})
 
     ML_predictions_terciled = xr.open_dataset(args.prediction)
     ML_predictions_terciled = xr.concat([
         ML_predictions_terciled.isel(step=[0,1]).mean('step'),
         ML_predictions_terciled.isel(step=[2,3]).mean('step')
     ], 'lead_time').assign_coords(lead_time=observations_terciled.lead_time)
-    ML_predictions_terciled = ML_predictions_terciled.assign_coords(category_edge=observations_terciled.category_edge)
+    ML_predictions_terciled = ML_predictions_terciled.assign_coords(category=observations_terciled.category)
 
     for v in ['2t', 'tp']:
-        assert v in ML_predictions_terciled.data_vars
+        if v not in ML_predictions_terciled.data_vars:
+            raise ValueError(f'Expected both variables in ML_predictions, didnt find {v} in {ML_predictions_terciled.data_vars}')
 
     # same number of dimensions
     assert set(ML_predictions_terciled.dims) == set(benchmark_forecasts_terciled.dims)
-    rps_ML = xs.rps(observations_terciled, ML_predictions_terciled, category_edges=None, dim=['forecast_reference_time']).compute()
-    rps_benchmark = xs.rps(observations_terciled, benchmark_forecasts_terciled, category_edges=None, dim=['forecast_reference_time']).compute()
+    rps_ML = xs.rps(observations_terciled, ML_predictions_terciled, category_edges=None, dim=['forecast_reference_time'], input_distributions='c').compute()
+    rps_benchmark = xs.rps(observations_terciled, benchmark_forecasts_terciled, category_edges=None, dim=['forecast_reference_time'], input_distributions='c').compute()
 
     rpss = 1 - rps_ML / rps_benchmark # positive means ML better than ECMWF benchmark
 
     # check for -inf grid cells