Skip to content
Snippets Groups Projects
Commit 5f959df1 authored by Tasko Olevski's avatar Tasko Olevski
Browse files

add scoring code

parent 224242b6
No related branches found
No related tags found
No related merge requests found
import xarray as xr
import xskillscore as xs
import numpy as np
import argparse
from pathlib import Path
if __name__ == "__main__":
    # Score ML sub-seasonal forecasts against terciled observations, using the
    # ECMWF real-time 2020 forecasts as the benchmark. Prints per-area RPSS
    # tables and a single spatially-weighted summary score.
    parser = argparse.ArgumentParser()
    parser.add_argument("prediction", help="The netcdf file with predictions")
    args = parser.parse_args()

    observations_terciled_fin = Path('scoring/terciled_observations.zarr')
    benchmark_forecasts_terciled_fin = Path("scoring/ECMWF_rt_2020_cdf_terciled.nc")

    # Terciled observations restricted to the 2020 forecast reference times.
    observations_terciled = xr.open_dataset(observations_terciled_fin, engine='zarr').sel(forecast_reference_time='2020')
    # Re-index 2-metre temperature from valid 'time' onto
    # (forecast_reference_time, lead_time) so it lines up with 'tp'.
    # NOTE(review): assumes exactly two lead times — TODO confirm against the zarr store.
    a = observations_terciled['2t'].sel(time=(observations_terciled.tp.forecast_reference_time + observations_terciled.tp.lead_time[0]))
    b = observations_terciled['2t'].sel(time=(observations_terciled.tp.forecast_reference_time + observations_terciled.tp.lead_time[1]))
    observations_terciled['2t'] = xr.concat([a.drop('time'), b.drop('time')], 'lead_time')
    observations_terciled = observations_terciled.drop('time')

    # Benchmark forecasts; rename so the tercile dimension matches xskillscore's
    # expected 'category_edge' name.
    benchmark_forecasts_terciled = xr.open_dataset(benchmark_forecasts_terciled_fin, chunks={})
    benchmark_forecasts_terciled = benchmark_forecasts_terciled.rename({'category': 'category_edge'})

    # ML predictions: collapse the four weekly steps into the two biweekly
    # lead times by averaging steps (0,1) and (2,3).
    ML_predictions_terciled = xr.open_dataset(args.prediction)
    ML_predictions_terciled = xr.concat([
        ML_predictions_terciled.isel(step=[0, 1]).mean('step'),
        ML_predictions_terciled.isel(step=[2, 3]).mean('step')
    ], 'lead_time').assign_coords(lead_time=observations_terciled.lead_time)
    ML_predictions_terciled = ML_predictions_terciled.assign_coords(category_edge=observations_terciled.category_edge)

    # Sanity checks: both variables present, dimensions match the benchmark.
    for var in ['2t', 'tp']:
        assert var in ML_predictions_terciled.data_vars
    # same number of dimensions
    assert set(ML_predictions_terciled.dims) == set(benchmark_forecasts_terciled.dims)

    # Ranked probability score per grid cell, averaged over reference times.
    rps_ML = xs.rps(observations_terciled, ML_predictions_terciled, category_edges=None, dim=['forecast_reference_time']).compute()
    rps_benchmark = xs.rps(observations_terciled, benchmark_forecasts_terciled, category_edges=None, dim=['forecast_reference_time']).compute()
    rpss = 1 - rps_ML / rps_benchmark  # positive means ML better than ECMWF benchmark

    # Check for -inf grid cells (benchmark RPS of zero makes the ratio blow up).
    if (rpss == -np.inf).to_array().any():
        # Report how many cells hit the degenerate minimum before patching.
        print("grid cells at rpss minimum:", (rpss == rpss.min()).sum())
        # dirty fix: bound the skill score so degenerate cells cannot dominate means
        rpss = np.clip(rpss, -1, 1)

    # Build a 3-band latitude mask (NaN outside each band, 1 inside) to report
    # scores per region; cells south of 60S are excluded entirely.
    mask = xr.ones_like(rpss.isel(lead_time=0, drop=True)).reset_coords(drop=True).tp
    boundary_tropics = 30
    mask = xr.concat([mask.where(mask.latitude > boundary_tropics),
                      mask.where(np.abs(mask.latitude) <= boundary_tropics),
                      mask.where((mask.latitude < -boundary_tropics) & (mask.latitude > -60))], 'area')
    mask = mask.assign_coords(area=['northern_extratropics', 'tropics', 'southern_extratropics'])
    mask.name = 'area'
    # Keep only cells where rpss is defined; 'tp' is used explicitly here
    # (the original relied on the leaked loop variable from the assert loop).
    mask = mask.where(rpss['tp'].isel(lead_time=0, drop=True).notnull())

    # Area-weighted mean: weight each latitude row by cos(latitude).
    weights = np.cos(np.deg2rad(np.abs(mask.latitude)))
    scores = (rpss * mask).weighted(weights).mean('latitude').mean('longitude')
    pd_scores = scores.reset_coords(drop=True).to_dataframe().unstack(0).round(2)
    print(pd_scores)

    # Final score: global (unmasked) weighted mean per variable and lead time.
    scores = rpss.weighted(weights).mean('latitude').mean('longitude')
    # Spatially weighted score averaged over lead_times and variables to one single value.
    final_score = scores.to_array().mean().reset_coords(drop=True)
    print(scores)
    print("final score:", float(final_score))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment