diff --git a/scoring/scoring_script.py b/scoring/scoring_script.py
index 1306d14dd632fe4c774e160cd3159b0c0da59559..ad8bfb10e22940645d4bd2ca17de4a22e3c44236 100644
--- a/scoring/scoring_script.py
+++ b/scoring/scoring_script.py
@@ -15,27 +15,32 @@ if __name__ == "__main__":
     observations_terciled = xr.open_dataset(observations_terciled_fin, engine='zarr').sel(forecast_reference_time='2020')
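+    # select the 2t observations valid at each of tp's two lead times, stack them along lead_time, and expose the tercile coordinate as 'category'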
     a=observations_terciled['2t'].sel(time=(observations_terciled.tp.forecast_reference_time + observations_terciled.tp.lead_time[0]))
     b=observations_terciled['2t'].sel(time=(observations_terciled.tp.forecast_reference_time + observations_terciled.tp.lead_time[1]))
-    observations_terciled['2t']=xr.concat([a.drop('time'),b.drop('time')],'lead_time')
+    observations_terciled['2t']=xr.concat([a.drop('time'),b.drop('time')],'lead_time').rename({'category_edge':'category'})
     observations_terciled = observations_terciled.drop('time')
 
     benchmark_forecasts_terciled = xr.open_dataset(benchmark_forecasts_terciled_fin, chunks={})
-    benchmark_forecasts_terciled = benchmark_forecasts_terciled.rename({'category':'category_edge'})
+    # no rename needed any more: the benchmark's tercile coordinate is already called 'category'
 
     ML_predictions_terciled = xr.open_dataset(args.prediction)
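+    # average the four forecast steps pairwise into the observations' two lead times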
     ML_predictions_terciled = xr.concat([
             ML_predictions_terciled.isel(step=[0,1]).mean('step'),
             ML_predictions_terciled.isel(step=[2,3]).mean('step')
         ], 'lead_time').assign_coords(lead_time=observations_terciled.lead_time)
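+    # adopt the observations' 'category' coordinate so the tercile labels match for scoring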
-    ML_predictions_terciled = ML_predictions_terciled.assign_coords(category_edge=observations_terciled.category_edge)
+    ML_predictions_terciled = ML_predictions_terciled.assign_coords(category=observations_terciled.category)
 
     for v in ['2t', 'tp']:
-        assert v in ML_predictions_terciled.data_vars
+        if v not in ML_predictions_terciled.data_vars:
+            raise ValueError(f"Expected both '2t' and 'tp' in ML predictions, didn't find {v} in {list(ML_predictions_terciled.data_vars)}")
 
     # same number of dimensions
     assert set(ML_predictions_terciled.dims) == set(benchmark_forecasts_terciled.dims)
 
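+    # inputs are already terciled, so category_edges=None; input_distributions tells xs.rps how to interpret them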
-    rps_ML = xs.rps(observations_terciled, ML_predictions_terciled, category_edges=None, dim=['forecast_reference_time']).compute()
-    rps_benchmark = xs.rps(observations_terciled, benchmark_forecasts_terciled, category_edges=None, dim=['forecast_reference_time']).compute()
+    rps_ML = xs.rps(observations_terciled, ML_predictions_terciled, category_edges=None, dim=['forecast_reference_time'], input_distributions='c').compute()
+    rps_benchmark = xs.rps(observations_terciled, benchmark_forecasts_terciled, category_edges=None, dim=['forecast_reference_time'], input_distributions='c').compute()
     rpss = 1 - rps_ML / rps_benchmark # positive means ML better than ECMWF benchmark
 
     # check for -inf grid cells