diff --git a/notebooks/damien_specq_scripts/ML_train_and_predict.R b/notebooks/damien_specq_scripts/ML_train_and_predict.R
new file mode 100644
index 0000000000000000000000000000000000000000..d71f88d61e2e2385c25b4941e681f07771c4397d
--- /dev/null
+++ b/notebooks/damien_specq_scripts/ML_train_and_predict.R
@@ -0,0 +1,285 @@
+#### Statistical-dynamical forecasting method inspired by:
+#### Specq, D. and L. Batté (2020). Improving subseasonal precipitation forecasts through a statistical–dynamical approach: application to the southwest tropical Pacific
+#### No artificial intelligence method was used
+#### The method only uses ECMWF hindcasts for t2m and tp and their corresponding observations for training
+#### It is directly applied to the ECMWF real-time forecasts
+
+library(R.utils)
+library(lubridate)       ### Date handling (ymd)
+library(s2dverification) ### Provides the InsertDim function
+library(ncdf4)           ### NetCDF reading and writing (nc_open, ncvar_get, ncvar_put, nc_close)
+library(EnvStats)        ### Provides the empirical distribution function pemp
+library(abind)           ### Array binding
+
+###
+
+file_pred=nc_open("submissions/ML_prediction_2020.nc",write=T) ### Open the example NetCDF submission file as a template to overwrite
+
+### Longitude, latitude and number of points
+
+lon=ncvar_get(file_pred,"longitude")
+lat=ncvar_get(file_pred,"latitude")
+nb_points=length(lon)*length(lat)
+
+### Getting the 0/1 dry mask for precipitation
+
+dry_masks_tp=ncvar_get(file_pred,"tp")
+dry_masks_tp=apply(dry_masks_tp,1:4,sum)
+dry_masks_tp[which(!is.na(dry_masks_tp))]=1
+dry_masks_tp[which(is.na(dry_masks_tp))]=0
+
+### ECMWF real-time start dates
+
+inidates=as.character(seq(ymd("2020-01-02"),ymd("2020-12-31"),by="1 day"))
+inidates=inidates[seq(1,length(inidates),7)]
+inidates=sort(inidates)
+
+lead_names=c("weeks34","weeks56") ### Names of the lead times in the NetCDF data file names
+
+###
+
+nyrs=20      ### Number of hindcast years
+nmb_train=11 ### Hindcast ensemble size
+nmb_test=51  ### Real-time ensemble size
+
+### Computation of the new tercile probability forecasts
+### The method is applied separately to each real-time start date and lead time by training on the corresponding hindcasts
+
+for (d in 1:length(inidates)){ ### Loop on start dates
+  for (lead in 1:2){           ### Loop on lead times
+
+    inidate=inidates[d]
+    inidate_bis=paste(substr(inidate,1,4),substr(inidate,6,7),substr(inidate,9,10),sep="")
+
+    ####### Temperature t2m
+
+    ### Open NetCDF files
+    hindcast_file=nc_open(paste("data/hindcast-input/ecmwf-hindcast-t2m-",inidate_bis,"-",lead_names[lead],".nc",sep=""))
+    ### prepared with download_hindcast_input.sh
+    forecast_file=nc_open(paste("data/forecast-input/ecmwf-forecast-t2m-",inidate_bis,"-",lead_names[lead],".nc",sep=""))
+    ### prepared with download_forecast_input.sh
+    training_obs_file=nc_open(paste("data/hindcast-like-observations/t2m-",inidate_bis,"-",lead_names[lead],".nc",sep=""))
+    ### prepared with download_hindcast_like_observations.sh
+
+    ### Load variables
+    t2m_mod_train=ncvar_get(hindcast_file,"t2m")
+    t2m_obs_train=ncvar_get(training_obs_file,"t2m")
+    t2m_mod_test=ncvar_get(forecast_file,"t2m")
+
+    ### Close NetCDF files
+    nc_close(hindcast_file)
+    nc_close(training_obs_file)
+    nc_close(forecast_file)
+
+    ### Reshape data matrices
+    dim(t2m_mod_train)=c(nb_points,nyrs*nmb_train)
+    dim(t2m_obs_train)=c(nb_points,nyrs)
+    dim(t2m_mod_test)=c(nb_points,nmb_test)
+
+    ### Determine the temperature mask based on data availability in the training observations t2m_obs_train,
+    ### i.e. grid points with only NA training values are removed
+    mask_t2m=apply(t2m_obs_train,1,mean,na.rm=T)
+    mask_t2m[which(!is.na(mask_t2m))]=1
+    mask_t2m[which(is.na(mask_t2m))]=0
+    dim(mask_t2m)=nb_points
+
+    ### Apply the temperature mask determined above and reduce the number of grid points
+    t2m_mod_train=t2m_mod_train[which(mask_t2m==1),]
+    t2m_obs_train=t2m_obs_train[which(mask_t2m==1),]
+    t2m_mod_test=t2m_mod_test[which(mask_t2m==1),]
+
+    nb_points_bis=length(which(mask_t2m==1))
+
+    ### Convert the obs and model training data distributions into normal distributions (mean=0, standard deviation=1)
+    ### See Specq and Batté (2020) for explanations
+    t2m_obs_train_p=apply(t2m_obs_train,1,function(x) pemp(x,x))
+    t2m_obs_train_p=aperm(t2m_obs_train_p,c(2,1))
+    t2m_obs_train_g=qnorm(t2m_obs_train_p,mean=0,sd=1)
+
+    t2m_mod_train_p=apply(t2m_mod_train,1,function(x) pemp(x,x))
+    t2m_mod_train_p=aperm(t2m_mod_train_p,c(2,1))
+    t2m_mod_train_g=qnorm(t2m_mod_train_p,mean=0,sd=1)
+
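### Illustration only (toy values, hypothetical names; not part of the original script): the Gaussian
### anamorphosis applied just above. pemp(x,x) from EnvStats returns the empirical cumulative probability
### of each sample value within its own sample, and qnorm maps those probabilities onto a standard normal
### variable, so the transformed values are approximately N(0,1) whatever the original distribution was.
toy_sample=c(12.4,0.0,3.7,25.1,8.9)   ### e.g. five hindcast values of weeks 3-4 rainfall at one grid point
toy_p=pemp(toy_sample,toy_sample)     ### empirical cumulative probabilities, strictly between 0 and 1
toy_g=qnorm(toy_p,mean=0,sd=1)        ### Gaussian-transformed sample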
+    ### Convert the raw real-time forecast data into a normal variable according to the distribution of the hindcast data
+    t2m_mod_test_p=apply(abind(InsertDim(t2m_mod_train,3,nmb_test),InsertDim(t2m_mod_test,2,1),along=2),c(1,3),function(x) pemp(x[length(x)],x[1:(length(x)-1)]))
+    t2m_mod_test_g=qnorm(t2m_mod_test_p,mean=0,sd=1)
+
+    ### Reshape data matrices
+    dim(t2m_mod_train)=c(nb_points_bis,nyrs,nmb_train)
+    dim(t2m_mod_train_p)=c(nb_points_bis,nyrs,nmb_train)
+    dim(t2m_mod_train_g)=c(nb_points_bis,nyrs,nmb_train)
+
+    ### Take ensemble means
+    t2m_mod_train_g=apply(t2m_mod_train_g,1:2,mean,na.rm=T)
+    t2m_mod_test_g=apply(t2m_mod_test_g,1,mean,na.rm=T)
+
+    ### Initialize the mean and standard deviation values of the new real-time forecast distributions
+    t2m_pred_mean=array(NA,nb_points_bis)
+    t2m_pred_std=array(NA,nb_points_bis)
+
+    for (i in 1:nb_points_bis){ ### Loop on grid points
+
+      ### Parameters of the prior distribution in the Bayesian approach
+      mean_prior=0
+      std_prior=1
+
+      ### Linear regression of model against observations in the training period (hindcast)
+      fit=lm(t2m_mod_train_g[i,]~t2m_obs_train_g[i,])
+      a=fit[["coefficients"]][1]
+      b=fit[["coefficients"]][2]
+      std=summary(fit)[["sigma"]]
+
+      ### Implementation of the Bayesian approach to determine the posterior distribution
+      tmp=1/std_prior^2+(b/std)^2
+      t2m_pred_std[i]=sqrt(1/tmp)
+      t2m_pred_mean[i]=(1/tmp)*((mean_prior/std_prior^2)+(b/std)^2*(t2m_mod_test_g[i]-a)/b)
+
+      rm(fit,a,b,std)
+      gc()
+    }
+
+    ### Computation of tercile probabilities from the new distributions
+    ### Tercile thresholds are those of the normal distribution mean=0, sd=1
+    t2m_pred_category=array(NA,c(nb_points_bis,3))
+    t2m_pred_category[,1]=apply(abind(t2m_pred_mean,t2m_pred_std,along=0),2,function(x) pnorm(qnorm(1/3,mean=0,sd=1),mean=x[1],sd=x[2],lower.tail=T))
+    t2m_pred_category[,3]=apply(abind(t2m_pred_mean,t2m_pred_std,along=0),2,function(x) pnorm(qnorm(2/3,mean=0,sd=1),mean=x[1],sd=x[2],lower.tail=F))
+    t2m_pred_category[,2]=1-t2m_pred_category[,1]-t2m_pred_category[,3]
+
+    ### Filling the masked grid points with NAs and reshaping the matrix of tercile probabilities
+    t2m_pred_category_bis=array(NA,c(nb_points,3))
+    t2m_pred_category_bis[which(mask_t2m==1),]=t2m_pred_category
+    t2m_pred_category=t2m_pred_category_bis
+    rm(t2m_pred_category_bis)
+    gc()
+    dim(t2m_pred_category)=c(length(lon),length(lat),3)
+
+    ### Inclusion of the tercile probabilities for the corresponding start date and lead time in the NetCDF submission file
+    ncvar_put(nc=file_pred,varid="t2m",vals=t2m_pred_category,start=c(1,1,d,lead,1),count=c(length(lon),length(lat),1,1,3))
+
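### Illustration only (toy numbers, hypothetical names; not part of the original script): a worked example
### of the Bayesian update done in the loop above for one grid point. With a N(0,1) prior on the
### Gaussian-transformed observation, a training regression mod_g = a + b*obs_g + e with e ~ N(0,std^2),
### and a Gaussian-transformed ensemble-mean forecast y, the posterior precision is
### 1/std_prior^2 + (b/std)^2 and the tercile probabilities follow from pnorm.
toy_a=0.1; toy_b=0.6; toy_std=0.8                                        ### toy regression intercept, slope and residual sd
toy_y=1.2                                                                ### toy Gaussian-transformed ensemble-mean forecast
toy_prec=1/1^2+(toy_b/toy_std)^2                                         ### posterior precision (prior mean 0, prior sd 1)
toy_post_sd=sqrt(1/toy_prec)                                             ### 0.8
toy_post_mean=(1/toy_prec)*(0/1^2+(toy_b/toy_std)^2*(toy_y-toy_a)/toy_b) ### 0.66
pnorm(qnorm(1/3),mean=toy_post_mean,sd=toy_post_sd)                      ### P(below normal), roughly 0.09
pnorm(qnorm(2/3),mean=toy_post_mean,sd=toy_post_sd,lower.tail=F)         ### P(above normal), roughly 0.61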
+    ### Removing intermediary variables
+    rm(nb_points_bis,t2m_mod_train,t2m_obs_train,t2m_mod_test,t2m_mod_train_g,t2m_mod_train_p,t2m_obs_train_g,t2m_obs_train_p,t2m_mod_test_p,t2m_mod_test_g,t2m_pred_category,t2m_pred_mean,t2m_pred_std)
+    gc()
+
+    ####### Precipitation tp: similar to temperature, with more conditions on masked grid points due to dry areas and training data availability
+
+    ### Open NetCDF files
+    hindcast_file=nc_open(paste("data/hindcast-input/ecmwf-hindcast-tp-",inidate_bis,"-",lead_names[lead],".nc",sep=""))
+    ### prepared with download_hindcast_input.sh
+    forecast_file=nc_open(paste("data/forecast-input/ecmwf-forecast-tp-",inidate_bis,"-",lead_names[lead],".nc",sep=""))
+    ### prepared with download_forecast_input.sh
+    training_obs_file=nc_open(paste("data/hindcast-like-observations/tp-",inidate_bis,"-",lead_names[lead],".nc",sep=""))
+    ### prepared with download_hindcast_like_observations.sh
+
+    ### Load variables
+    tp_mod_train=ncvar_get(hindcast_file,"tp")
+    tp_obs_train=ncvar_get(training_obs_file,"tp")
+    tp_mod_test=ncvar_get(forecast_file,"tp")
+
+    ### Close NetCDF files
+    nc_close(hindcast_file)
+    nc_close(training_obs_file)
+    nc_close(forecast_file)
+
+    ### Reshape data matrices
+    dim(tp_mod_train)=c(nb_points,nyrs*nmb_train)
+    dim(tp_obs_train)=c(nb_points,nyrs)
+    dim(tp_mod_test)=c(nb_points,nmb_test)
+
+    ### Dry mask for the corresponding start date and lead time, as inferred from the template ML_prediction_2020.nc file
+    dry_mask_tp=dry_masks_tp[,,d,lead]
+    dim(dry_mask_tp)=nb_points
+
+    ### Additional dry mask constraint on the training model data, when all non-NA hindcast values are 0
+    dry_mask_mod=apply(tp_mod_train,1,function(x) length(unique(x[which(!is.na(x))])))
+    dry_mask_mod[which(dry_mask_mod==1)]=0
+    dry_mask_mod[which(dry_mask_mod>1)]=1
+
+    ### Additional mask constraint on the training observations based on data availability and all-zero grid points
+    avail_mask_tp=apply(tp_obs_train,1,mean,na.rm=T)
+    avail_mask_tp[which(!is.na(avail_mask_tp) & avail_mask_tp>0)]=1
+    avail_mask_tp[which(is.na(avail_mask_tp) | avail_mask_tp==0)]=0
+
+    ### Merging the various mask constraints
+    mask_tp=dry_mask_tp
+    mask_tp[which(avail_mask_tp==0)]=0
+    mask_tp[which(dry_mask_mod==0)]=0
+
+    ### Apply the precipitation mask determined above and reduce the number of grid points
+    tp_mod_train=tp_mod_train[which(mask_tp==1),]
+    tp_obs_train=tp_obs_train[which(mask_tp==1),]
+    tp_mod_test=tp_mod_test[which(mask_tp==1),]
+
+    nb_points_bis=length(which(mask_tp==1))
+
+    ### Convert the obs and model training data distributions into normal distributions (mean=0, standard deviation=1)
+    ### See Specq and Batté (2020) for explanations
+    tp_obs_train_p=apply(tp_obs_train,1,function(x) pemp(x,x))
+    tp_obs_train_p=aperm(tp_obs_train_p,c(2,1))
+    tp_obs_train_g=qnorm(tp_obs_train_p,mean=0,sd=1)
+
+    tp_mod_train_p=apply(tp_mod_train,1,function(x) pemp(x,x))
+    tp_mod_train_p=aperm(tp_mod_train_p,c(2,1))
+    tp_mod_train_g=qnorm(tp_mod_train_p,mean=0,sd=1)
+
+    ### Convert the raw real-time forecast data into a normal variable according to the distribution of the hindcast data
+    tp_mod_test_p=apply(abind(InsertDim(tp_mod_train,3,nmb_test),InsertDim(tp_mod_test,2,1),along=2),c(1,3),function(x) pemp(x[length(x)],x[1:(length(x)-1)]))
+    tp_mod_test_g=qnorm(tp_mod_test_p,mean=0,sd=1)
+
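### Illustration only (hypothetical name; not part of the original script): the abind/InsertDim call above
### pools the 20x11 hindcast values of each grid point and evaluates the empirical cumulative probability
### of every real-time member within that pooled sample (the same construction is used for t2m further up).
### An explicit, slower but equivalent double loop:
tp_mod_test_p_sketch=array(NA,c(nb_points_bis,nmb_test))
for (i in 1:nb_points_bis){
  for (m in 1:nmb_test){
    tp_mod_test_p_sketch[i,m]=pemp(tp_mod_test[i,m],tp_mod_train[i,]) ### percentile of member m within the hindcast sample
  }
}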
+    ### Reshape data matrices
+    dim(tp_mod_train)=c(nb_points_bis,nyrs,nmb_train)
+    dim(tp_mod_train_p)=c(nb_points_bis,nyrs,nmb_train)
+    dim(tp_mod_train_g)=c(nb_points_bis,nyrs,nmb_train)
+
+    ### Take ensemble means
+    tp_mod_train_g=apply(tp_mod_train_g,1:2,mean,na.rm=T)
+    tp_mod_test_g=apply(tp_mod_test_g,1,mean,na.rm=T)
+
+    ### Initialize the mean and standard deviation values of the new real-time forecast distributions
+    tp_pred_mean=array(NA,nb_points_bis)
+    tp_pred_std=array(NA,nb_points_bis)
+
+    for (i in 1:nb_points_bis){ ### Loop on grid points
+
+      ### Parameters of the prior distribution in the Bayesian approach
+      mean_prior=0
+      std_prior=1
+
+      ### Linear regression of model against observations in the training period (hindcast)
+      fit=lm(tp_mod_train_g[i,]~tp_obs_train_g[i,])
+      a=fit[["coefficients"]][1]
+      b=fit[["coefficients"]][2]
+      std=summary(fit)[["sigma"]]
+
+      ### Implementation of the Bayesian approach to determine the posterior distribution
+      tmp=1/std_prior^2+(b/std)^2
+      tp_pred_std[i]=sqrt(1/tmp)
+      tp_pred_mean[i]=(1/tmp)*((mean_prior/std_prior^2)+(b/std)^2*(tp_mod_test_g[i]-a)/b)
+    }
+
+    ### Computation of tercile probabilities from the new distributions
+    ### Tercile thresholds are those of the normal distribution mean=0, sd=1
+    tp_pred_category=array(NA,c(nb_points_bis,3))
+    tp_pred_category[,1]=apply(abind(tp_pred_mean,tp_pred_std,along=0),2,function(x) pnorm(qnorm(1/3,mean=0,sd=1),mean=x[1],sd=x[2],lower.tail=T))
+    tp_pred_category[,3]=apply(abind(tp_pred_mean,tp_pred_std,along=0),2,function(x) pnorm(qnorm(2/3,mean=0,sd=1),mean=x[1],sd=x[2],lower.tail=F))
+    tp_pred_category[,2]=1-tp_pred_category[,1]-tp_pred_category[,3]
+
+    ### Filling the masked grid points with NAs and reshaping the matrix of tercile probabilities
+    tp_pred_category_bis=array(NA,c(nb_points,3))
+    tp_pred_category_bis[which(mask_tp==1),]=tp_pred_category
+    tp_pred_category=tp_pred_category_bis
+    rm(tp_pred_category_bis)
+    gc()
+    dim(tp_pred_category)=c(length(lon),length(lat),3)
+
+    ### Inclusion of the tercile probabilities for the corresponding start date and lead time in the NetCDF submission file
+    ncvar_put(nc=file_pred,varid="tp",vals=tp_pred_category,start=c(1,1,d,lead,1),count=c(length(lon),length(lat),1,1,3))
+
+    ### Removing intermediary variables
+    rm(nb_points_bis,tp_mod_train,tp_obs_train,tp_mod_test,tp_mod_train_g,tp_mod_train_p,tp_obs_train_g,tp_obs_train_p,tp_mod_test_p,tp_mod_test_g,tp_pred_category,tp_pred_mean,tp_pred_std,a,b,std)
+    gc()
+
+  }
+}
+
+### Closing NetCDF file
+nc_close(file_pred)
\ No newline at end of file
diff --git a/notebooks/damien_specq_scripts/dates.txt b/notebooks/damien_specq_scripts/dates.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d4dcf1feca86485e8407ae5ee1d6ebd53cb895e3
--- /dev/null
+++ b/notebooks/damien_specq_scripts/dates.txt
@@ -0,0 +1,53 @@
+"20200102"
+"20200109"
+"20200116"
+"20200123"
+"20200130"
+"20200206"
+"20200213"
+"20200220"
+"20200227"
+"20200305"
+"20200312"
+"20200319"
+"20200326"
+"20200402"
+"20200409"
+"20200416"
+"20200423"
+"20200430"
+"20200507"
+"20200514"
+"20200521"
+"20200528"
+"20200604"
+"20200611"
+"20200618"
+"20200625"
+"20200702"
+"20200709"
+"20200716"
+"20200723"
+"20200730"
+"20200806"
+"20200813"
+"20200820"
+"20200827"
+"20200903"
+"20200910"
+"20200917"
+"20200924"
+"20201001"
+"20201008"
+"20201015"
+"20201022"
+"20201029"
+"20201105"
+"20201112"
+"20201119"
+"20201126"
+"20201203"
+"20201210"
+"20201217"
+"20201224"
+"20201231"
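The 53 start dates above are every seventh day from 2020-01-02 to 2020-12-31, i.e. the 2020 Thursdays on which the ECMWF real-time forecasts used by the challenge were issued, and they match the inidates vector built in ML_train_and_predict.R. A quick consistency check, as a sketch (hypothetical variable name):

library(lubridate)
dates_check=format(seq(ymd("2020-01-02"),ymd("2020-12-31"),by="7 days"),"%Y%m%d")
length(dates_check) ### 53, running from "20200102" to "20201231" as listed in dates.txt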
diff --git a/notebooks/damien_specq_scripts/download_forecast_input.sh b/notebooks/damien_specq_scripts/download_forecast_input.sh
new file mode 100644
index 0000000000000000000000000000000000000000..92e5f0b56a0c6f0c0b108de38b42c95afcc86f79
--- /dev/null
+++ b/notebooks/damien_specq_scripts/download_forecast_input.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+while read START_DATE ### Loop on the list of start dates stored in dates.txt
+do
+
+START_DATE=${START_DATE:1:8}
+
+###### Temperature t2m
+
+### Download t2m real-time forecasts from the ECMWF weather cloud
+wget 'https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-input/0.3.0/netcdf/ecmwf-forecast-t2m-'$START_DATE'.nc'
+
+### Select the lead times corresponding to weeks 3-4 and average
+ncks -v t2m -d lead_time,14,27 'ecmwf-forecast-t2m-'$START_DATE'.nc' 'ecmwf-forecast-t2m-'$START_DATE'-weeks34.nc'
+ncwa -a lead_time -O 'ecmwf-forecast-t2m-'$START_DATE'-weeks34.nc' 'ecmwf-forecast-t2m-'$START_DATE'-weeks34.nc'
+
+### Select the lead times corresponding to weeks 5-6 and average
+ncks -v t2m -d lead_time,28,41 'ecmwf-forecast-t2m-'$START_DATE'.nc' 'ecmwf-forecast-t2m-'$START_DATE'-weeks56.nc'
+ncwa -a lead_time -O 'ecmwf-forecast-t2m-'$START_DATE'-weeks56.nc' 'ecmwf-forecast-t2m-'$START_DATE'-weeks56.nc'
+
+### Clean workspace
+rm 'ecmwf-forecast-t2m-'$START_DATE'.nc'
+
+###### Precipitation tp
+
+### Download tp real-time forecasts from the ECMWF weather cloud
+wget 'https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/test-input/0.3.0/netcdf/ecmwf-forecast-tp-'$START_DATE'.nc'
+
+### Select the lead times corresponding to the beginning and the end of weeks 3-4, take the difference of accumulated precip
+### and correct for the negative values bug (negative accumulated precip) by replacing such values with 0
+ncks -v tp -d lead_time,14 'ecmwf-forecast-tp-'$START_DATE'.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks34_1.nc'
+ncks -v tp -d lead_time,28 'ecmwf-forecast-tp-'$START_DATE'.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks34_2.nc'
+ncdiff 'ecmwf-forecast-tp-'$START_DATE'-weeks34_2.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks34_1.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks34.nc'
+ncap2 -s 'where(tp<0.) tp=0;' -O 'ecmwf-forecast-tp-'$START_DATE'-weeks34.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks34.nc'
+rm 'ecmwf-forecast-tp-'$START_DATE'-weeks34_1.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks34_2.nc' ### Clean workspace
+
+### Select the lead times corresponding to the beginning and the end of weeks 5-6, take the difference of accumulated precip
+### and correct for the negative values bug (negative accumulated precip) by replacing such values with 0
+ncks -v tp -d lead_time,28 'ecmwf-forecast-tp-'$START_DATE'.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks56_1.nc'
+ncks -v tp -d lead_time,42 'ecmwf-forecast-tp-'$START_DATE'.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks56_2.nc'
+ncdiff 'ecmwf-forecast-tp-'$START_DATE'-weeks56_2.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks56_1.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks56.nc'
+ncap2 -s 'where(tp<0.) tp=0;' -O 'ecmwf-forecast-tp-'$START_DATE'-weeks56.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks56.nc'
+rm 'ecmwf-forecast-tp-'$START_DATE'-weeks56_1.nc' 'ecmwf-forecast-tp-'$START_DATE'-weeks56_2.nc' ### Clean workspace
+
+rm 'ecmwf-forecast-tp-'$START_DATE'.nc' ### Clean workspace
+
+done < notebooks/damien_specq_scripts/dates.txt
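The challenge defines the two target periods as weeks 3-4 (forecast days 15-28) and weeks 5-6 (forecast days 29-42), which is what the lead_time index ranges above are meant to extract; for accumulated precipitation, differencing the field at the end and at the beginning of a period gives the rainfall total over that period. A small sketch of the corresponding calendar spans, assuming the start date counts as day 1 (hypothetical variable names):

library(lubridate)
s=ymd("2020-01-02")                   ### example start date (day 1)
weeks34_span=c(s+days(14),s+days(27)) ### days 15-28: 2020-01-16 to 2020-01-29
weeks56_span=c(s+days(28),s+days(41)) ### days 29-42: 2020-01-30 to 2020-02-12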
diff --git a/notebooks/damien_specq_scripts/download_hindcast_input.sh b/notebooks/damien_specq_scripts/download_hindcast_input.sh
new file mode 100644
index 0000000000000000000000000000000000000000..93ac9c731e074e1cde3d60c6956e78f415e8788b
--- /dev/null
+++ b/notebooks/damien_specq_scripts/download_hindcast_input.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+while read START_DATE ### Loop on the list of start dates stored in dates.txt
+do
+
+START_DATE=${START_DATE:1:8}
+
+###### Temperature t2m
+
+### Download t2m hindcasts from the ECMWF weather cloud
+wget 'https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/training-input/0.3.0/netcdf/ecmwf-hindcast-t2m-'$START_DATE'.nc'
+
+### Select the lead times corresponding to weeks 3-4 and average
+ncks -v t2m -d lead_time,14,27 'ecmwf-hindcast-t2m-'$START_DATE'.nc' 'ecmwf-hindcast-t2m-'$START_DATE'-weeks34.nc'
+ncwa -a lead_time -O 'ecmwf-hindcast-t2m-'$START_DATE'-weeks34.nc' 'ecmwf-hindcast-t2m-'$START_DATE'-weeks34.nc'
+
+### Select the lead times corresponding to weeks 5-6 and average
+ncks -v t2m -d lead_time,28,41 'ecmwf-hindcast-t2m-'$START_DATE'.nc' 'ecmwf-hindcast-t2m-'$START_DATE'-weeks56.nc'
+ncwa -a lead_time -O 'ecmwf-hindcast-t2m-'$START_DATE'-weeks56.nc' 'ecmwf-hindcast-t2m-'$START_DATE'-weeks56.nc'
+
+rm 'ecmwf-hindcast-t2m-'$START_DATE'.nc' ### Clean workspace
+
+###### Precipitation tp
+
+### Download tp hindcasts from the ECMWF weather cloud
+wget 'https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/training-input/0.3.0/netcdf/ecmwf-hindcast-tp-'$START_DATE'.nc'
+
+### Select the lead times corresponding to the beginning and the end of weeks 3-4, take the difference of accumulated precip
+### and correct for the negative values bug (negative accumulated precip) by replacing such values with 0
+ncks -v tp -d lead_time,14 'ecmwf-hindcast-tp-'$START_DATE'.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks34_1.nc'
+ncks -v tp -d lead_time,28 'ecmwf-hindcast-tp-'$START_DATE'.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks34_2.nc'
+ncdiff 'ecmwf-hindcast-tp-'$START_DATE'-weeks34_2.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks34_1.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks34.nc'
+ncap2 -s 'where(tp<0.) tp=0;' -O 'ecmwf-hindcast-tp-'$START_DATE'-weeks34.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks34.nc'
+rm 'ecmwf-hindcast-tp-'$START_DATE'-weeks34_1.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks34_2.nc' ### Clean workspace
+
+### Select the lead times corresponding to the beginning and the end of weeks 5-6, take the difference of accumulated precip
+### and correct for the negative values bug (negative accumulated precip) by replacing such values with 0
+ncks -v tp -d lead_time,28 'ecmwf-hindcast-tp-'$START_DATE'.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks56_1.nc'
+ncks -v tp -d lead_time,42 'ecmwf-hindcast-tp-'$START_DATE'.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks56_2.nc'
+ncdiff 'ecmwf-hindcast-tp-'$START_DATE'-weeks56_2.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks56_1.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks56.nc'
+ncap2 -s 'where(tp<0.) tp=0;' -O 'ecmwf-hindcast-tp-'$START_DATE'-weeks56.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks56.nc'
+rm 'ecmwf-hindcast-tp-'$START_DATE'-weeks56_1.nc' 'ecmwf-hindcast-tp-'$START_DATE'-weeks56_2.nc' ### Clean workspace
+
+rm 'ecmwf-hindcast-tp-'$START_DATE'.nc' ### Clean workspace
+
+done < notebooks/damien_specq_scripts/dates.txt
diff --git a/notebooks/damien_specq_scripts/download_hindcast_like_observations.sh b/notebooks/damien_specq_scripts/download_hindcast_like_observations.sh
new file mode 100644
index 0000000000000000000000000000000000000000..97f04f9a166ab5ae405203b59f88a39fae63be0a
--- /dev/null
+++ b/notebooks/damien_specq_scripts/download_hindcast_like_observations.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+while read START_DATE ### Loop on the list of start dates stored in dates.txt
+do
+
+START_DATE=${START_DATE:1:8}
+
+###### Temperature t2m
+
+### Download t2m observations corresponding to the hindcasts from the ECMWF weather cloud
+wget 'https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/training-output-reference/t2m-'$START_DATE'.nc'
+
+### Select the lead times corresponding to weeks 3-4 and average
+ncks -v t2m -d lead_time,15,28 't2m-'$START_DATE'.nc' 't2m-'$START_DATE'-weeks34.nc'
+ncwa -a lead_time -O 't2m-'$START_DATE'-weeks34.nc' 't2m-'$START_DATE'-weeks34.nc'
+
+### Select the lead times corresponding to weeks 5-6 and average
+ncks -v t2m -d lead_time,29,42 't2m-'$START_DATE'.nc' 't2m-'$START_DATE'-weeks56.nc'
+ncwa -a lead_time -O 't2m-'$START_DATE'-weeks56.nc' 't2m-'$START_DATE'-weeks56.nc'
+
+rm 't2m-'$START_DATE'.nc' ### Clean workspace
+
+###### Precipitation tp
+
+### Download tp observations corresponding to the hindcasts from the ECMWF weather cloud
+wget 'https://storage.ecmwf.europeanweather.cloud/s2s-ai-challenge/data/training-output-reference/tp-'$START_DATE'.nc'
+
+### Select the lead times corresponding to the beginning and the end of weeks 3-4, take the difference of accumulated precip
+### and correct for the negative values bug (negative accumulated precip) by replacing such values with 0 (not strictly necessary for obs)
+ncks -v tp -d lead_time,14 'tp-'$START_DATE'.nc' 'tp-'$START_DATE'-weeks34_1.nc'
+ncks -v tp -d lead_time,28 'tp-'$START_DATE'.nc' 'tp-'$START_DATE'-weeks34_2.nc'
+ncdiff 'tp-'$START_DATE'-weeks34_2.nc' 'tp-'$START_DATE'-weeks34_1.nc' 'tp-'$START_DATE'-weeks34.nc'
+ncap2 -s 'where(tp<0.) tp=0;' -O 'tp-'$START_DATE'-weeks34.nc' 'tp-'$START_DATE'-weeks34.nc'
+rm 'tp-'$START_DATE'-weeks34_1.nc' 'tp-'$START_DATE'-weeks34_2.nc' ### Clean workspace
+
+### Select the lead times corresponding to the beginning and the end of weeks 5-6, take the difference of accumulated precip
+### and correct for the negative values bug (negative accumulated precip) by replacing such values with 0 (not strictly necessary for obs)
+ncks -v tp -d lead_time,28 'tp-'$START_DATE'.nc' 'tp-'$START_DATE'-weeks56_1.nc'
+ncks -v tp -d lead_time,42 'tp-'$START_DATE'.nc' 'tp-'$START_DATE'-weeks56_2.nc'
+ncdiff 'tp-'$START_DATE'-weeks56_2.nc' 'tp-'$START_DATE'-weeks56_1.nc' 'tp-'$START_DATE'-weeks56.nc'
+ncap2 -s 'where(tp<0.) tp=0;' -O 'tp-'$START_DATE'-weeks56.nc' 'tp-'$START_DATE'-weeks56.nc'
+rm 'tp-'$START_DATE'-weeks56_1.nc' 'tp-'$START_DATE'-weeks56_2.nc' ### Clean workspace
+
+rm 'tp-'$START_DATE'.nc' ### Clean workspace
+
+done < notebooks/damien_specq_scripts/dates.txt
diff --git a/submissions/.gitattributes b/submissions/.gitattributes
index d77cc4d4d24cd5a11b900f6c609e18f9343c31a0..90027e7a11f6a734cc90dad295644d6d94faef0d 100644
--- a/submissions/.gitattributes
+++ b/submissions/.gitattributes
@@ -1 +1,2 @@
 *.nc filter=lfs diff=lfs merge=lfs -text
+ML_prediction_2020.nc filter=lfs diff=lfs merge=lfs -text
diff --git a/submissions/ML_prediction_2020.nc b/submissions/ML_prediction_2020.nc
index 4ee741efc322484daabb3e10589bdcdf2d3b7f12..8f2bda851f98be277013e8b0ab6f800fcf37eb02 100644
--- a/submissions/ML_prediction_2020.nc
+++ b/submissions/ML_prediction_2020.nc
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de052442cd8ed26807c40ee545645609562687b3c9a894f219de8035bf77c2d0
+oid sha256:bb352c08d531d503ba8aab93cd80e8764686f5ff2ff762a72aa3d695dfc7e520
 size 73909016
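After ML_train_and_predict.R has overwritten the submission file tracked above, one simple sanity check (a sketch; it relies on the same lon, lat, forecast_time, lead_time, category dimension ordering that the script itself uses with ncvar_put) is that the three tercile probabilities sum to 1 at every grid point that is not masked:

library(ncdf4)
f=nc_open("submissions/ML_prediction_2020.nc")
p_t2m=ncvar_get(f,"t2m")
summary(as.vector(apply(p_t2m,1:4,sum))) ### expect 1 everywhere, NA at masked grid points
nc_close(f)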