CLS(GRS)加权平均法的实现
利用R包ForecastComb实现GRS权重
library(ForecastComb)
library(plyr)
load("Obs_E.RData")
load("Sim_E.RData") #加载数据
wei <- matrix(NA,nrow=259200,ncol=13,byrow=FALSE) #权重矩阵
for (k in 1:259200) {
#如果观测值为NA或者模拟值为NA或者模拟值存在某个模型为0值的情况,直接赋给权重矩阵NA值,否则均会报错~
if (sum(is.na(Sim_E[k,,])) > 0 |sum(is.na(Obs_E[k,])) > 0 | sum(Sim_E[k,,]==0)>0)
wei[k,] <- NA
else {
obs <- as.matrix(Obs_E[k,]) # time series 38 years for first grid
sims <- as.matrix(Sim_E[k,,])
train_o<-obs
train_p<-sims
#if 13 model at least have one not exist NA value
data <- foreccomb( train_o, train_p, criterion = "RMSE" )
comb <- comb_CLS(data)
wei[k,] <- comb$Weights #将comb结果中的权重值赋给权重矩阵
}
}
开始试了另一种写法无法实现for循环:
嗯,换了一种if语句的写法后可以成功运行,虽然不明白道理但还是觉得R的bug很神奇…
comb
$Method
[1] "Constrained Least Squares Regression"
$Models
[1] "Series 1" "Series 2" "Series 3" "Series 4" "Series 5" "Series 6" "Series 7" "Series 8" "Series 9"
[10] "Series 10" "Series 11" "Series 12" "Series 13"
$Fitted
Time Series:
Start = 1
End = 38
Frequency = 1
[1] 46.14915 52.68693 50.98800 51.54153 52.13120 52.05020 51.35101 50.26254 60.17366 53.98218 55.25538 57.59891
[13] 45.50858 48.70368 60.72363 59.63428 52.55082 49.66912 56.53192 49.23355 46.03122 48.37652 54.95151 55.63080
[25] 52.78973 60.31763 55.61538 55.72048 51.87493 57.14644 58.02818 53.38190 56.24403 49.15279 51.03570 49.36483
[37] 62.39020 51.81775
$Accuracy_Train
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 0.2600359 7.406029 6.125074 -1.157483 11.49217 0.3122972 0.8394151
$Input_Data
$Input_Data$Actual_Train
Time Series:
Start = 1
End = 38
Frequency = 1
Series 1
[1,] 54.02538
[2,] 58.88145
[3,] 54.88387
[4,] 46.50533
[5,] 49.33444
[6,] 61.01971
[7,] 47.65489
[8,] 56.12291
[9,] 52.85560
[10,] 47.59162
[11,] 45.07869
[12,] 51.77551
[13,] 43.24770
[14,] 49.65973
[15,] 51.74428
[16,] 46.94758
[17,] 39.65228
[18,] 49.52254
[19,] 51.65896
[20,] 48.17377
[21,] 48.36468
[22,] 41.81425
[23,] 50.38616
[24,] 54.76865
[25,] 51.32349
[26,] 62.18721
[27,] 57.57950
[28,] 57.16246
[29,] 67.45329
[30,] 64.21703
[31,] 53.42240
[32,] 67.50176
[33,] 66.28686
[34,] 39.92145
[35,] 57.60554
[36,] 65.28342
[37,] 66.72845
[38,] 58.13480
$Input_Data$Forecasts_Train
Time Series:
Start = 1
End = 38
Frequency = 1
Series 1 Series 2 Series 3 Series 4 Series 5 Series 6 Series 7 Series 8 Series 9 Series 10 Series 11
$Weights
[1] -3.962309e-16 -3.474940e-17 -9.156774e-17 4.049795e-18 -8.573420e-18 3.445116e-17 1.149014e-01
[8] -2.311289e-16 1.387779e-17 3.119803e-01 0.000000e+00 2.645099e-01 3.086084e-01
attr(,"class")
[1] "foreccomb_res"
comb可输出模型最佳拟合结果的评价指标ME,RMSE等以及最佳的权重值,这样一来对于多模型时间序列数据的模型加权就加到了每一个网格中
得到权重矩阵后做一下矩阵乘法就能得到模型综合后的结果。(如果无法使用%*%运算,可以老老实实写一个乘法函数)
1129修改:
改进了for循环,不是将所有包含NA值的列去掉,而是利用其它模型的值做模拟,并放在对应的权重矩阵位置中:
for (k in 1:259200) {
if (sum(is.na(Sim_E[k,,])) > 418|sum(is.na(Obs_E[k,]))>0 )# at least two models no NA
wei[k,] <- NA
else {
naid <- which(!is.na(Sim_E[k,1,])) #NA值所在的列数
obs <- as.matrix(Obs_E[k,]) # time series 38 years for first grid
sims <- as.matrix(Sim_E[k,,naid]) #delete NA col
train_o<-obs
train_p<-sims
if(sum(na.omit(train_p) ==0 )>0)
wei[k,] <- NA
else {
#if 13 model at least have one not exist NA value
data <- foreccomb( train_o, train_p,na.impute = TRUE, criterion = "RMSE" )
comb <- comb_CLS(data)
wei[k,naid] <- comb$Weights
}
}
}
11.30又一次修改
这次将所有模型为0的值赋值为NA,且不再去除含有NA值的栅格,而是将有NA值的模型去除,用剩下的模拟值来做回归,我设置为至少有5个模型是存在值的:
we_MAE <- matrix(NA,nrow=259200,ncol=13,byrow=FALSE)
for (k in 1:259200) {
if (sum(is.na(Sim_E[k,,])) > 304|sum(is.na(Obs_E[k,]))>0 )# at least two models no NA
we_MAE[k,] <- NA
else {
oid <- which(Sim_E[k,1,]==0)
Sim_E[k,,oid] <- NA #0 value to NA
naid <- which(!is.na(Sim_E[k,1,])) #the !na model col
obs <- as.matrix(Obs_E[k,]) # time series 38 years for first grid
sims <- as.matrix(Sim_E[k,,naid]) #delete NA col
train_o<-obs
train_p<-sims
# inverse value to NA
for (h in 1:38) {
for (j in 1 :length(train_p[1,])) {
if ( is.na(train_p[h,j]) )
train_p[h,j] <- mean(train_p[h,],na.rm = TRUE)
}
}
#if 13 model at least have one not exist NA value
data <- foreccomb(train_o, train_p,na.impute = TRUE, criterion = "MAE" )
comb <- comb_CLS(data)
we_MAE[k,naid] <- comb$Weights
}
}
然后画图:
Sim <- rowMeans(sim_RMSE, na.rm = T)
Obs <- rowMeans(Obs_E, na.rm = T)
Dataa <- as.data.frame(matrix(NA, nrow = 259200))
Dataa$Obs <- Obs
Dataa$Sim <- Sim
Data2 <- melt(data = Dataa, id.vars = c('Obs'), variable.name = 'Factor', value.name = "Sim")
lm_labels<-function(dat){
mod<-lm(Sim~Obs,data=dat)
formula<-sprintf("italic(y) == %.2f %+.2f * italic(x)",
round(coef(mod)[1],2),round(coef(mod)[2],2))
r<-cor(dat$Sim,dat$Obs , use = "complete.obs")
r2<-sprintf("italic(R^2)== %.2f",r^2)
RMSE1 <- rmse(dat$Sim,dat$Obs)
RMSE<-sprintf("italic(RMSE)== %.2f",RMSE1)
RMSE<-paste(RMSE,'*','mm', sep = ' ')
NSE<- NSE(dat$Sim,dat$Obs)
NSE<-sprintf("italic(NSE)== %.2f", NSE)
MAE <- mean(abs(dat$Sim - dat$Obs))
MAE<-sprintf("italic(MAE)== %.2f",MAE)
MAE<-paste(MAE,'*','mm', sep = ' ')
data.frame(formula=formula,NSE=NSE,r2=r2,RMSE=RMSE, MAE=MAE, stringsAsFactors = FALSE)
}
labels<-ddply(na.omit(Data2),"Factor",lm_labels)
png(filename = "E:/rroom/obs and sim4.png",res=300,width=12,height=10,units="in") #
ggplot(Dataa, aes(x = Obs, y = Sim))+
geom_point(size = 0.7)+ #alpha = 0.8,
geom_smooth(method = lm, colour = "Red")+ #, fullrange=TRUE
geom_abline(slope = 1,colour = "black", linetype = "dashed")+
facet_wrap(~Factor,scales = "free", nrow = 3)+
xlab('Satellite-observation-based evapotranspiration (mm)')+
ylab('Simulated evapotranspiration (mm)')+
# xlim(0,6000)+ #ylim(0,6000)+
# geom_text(x=-Inf,y=Inf,aes(label=formula),data=labels,parse=TRUE,hjust=-0.2, vjust=1.6)+
geom_text(x=-Inf,y=Inf,aes(label=r2),data=labels,parse=TRUE,hjust=-0.12, vjust=1.4)+
geom_text(x=-Inf,y=Inf,aes(label=NSE),data=labels,parse=TRUE,hjust=-0.1, vjust=4)+
#geom_text(x=-Inf,y=Inf,aes(label=RMSE),data=labels,parse=TRUE,hjust=-0.15, vjust=5.8+ 2)+
theme(axis.title = element_text(size = rel(1.2), face = "bold"),
axis.text = element_text(size = rel(1),face = "bold"),
strip.text = element_text(size = rel(1),face = "bold"),
legend.text = element_text(size = rel(1),face = "bold"),
legend.title= element_blank(),legend.background = element_blank(),legend.position = c(0.1,0.9))
dev.off()
画出的图如下:
模型综合后效果还是提高了的