【R语言】多模型综合——CLS（GRS）加权平均法的实现

最新推荐文章于 2023-03-09 15:11:18 发布

double-star

最新推荐文章于 2023-03-09 15:11:18 发布

阅读量1.9k

点赞数

分类专栏： R 文章标签： r语言算法

本文链接：https://blog.csdn.net/doublesql/article/details/110296314

版权

R 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

CLS（GRS）加权平均法的实现

利用R包ForecastComb实现GRS权重

library(ForecastComb)
library(plyr)
load("Obs_E.RData")
load("Sim_E.RData")  #加载数据
wei <- matrix(NA,nrow=259200,ncol=13,byrow=FALSE) #权重矩阵
for (k in 1:259200) {
#如果观测值为NA或者模拟值为NA或者模拟值存在某个模型为0值的情况，直接赋给权重矩阵NA值，否则均会报错~
  if (sum(is.na(Sim_E[k,,])) > 0 |sum(is.na(Obs_E[k,])) > 0 | sum(Sim_E[k,,]==0)>0)    
    wei[k,] <- NA
  else {
    obs <- as.matrix(Obs_E[k,]) # time series 38 years for first grid
    sims <- as.matrix(Sim_E[k,,])
    train_o<-obs
    train_p<-sims
    #if 13 model at least have one not exist NA value
    data <- foreccomb( train_o, train_p, criterion = "RMSE" )
    comb <- comb_CLS(data)
    wei[k,] <- comb$Weights #将comb结果中的权重值赋给权重矩阵
  }
}

开始试了另一种写法无法实现for循环：
在这里插入图片描述
嗯，换了一种if语句的写法后可以成功运行，虽然不明白道理但还是觉得R的bug很神奇…

comb
$Method
[1] "Constrained Least Squares Regression"

$Models
 [1] "Series 1"  "Series 2"  "Series 3"  "Series 4"  "Series 5"  "Series 6"  "Series 7"  "Series 8"  "Series 9" 
[10] "Series 10" "Series 11" "Series 12" "Series 13"

$Fitted
Time Series:
Start = 1 
End = 38 
Frequency = 1 
 [1] 46.14915 52.68693 50.98800 51.54153 52.13120 52.05020 51.35101 50.26254 60.17366 53.98218 55.25538 57.59891
[13] 45.50858 48.70368 60.72363 59.63428 52.55082 49.66912 56.53192 49.23355 46.03122 48.37652 54.95151 55.63080
[25] 52.78973 60.31763 55.61538 55.72048 51.87493 57.14644 58.02818 53.38190 56.24403 49.15279 51.03570 49.36483
[37] 62.39020 51.81775

$Accuracy_Train
                ME     RMSE      MAE       MPE     MAPE      ACF1 Theil's U
Test set 0.2600359 7.406029 6.125074 -1.157483 11.49217 0.3122972 0.8394151

$Input_Data
$Input_Data$Actual_Train
Time Series:
Start = 1 
End = 38 
Frequency = 1 
      Series 1
 [1,] 54.02538
 [2,] 58.88145
 [3,] 54.88387
 [4,] 46.50533
 [5,] 49.33444
 [6,] 61.01971
 [7,] 47.65489
 [8,] 56.12291
 [9,] 52.85560
[10,] 47.59162
[11,] 45.07869
[12,] 51.77551
[13,] 43.24770
[14,] 49.65973
[15,] 51.74428
[16,] 46.94758
[17,] 39.65228
[18,] 49.52254
[19,] 51.65896
[20,] 48.17377
[21,] 48.36468
[22,] 41.81425
[23,] 50.38616
[24,] 54.76865
[25,] 51.32349
[26,] 62.18721
[27,] 57.57950
[28,] 57.16246
[29,] 67.45329
[30,] 64.21703
[31,] 53.42240
[32,] 67.50176
[33,] 66.28686
[34,] 39.92145
[35,] 57.60554
[36,] 65.28342
[37,] 66.72845
[38,] 58.13480

$Input_Data$Forecasts_Train
Time Series:
Start = 1 
End = 38 
Frequency = 1 
   Series 1 Series 2 Series 3  Series 4  Series 5 Series 6 Series 7  Series 8  Series 9 Series 10 Series 11
$Weights
 [1] -3.962309e-16 -3.474940e-17 -9.156774e-17  4.049795e-18 -8.573420e-18  3.445116e-17  1.149014e-01
 [8] -2.311289e-16  1.387779e-17  3.119803e-01  0.000000e+00  2.645099e-01  3.086084e-01

attr(,"class")
[1] "foreccomb_res"

comb可输出模型最佳拟合结果的评价指标ME，RMSE等以及最佳的权重值，这样一来对于多模型时间序列数据的模型加权就加到了每一个网格中
得到权重矩阵后做一下矩阵乘法就能得到模型综合后的结果。（如果无法使用%*%运算，可以老老实实写一个乘法函数）

1129修改：

改进了for循环，不是将所有包含NA值的列去掉，而是利用其它模型的值做模拟，并放在对应的权重矩阵位置中：

for (k in 1:259200) {
  if (sum(is.na(Sim_E[k,,])) > 418|sum(is.na(Obs_E[k,]))>0 )# at least two models no NA
    wei[k,] <- NA
  else {
    naid <- which(!is.na(Sim_E[k,1,]))    #NA值所在的列数
    obs <- as.matrix(Obs_E[k,]) # time series 38 years for first grid
    sims <- as.matrix(Sim_E[k,,naid]) #delete NA col
    train_o<-obs
    train_p<-sims
        if(sum(na.omit(train_p) ==0 )>0)
          wei[k,] <- NA
    else {
        #if 13 model at least have one not exist NA value
        data <- foreccomb( train_o, train_p,na.impute = TRUE, criterion = "RMSE" )
        comb <- comb_CLS(data)
        wei[k,naid] <- comb$Weights
    }
  }
}

11.30又一次修改

这次将所有模型为0的值赋值为NA，且不再去除含有NA值的栅格，而是将有NA值的模型去除，用剩下的模拟值来做回归，我设置为至少有5个模型是存在值的：

we_MAE <- matrix(NA,nrow=259200,ncol=13,byrow=FALSE)
for (k in 1:259200) {
  if (sum(is.na(Sim_E[k,,])) > 304|sum(is.na(Obs_E[k,]))>0 )# at least two models no NA
    we_MAE[k,] <- NA
  else {
    oid <- which(Sim_E[k,1,]==0)   
    Sim_E[k,,oid] <- NA                          #0 value to NA
    naid <- which(!is.na(Sim_E[k,1,]))      #the !na model col
    obs <- as.matrix(Obs_E[k,]) # time series 38 years for first grid
    sims <- as.matrix(Sim_E[k,,naid]) #delete NA col
    train_o<-obs
    train_p<-sims
    # inverse value to NA  
    for (h in 1:38) {
      for (j in 1 :length(train_p[1,])) {
        if (  is.na(train_p[h,j])  )
          train_p[h,j] <- mean(train_p[h,],na.rm = TRUE)
      }
    }
    #if 13 model at least have one not exist NA value
    data <- foreccomb(train_o, train_p,na.impute = TRUE, criterion = "MAE" )
    comb <- comb_CLS(data)
    we_MAE[k,naid] <- comb$Weights
  }
}

然后画图：

Sim <- rowMeans(sim_RMSE, na.rm = T)
Obs <- rowMeans(Obs_E, na.rm = T)

Dataa <- as.data.frame(matrix(NA, nrow = 259200))
Dataa$Obs <- Obs
Dataa$Sim <- Sim

Data2 <- melt(data = Dataa, id.vars = c('Obs'), variable.name = 'Factor', value.name = "Sim")
lm_labels<-function(dat){
  mod<-lm(Sim~Obs,data=dat)
  formula<-sprintf("italic(y) == %.2f %+.2f * italic(x)",
                   round(coef(mod)[1],2),round(coef(mod)[2],2))
  r<-cor(dat$Sim,dat$Obs , use = "complete.obs")
  r2<-sprintf("italic(R^2)== %.2f",r^2)
  
  RMSE1 <- rmse(dat$Sim,dat$Obs)
  RMSE<-sprintf("italic(RMSE)== %.2f",RMSE1)
  RMSE<-paste(RMSE,'*','mm', sep = ' ')
  
  NSE<- NSE(dat$Sim,dat$Obs)
  NSE<-sprintf("italic(NSE)== %.2f", NSE)
  
  MAE <- mean(abs(dat$Sim - dat$Obs))
  MAE<-sprintf("italic(MAE)== %.2f",MAE)
  MAE<-paste(MAE,'*','mm', sep = ' ')
  data.frame(formula=formula,NSE=NSE,r2=r2,RMSE=RMSE, MAE=MAE, stringsAsFactors = FALSE)
}

labels<-ddply(na.omit(Data2),"Factor",lm_labels)
png(filename = "E:/rroom/obs and sim4.png",res=300,width=12,height=10,units="in") #
 ggplot(Dataa, aes(x = Obs, y = Sim))+
  geom_point(size = 0.7)+ #alpha = 0.8,
  geom_smooth(method = lm, colour = "Red")+ #, fullrange=TRUE
  geom_abline(slope = 1,colour = "black", linetype = "dashed")+
  facet_wrap(~Factor,scales = "free", nrow = 3)+
  xlab('Satellite-observation-based evapotranspiration  (mm)')+
  ylab('Simulated evapotranspiration (mm)')+
  # xlim(0,6000)+ #ylim(0,6000)+
  # geom_text(x=-Inf,y=Inf,aes(label=formula),data=labels,parse=TRUE,hjust=-0.2, vjust=1.6)+
  geom_text(x=-Inf,y=Inf,aes(label=r2),data=labels,parse=TRUE,hjust=-0.12, vjust=1.4)+
  geom_text(x=-Inf,y=Inf,aes(label=NSE),data=labels,parse=TRUE,hjust=-0.1, vjust=4)+
  #geom_text(x=-Inf,y=Inf,aes(label=RMSE),data=labels,parse=TRUE,hjust=-0.15, vjust=5.8+ 2)+
  theme(axis.title = element_text(size = rel(1.2), face = "bold"),
        axis.text = element_text(size = rel(1),face = "bold"),
        strip.text = element_text(size = rel(1),face = "bold"),
        legend.text = element_text(size = rel(1),face = "bold"),
        legend.title= element_blank(),legend.background = element_blank(),legend.position = c(0.1,0.9))
dev.off()

画出的图如下：
在这里插入图片描述
模型综合后效果还是提高了的

double-star

关注

0
点赞
踩
10

收藏

觉得还不错? 一键收藏
0
评论
【R语言】多模型综合——CLS（GRS）加权平均法的实现

CLS（GRS）加权平均法的实现利用R包ForecastComb实现GRS权重library(ForecastComb)library(plyr)load("Obs_E.RData")load("Sim_E.RData")wei <- matrix(NA,nrow=259200,ncol=13,byrow=FALSE)for (k in 1:259200) { if (sum(is.na(Sim_E[k,,])) > 0 |sum(is.na(Obs_E[k,])) >
复制链接

扫一扫