,欢迎也在微信公众号查看。
# 定义创建训练集和测试集函数
# x 为行索引
# 输出为测试(或训练)数据的索引列表
kfoldSplit <- function(x, k=10, train=TRUE){
x <- sample(x, size = length(x), replace = FALSE)
out <- suppressWarnings(split(x, factor(1:k)))
if(train) out <- lapply(out, FUN = function(x, len) (1:len)[-x], len=length(unlist(out)))
return(out)
}
# RF的残差
resid.RF <- function(x) return(x$y - x$predicted)
set.seed(12345)
k <- 10
kfolds <- kfoldSplit(1:nrow(climDataPT), k = 10, train = TRUE)
#evalData存放7种算法的精度
evalData <- matrix(NA, nrow=k, ncol=7,
dimnames = list(1:k, c("OK","RF","GLM","GAM","RF_OK","GLM_OK","GAM_OK")))
执行七种算法
library(randomForest)
library(mgcv)
library(gstat)
for(i in 1:k){
cat("K-fold...",i,"of",k,"....\n")
# 训练数据集的索引
idx <- kfolds[[i]]
# 找到训练数据集在原数据集中的行,记为True
idxBool <- (1:nrow(climDataPT)) %in% idx
# 测试数据集
obs.test <- climDataPT[!idxBool, "AvgTemp"]
## ------------------------------------------------------------------
## 普通克里金
## ------------------------------------------------------------------
# 变异函数
formMod <- AvgTemp ~ 1
mod <- vgm(model = "Exp", psill = 3, range = 100, nugget = 0.5)
variog <- variogram(formMod, statPoints[idxBool, ])
# 变异函数拟合
variogFitOLS<-fit.variogram(variog, model = mod, fit.method = 6)
# 预测
OK <- krige(formula = formMod ,
locations = statPoints[idxBool, ],
model = variogFitOLS,
newdata = statPoints[!idxBool, ],
debug.level = 0)
ok.pred.test <- OK@data$var1.pred
evalData[i,"OK"] <- sqrt(mean((ok.pred.test - obs.test)^2))
## RF ---------##
RF <- randomForest(y = climDataPT[idx, "AvgTemp"],
x = climDataPT[idx, c("Lat","Elev","distCoast")],
ntree = 500,
mtry = 2)
#测试数据集的随机森林
rf.pred.test <- predict(RF, newdata = climDataPT[-idx,], type="response")
evalData[i,"RF"] <- sqrt(mean((rf.pred.test - obs.test)^2))
# 对训练数据集的RF残差进行OK
#
statPointsTMP <- statPoints[idxBool, ]
statPointsTMP@data <- cbind(statPointsTMP@data, residRF = resid.RF(RF))
formMod <- residRF ~ 1
mod <- vgm(model = "Exp", psill = 0.6, range = 10, nugget = 0.01)
variog <- variogram(formMod, statPointsTMP)
# 最小二乘法拟合半变异函数
variogFitOLS<-fit.variogram(variog, model = mod, fit.method = 6)
# kriging predictions
RF.OK <- krige(formula = formMod ,
locations = statPointsTMP,
model = variogFitOLS,
newdata = statPoints[!idxBool, ],
debug.level = 0)
#RF趋势预测+残差克里金预测
rf.ok.pred.test <- rf.pred.test + RF.OK@data$var1.pred
evalData[i,"RF_OK"] <- sqrt(mean((rf.ok.pred.test - obs.test)^2))
## ----------------------------------------------------------------------------- ##
## GLM calibration ----
## ----------------------------------------------------------------------------- ##
GLM <- glm(formula = AvgTemp ~ Elev + Lat + distCoast, data = climDataPT[idx, ])
glm.pred.test <- predict(GLM, newdata = climDataPT[-idx,], type="response")
evalData[i,"GLM"] <- sqrt(mean((glm.pred.test - obs.test)^2))
# Ordinary Kriging of GLM residuals
#
statPointsTMP <- statPoints[idxBool, ]
statPointsTMP@data <- cbind(statPointsTMP@data, residGLM = resid(GLM))
formMod <- residGLM ~ 1
mod <- vgm(model = "Exp", psill = 0.4, range = 10, nugget = 0.01)
variog <- variogram(formMod, statPointsTMP)
# Variogram fitting by Ordinary Least Sqaure
variogFitOLS<-fit.variogram(variog, model = mod, fit.method = 6)
# kriging predictions
GLM.OK <- krige(formula = formMod ,
locations = statPointsTMP,
model = variogFitOLS,
newdata = statPoints[!idxBool, ],
debug.level = 0)
glm.ok.pred.test <- glm.pred.test + GLM.OK@data$var1.pred
evalData[i,"GLM_OK"] <- sqrt(mean((glm.ok.pred.test - obs.test)^2))
## ----------------------------------------------------------------------------- ##
## GAM calibration ----
## ----------------------------------------------------------------------------- ##
GAM <- gam(formula = AvgTemp ~ s(Elev) + s(Lat) + s(distCoast), data = climDataPT[idx, ])
gam.pred.test <- predict(GAM, newdata = climDataPT[-idx,], type="response")
evalData[i,"GAM"] <- sqrt(mean((gam.pred.test - obs.test)^2))
# Ordinary Kriging of GAM residuals
#
statPointsTMP <- statPoints[idxBool, ]
statPointsTMP@data <- cbind(statPointsTMP@data, residGAM = resid(GAM))
formMod <- residGAM ~ 1
mod <- vgm(model = "Exp", psill = 0.3, range = 10, nugget = 0.01)
variog <- variogram(formMod, statPointsTMP)
# Variogram fitting by Ordinary Least Sqaure
variogFitOLS<-fit.variogram(variog, model = mod, fit.method = 6)
#plot(variog, variogFitOLS, main="OLS Model")
# kriging predictions
GAM.OK <- krige(formula = formMod ,
locations = statPointsTMP,
model = variogFitOLS,
newdata = statPoints[!idxBool, ],
debug.level = 0)
gam.ok.pred.test <- gam.pred.test + GAM.OK@data$var1.pred
evalData[i,"GAM_OK"] <- sqrt(mean((gam.ok.pred.test - obs.test)^2))
}
## K-fold... 1 of 10 ....
## K-fold... 2 of 10 ....
## K-fold... 3 of 10 ....
## K-fold... 4 of 10 ....
## K-fold... 5 of 10 ....
## K-fold... 6 of 10 ....
## K-fold... 7 of 10 ....
## K-fold... 8 of 10 ....
## K-fold... 9 of 10 ....
## K-fold... 10 of 10 ....
查看七中种方法的精度
round(apply(evalData,2,FUN = function(x,...) c(mean(x,...),sd(x,...))),3)
## OK RF GLM GAM RF_OK GLM_OK GAM_OK
## [1,] 1.227 0.689 0.593 0.568 0.645 0.550 0.527
## [2,] 0.451 0.195 0.174 0.175 0.155 0.187 0.178
rmse结果表明,RK的结果要比单独的回归模型或OK模型的误差更小。基于GAM的RK方法的效果是最好的。
接下来,我们用GAM+OK对所有站点的数据进行插值得到气温表面图
GAM <- gam(formula = AvgTemp ~ s(Elev) + s(Lat) + s(distCoast), data = climDataPT)
#利用GAM模型生成表面趋势图
rstPredGAM <- predict(rst, GAM, type="response")为了生成表面图的参考,将栅格图层转为sp对象
rstPixDF <- as(rst[[1]], "SpatialPixelsDataFrame")
像之前一样,我们用对残差进行Kriging建模,并将结果加到回归结果中。
# 创建一个 SpatialPointsDF 对象用来存储GAM residuals
statPointsTMP <- statPoints
crs(statPointsTMP) <- crs(rstPixDF)
statPointsTMP@data <- cbind(statPointsTMP@data, residGAM = resid(GAM))# Define the kriging parameters and fit the variogram using OLS
formMod <- residGAM ~ 1
mod <- vgm(model = "Exp", psill = 0.15, range = 10, nugget = 0.01)
variog <- variogram(formMod, statPointsTMP)
variogFitOLS <- fit.variogram(variog, model = mod, fit.method = 6)
# Plot the results
plot(variog, variogFitOLS, main="Semi-variogram of GAM residuals")
residKrigMap <- krige(formula = formMod ,
locations = statPointsTMP,
model = variogFitOLS,
newdata = rstPixDF)## [using ordinary kriging]
residKrigRstLayer <- as(residKrigMap, "RasterLayer")
gamKrigMap <- rstPredGAM + residKrigRstLayer
plot(gamKrigMap, main="Annual average air temperature\n(GAM regression-kriging)",
xlab="Longitude", ylab="Latitude", cex.main=0.8, cex.axis=0.7, cex=0.8)
参考链接:
- https://www.r-exercises.com/2018/03/31/advanced-techniques-with-raster-data-part-3-regression-kriging/
- https://zhuanlan.zhihu.com/p/110268967
- https://zhuanlan.zhihu.com/p/53001283