已知名为数据最后的excel表里有对应的区县的16个指标的原始数据,数据集中在2-3180行中的3-18列,第12和16的指标为负向指标,将下面R代码进行更改,导入已有的数据进行计算,去掉PCA步骤。设定评价等级0-0.10 ,每0.1划分一个等级,一共十个,critic权重16个指标权重分别为0.056239409 0.0706845 0.041338941 0.053615617 0.028145041 0.098472751 0.073436629 0.071329194 0.065332485 0.067306697 0.037185717 0.13270281 0.015424533 0.02636872 0.113528673 0.048888285,根据论文中3.2节
算出评价值T。rm(list = ls())
library(e1071) # SVM模型
library(forecast) # 时间序列预测
library(ggplot2) # 数据可视化
library(matrixStats) # 矩阵计算
set.seed(0) # 设置随机种子
### 1. 生成模拟数据(13个城市,2003-2020年,12个指标)----
cities <- c("北京","天津","邯郸","邢台","石家庄","保定","衡水","沧州","廊坊","唐山","秦皇岛","张家口","承德")
numCities <- length(cities)
years <- 2003:2020
numYears <- length(years)
indicators <- c("G1","G2","G3","G4","G5","G7","G8","G9","G10","G12","G13","G14")
numIndicators <- length(indicators)
# 生成历史数据(正态分布)
data <- array(0, dim = c(numCities, numYears, numIndicators))
for (i in 1:numCities) {
for (j in 1:numIndicators) {
baseMean <- 50 + 30*runif(1)
trend <- (0.5 + 0.5*runif(1)) * seq(-1, 2, length.out = numYears)
baseStd <- 5 + 10*runif(1)
data[i, , j] <- baseMean + trend + rnorm(numYears, 0, baseStd)
}
}
# 生成历史WRCC值(0.3-0.7范围)
historicalWRCC <- matrix(0.3 + 0.4*runif(numCities*numYears),
nrow = numCities, ncol = numYears)
### 2. 准备训练数据 ----
X_train <- matrix(NA, nrow = numCities * numYears, ncol = numIndicators)
for (i in 1:numCities) {
for (y in 1:numYears) {
row_idx <- (i-1)*numYears + y
X_train[row_idx, ] <- data[i, y, ]
}
}
Y_train <- as.vector(historicalWRCC)
#################################################################
### 2.5 PCA指标筛选 ----
# 添加PCA分析筛选重要指标
pca_result <- prcomp(X_train, scale. = TRUE)
pca_summary <- summary(pca_result)
# 选择累计贡献率超过85%的主成分
cumulative_var <- cumsum(pca_summary$importance[2,])
n_components <- which(cumulative_var >= 0.85)[1]
# 获取主成分载荷矩阵
loadings <- pca_result$rotation[, 1:n_components]
# 计算每个原始变量的重要性得分(平均绝对载荷)
variable_importance <- rowMeans(abs(loadings))
# 选择重要性得分最高的8个指标
selected_indices <- order(variable_importance, decreasing = TRUE)[1:8]
# 更新训练数据和指标名称
X_train <- X_train[, selected_indices]
indicators <- indicators[selected_indices]
numIndicators <- length(indicators)
cat("筛选后的指标:", indicators, "\n")
#################################################################
### 3. GWO-SVM模型训练 ----
# 灰狼优化算法函数(修复比较运算符)
GWO <- function(SearchAgents, MaxIter, lb, ub, dim, X, Y) {
# 初始化alpha, beta, delta
alpha_pos <- numeric(dim)
alpha_score <- Inf
beta_pos <- numeric(dim)
beta_score <- Inf
delta_pos <- numeric(dim)
delta_score <- Inf
# 初始化灰狼位置(矩阵优化)
positions <- matrix(runif(SearchAgents*dim), ncol = dim) %*% diag(ub - lb) +
matrix(rep(lb, each = SearchAgents), ncol = dim)
for (iter in 1:MaxIter) {
a <- 2 - iter * (2 / MaxIter) # a线性递减
for (i in 1:SearchAgents) {
# 边界处理
positions[i, ] <- pmax(pmin(positions[i, ], ub), lb)
# 计算适应度
fitness <- SVMfitness(positions[i, ], X, Y)
# 修复比较运算符(使用实际的<符号)
if (fitness < alpha_score) {
delta_score <- beta_score
delta_pos <- beta_pos
beta_score <- alpha_score
beta_pos <- alpha_pos
alpha_score <- fitness
alpha_pos <- positions[i, ]
} else if (fitness < beta_score) {
delta_score <- beta_score
delta_pos <- beta_pos
beta_score <- fitness
beta_pos <- positions[i, ]
} else if (fitness < delta_score) {
delta_score <- fitness
delta_pos <- positions[i, ]
}
}
# 更新灰狼位置
for (i in 1:SearchAgents) {
for (d in 1:dim) {
r1 <- runif(1)
r2 <- runif(1)
A1 <- 2*a*r1 - a
C1 <- 2*r2
D_alpha <- abs(C1*alpha_pos[d] - positions[i, d])
X1 <- alpha_pos[d] - A1*D_alpha
r1 <- runif(1)
r2 <- runif(1)
A2 <- 2*a*r1 - a
C2 <- 2*r2
D_beta <- abs(C2*beta_pos[d] - positions[i, d])
X2 <- beta_pos[d] - A2*D_beta
r1 <- runif(1)
r2 <- runif(1)
A3 <- 2*a*r1 - a
C3 <- 2*r2
D_delta <- abs(C3*delta_pos[d] - positions[i, d])
X3 <- delta_pos[d] - A3*D_delta
positions[i, d] <- (X1 + X2 + X3) / 3
}
}
}
return(list(alpha_pos = alpha_pos, alpha_score = alpha_score))
}
# SVM适应度函数
SVMfitness <- function(params, X, Y) {
boxConstraint <- max(0.01, params[1])
kernelScale <- max(0.01, params[2])
tryCatch({
# 数据标准化
X_scaled <- scale(X)
# 使用5折交叉验证
folds <- sample(rep(1:5, length.out = nrow(X_scaled)))
mse <- numeric(5)
for (k in 1:5) {
train_idx <- which(folds != k)
test_idx <- which(folds == k)
gamma_val <- 1/(ncol(X_scaled) * kernelScale^2)
svm_model <- svm(x = X_scaled[train_idx, ], y = Y[train_idx],
type = "eps-regression", kernel = "radial",
cost = boxConstraint, gamma = gamma_val,
scale = FALSE) # 已手动缩放
pred <- predict(svm_model, X_scaled[test_idx, ])
mse[k] <- mean((pred - Y[test_idx])^2)
}
mean(mse)
}, error = function(e) {
warning("适应度计算错误:", e$message)
return(1e10)
})
}
# 执行GWO优化
SearchAgents <- 10
MaxIter <- 30
dim <- 2
lb <- c(0.1, 0.01)
ub <- c(100, 10)
gwo_result <- GWO(SearchAgents, MaxIter, lb, ub, dim, X_train, Y_train)
best_params <- gwo_result$alpha_pos
best_score <- gwo_result$alpha_score
# 确保返回两个参数
if (length(best_params) < 2) {
warning("GWO未能返回两个参数,使用默认值")
boxConstraint <- 10
kernelScale <- 1
} else {
boxConstraint <- best_params[1]
kernelScale <- best_params[2]
}
cat(sprintf("优化结果: BoxConstraint=%.4f, KernelScale=%.4f, MSE=%.6f\n",
boxConstraint, kernelScale, best_score))
# 训练最终SVM模型
gamma_val <- 1/(ncol(X_train) * kernelScale^2)
svmModel <- svm(x = X_train, y = Y_train,
type = "eps-regression", kernel = "radial",
cost = boxConstraint, gamma = gamma_val,
scale = TRUE)
# 验证模型有效性
train_pred <- predict(svmModel, X_train)
if (sd(train_pred) < 1e-5) {
stop("模型输出常数! 请检查参数优化过程")
}
cat(sprintf("训练集预测方差: %.6f\n", sd(train_pred)))
### 4. 蒙特卡洛模拟预测2025年指标 ----
numSamples <- 400
predictYear <- 2025
pred2025 <- matrix(0, nrow = numCities, ncol = numIndicators)
stdDev <- matrix(0, nrow = numCities, ncol = numIndicators)
X_pred <- matrix(0, nrow = numCities*numSamples, ncol = numIndicators)
# 修正的二次指数平滑函数
correctDoubleES <- function(data, alpha) {
n <- length(data)
S1 <- numeric(n)
S2 <- numeric(n)
S1[1] <- data[1]
S2[1] <- data[1]
for (t in 2:n) {
S1[t] <- alpha * data[t] + (1 - alpha) * S1[t-1]
S2[t] <- alpha * S1[t] + (1 - alpha) * S2[t-1]
}
# 正确预测公式
forecast_val <- S1[n] + (S1[n] - S2[n]) * alpha / (1 - alpha)
return(list(
forecast = forecast_val,
stdDev = sd(data) # 使用原始数据标准差
))
}
for (i in 1:numCities) {
for (j in 1:numIndicators) {
# 使用修正的平滑函数
res <- correctDoubleES(data[i, , j], alpha = 0.3)
pred2025[i, j] <- res$forecast
# 确保标准差有效
std_dev <- max(0.1, res$stdDev)
samples <- rnorm(numSamples, mean = res$forecast, sd = std_dev)
startIdx <- (i-1)*numSamples + 1
endIdx <- i*numSamples
X_pred[startIdx:endIdx, j] <- samples
}
}
# 添加异常值检查
if (any(is.na(X_pred)) || any(is.infinite(X_pred))) {
stop("蒙特卡洛模拟生成无效数据")
}
### 5. WRCC预测与概率统计 ----
Y_pred <- predict(svmModel, X_pred)
# 检查预测结果有效性
if (sd(Y_pred) < 1e-5) {
warning("警告:预测结果方差过小!")
}
results <- list()
threshold <- 0.5 # IV级阈值
for (i in 1:numCities) {
startIdx <- (i-1)*numSamples + 1
endIdx <- i*numSamples
cityPred <- Y_pred[startIdx:endIdx]
# 防止概率计算错误
if (length(cityPred) != numSamples) {
stop(paste("城市", i, "样本数量错误"))
}
results[[i]] <- list(
city = cities[i],
meanWRCC = mean(cityPred),
stdWRCC = sd(cityPred),
probGradeIV = sum(cityPred > threshold) / numSamples * 100,
predictions = cityPred
)
}
### 6. 结果可视化 ----
# 显示各城市概率结果
cat("\n===== 2025年WRCC预测结果 =====\n")
cat(sprintf("%-8s %-10s %-10s %s\n", "城市", "均值", "标准差", "达IV级概率(%)"))
for (i in 1:numCities) {
res <- results[[i]]
cat(sprintf("%-8s %.6f %.6f %.2f\n",
res$city, res$meanWRCC, res$stdWRCC, res$probGradeIV))
}
# 绘制北京市概率分布
beijingIdx <- which(sapply(results, function(x) x$city == "北京"))
if (length(beijingIdx) > 0) {
beijingData <- data.frame(WRCC = results[[beijingIdx]]$predictions)
p <- ggplot(beijingData, aes(x = WRCC)) +
geom_histogram(aes(y = ..density..), bins = 30, fill = "skyblue", alpha = 0.7) +
geom_density(color = "red", linewidth = 1) +
geom_vline(xintercept = threshold, color = "blue", linetype = "dashed", size = 1) +
labs(title = "北京市WRCC预测值概率分布",
x = "WRCC值", y = "概率密度") +
theme_minimal()
print(p)
}
# 绘制所有城市达IV级概率
probabilities <- sapply(results, function(x) x$probGradeIV)
cityNames <- sapply(results, function(x) x$city)
p <- ggplot(data.frame(City = factor(cityNames, levels = cityNames),
Prob = probabilities),
aes(x = City, y = Prob)) +
geom_bar(stat = "identity", fill = "steelblue", alpha = 0.8) +
geom_text(aes(label = sprintf("%.1f%%", Prob)), vjust = -0.5) +
labs(title = "各城市WRCC达IV级概率", y = "概率 (%)") +
ylim(0, 100) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10))
print(p)
最新发布