【R语言 商务数据分析实战5】
chapter 5
金融服务机构资金流量预测
数据理解和预处理>>检验平稳性+纯随机性>>得到模型阶数建立ARIMA模型
5.2 任务实现
检验数的平稳性
# 设置工作目录并读取数据
setwd()
# 快速读取数据
dataFile <- data.frame(data.table::fread('./data/user_balance_table.csv'))
# 日期转化为标准时间格式
dataFile$report_date_new <- as.Date(as.character(dataFile$report_date), '%Y%m%d')
# 创建一个新的数据集,对相同日期的资金申购量进行统计
library(plyr)
data.purchase <- ddply(dataFile, .(report_date_new), summarize,
purchase = sum(total_purchase_amt))
# 绘制时序图
purchase <- ts(data.purchase$purchase, start = c(2013, 7),
end = c(2014, 8), frequency = 365)
plot(purchase, type="l", ylab = "inflows", xlab = "date", xaxt = "n")
acf(purchase, lag.max = 30) # 数据自相关检验
# 处理非平稳序列
# 筛选2014-03-01至2014-07-31的数据作为训练集,2014—08数据作为测试集
index <- which(data.purchase$report_date_new >= as.Date("2014-03-01") &
data.purchase$report_date_new <= as.Date("2014-07-31"))
halcyon.purchase <- ts(data.purchase$purchase[index])
# 绘制时序图
plot(halcyon.purchase, ylab = "inflows", xlab = "date", xaxt = "n")
acf(halcyon.purchase, lag.max = 30) # 自相关性检验
# 差分
purchase.diff <- diff(halcyon.purchase, lag = 7, differences = 1)
# 绘制时序图
plot(purchase.diff, ylab = 'diff_inflows', xlab = 'date', xaxt = "n")
acf(purchase.diff, lag.max = 30) # 自相关性检验
# 单位根检验
library(tseries)
adf.test(purchase.diff)
检验数据的纯随机性
Box.test(purchase.diff, lag = 1, type = "Ljung-Box") # 纯随机性
模型定阶
# 模型定阶
library(TSA)
res1 <- armasubsets(y = halcyon.purchase, nar = 5, nma = 5) # 原始序列BIC定阶
plot(res1)
res2 <- armasubsets(y = purchase.diff, nar = 5, nma = 5) # 周期性差分序列BIC定阶
plot(res2)
模型残差检验
# 模型残差检验
library(forecast)
for(p in c(0, 1, 3)){
for(q in c(3, 4)){
for (P in c(0, 1, 3)) {
arima.model <- Arima(halcyon.purchase, order = c(p, 0, q),
seasonal = list(order = c(P, 1, 1), period = 7))
box.test.result <- Box.test(arima.model$residuals, lag = 1,
type = "Ljung-Box")
print(paste('p =', p, '; q =', q , '; P =', P,'; 残差P值:',
round(box.test.result$p.value, 4), collapse = ""))
}
}
}
模型评估
# 模型评估
for(p in c(0, 1, 3)){
for(q in c(3, 4)){
for (P in c(0, 1, 3)) {
arima.model <- Arima(halcyon.purchase, order = c(p, 0, q),
seasonal = list(order = c(P, 1, 1), period = 7))
forecast.data <- forecast(arima.model, h = 31)
pred <- forecast.data$mean
index <- which(data.purchase$report_date_new >= as.Date("2014-08-1"))
target <- ts(data.purchase$purchase[index])
result <- data.frame(target,pred)
result$target <- as.numeric(result$target)
result$pred <- as.numeric(result$pred)
result$error <- abs(result$target - result$pred) / result$target
result$score <- 5 * cos(10 / 3 * pi * result$error) + 5
result$score[which(result$error >= 0.3)] <- 0
print(paste('p =', p, '; q =', q , '; P =', P,';误差:',
round(mean(result$error), 4),'; 得分:',
round(mean(result$score), 4), collapse = ""))
}
}
}
拟合相对最优模型
# 拟合相对最优ARIMA模型
arima.model <- Arima(halcyon.purchase, order = c(0, 0, 3),
seasonal = list(order = c(1, 1, 1), period = 7))
forecast.data <- forecast(arima.model, h = 31)
pred <- forecast.data$mean
index <- which(data.purchase$report_date_new >= as.Date("2014-08-1"))
target <- ts(data.purchase$purchase[index])
result <- data.frame(target, pred)
result$target <- as.numeric(result$target)
result$pred <- as.numeric(result$pred)
result$error <- abs(result$target - result$pred) / result$target
result$score <- 5 * cos(10 / 3 * pi * result$error) + 5
result$score[which(result$error >= 0.3)] <- 0
head(result)
在R中好像有一个可以自动定阶的函数auto.arima 函数包,依据的是AIC等模型评价指标。