看书标记【R语言 商务数据分析实战5】

看书标记——关于R语言


【R语言 商务数据分析实战5】


chapter 5

金融服务机构资金流量预测
数据理解和预处理>>检验平稳性+纯随机性>>得到模型阶数建立ARIMA模型

5.2 任务实现

检验数的平稳性

# 设置工作目录并读取数据
setwd()
# 快速读取数据
dataFile <- data.frame(data.table::fread('./data/user_balance_table.csv'))
# 日期转化为标准时间格式
dataFile$report_date_new <- as.Date(as.character(dataFile$report_date), '%Y%m%d')                     
# 创建一个新的数据集,对相同日期的资金申购量进行统计
library(plyr)
data.purchase <- ddply(dataFile, .(report_date_new), summarize, 
                       purchase = sum(total_purchase_amt))

# 绘制时序图
purchase <- ts(data.purchase$purchase, start = c(2013, 7), 
               end = c(2014, 8), frequency = 365)
plot(purchase, type="l", ylab = "inflows", xlab = "date", xaxt = "n")
acf(purchase, lag.max = 30)  # 数据自相关检验

# 处理非平稳序列
# 筛选2014-03-01至2014-07-31的数据作为训练集,2014—08数据作为测试集
index <- which(data.purchase$report_date_new >= as.Date("2014-03-01") & 
                 data.purchase$report_date_new <= as.Date("2014-07-31"))
halcyon.purchase <- ts(data.purchase$purchase[index])
# 绘制时序图
plot(halcyon.purchase, ylab = "inflows", xlab = "date", xaxt = "n")
acf(halcyon.purchase, lag.max = 30)  # 自相关性检验

# 差分
purchase.diff <- diff(halcyon.purchase, lag = 7, differences = 1)
# 绘制时序图
plot(purchase.diff, ylab = 'diff_inflows', xlab = 'date', xaxt = "n")
acf(purchase.diff, lag.max = 30)  # 自相关性检验

# 单位根检验
library(tseries)
adf.test(purchase.diff)

检验数据的纯随机性

Box.test(purchase.diff, lag = 1, type = "Ljung-Box")  # 纯随机性

模型定阶

# 模型定阶
library(TSA)
res1 <- armasubsets(y = halcyon.purchase, nar = 5, nma = 5)  # 原始序列BIC定阶
plot(res1)
res2 <- armasubsets(y = purchase.diff, nar = 5, nma = 5)  # 周期性差分序列BIC定阶
plot(res2)

模型残差检验

# 模型残差检验
library(forecast)
for(p in c(0, 1, 3)){
  for(q in c(3, 4)){
    for (P in c(0, 1, 3)) {
      arima.model <- Arima(halcyon.purchase, order = c(p, 0, q),  
                           seasonal = list(order = c(P, 1, 1), period = 7))
      box.test.result <- Box.test(arima.model$residuals, lag = 1, 
                                  type = "Ljung-Box") 
      print(paste('p =', p, '; q =', q , '; P =', P,'; 残差P值:', 
                  round(box.test.result$p.value, 4), collapse = ""))
    }
  }
}

模型评估

# 模型评估
for(p in c(0, 1, 3)){
  for(q in c(3, 4)){
    for (P in c(0, 1, 3)) {
      arima.model <- Arima(halcyon.purchase, order = c(p, 0, q), 
                           seasonal = list(order = c(P, 1, 1), period = 7))
      forecast.data <- forecast(arima.model, h = 31)
      pred <- forecast.data$mean
      index <- which(data.purchase$report_date_new >= as.Date("2014-08-1"))
      target <-  ts(data.purchase$purchase[index])
      result <- data.frame(target,pred)
      result$target <- as.numeric(result$target)
      result$pred <- as.numeric(result$pred)
      result$error <- abs(result$target - result$pred) / result$target
      result$score <- 5 * cos(10 / 3 * pi * result$error) + 5
      result$score[which(result$error >= 0.3)] <- 0
      print(paste('p =', p, '; q =', q , '; P =', P,';误差:', 
                  round(mean(result$error), 4),'; 得分:', 
                  round(mean(result$score), 4), collapse = ""))
    }
  }
} 

拟合相对最优模型

# 拟合相对最优ARIMA模型
arima.model <- Arima(halcyon.purchase, order = c(0, 0, 3), 
                     seasonal = list(order = c(1, 1, 1), period = 7))
forecast.data <- forecast(arima.model, h = 31)
pred <- forecast.data$mean
index <- which(data.purchase$report_date_new >= as.Date("2014-08-1"))
target <-  ts(data.purchase$purchase[index])
result <- data.frame(target, pred)
result$target <- as.numeric(result$target)
result$pred <- as.numeric(result$pred)
result$error <- abs(result$target - result$pred) / result$target
result$score <- 5 * cos(10 / 3 * pi * result$error) + 5
result$score[which(result$error >= 0.3)] <- 0
head(result)

在R中好像有一个可以自动定阶的函数auto.arima 函数包,依据的是AIC等模型评价指标。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值