向量自回归®
传统计量方法以经济理论为基础描述变量关系,但对于变量间的动态关系,经济理论很难给出较好的经济说明。Sim(1980)年提出向量自回归模型(VAR),VAR模型不以经济理论为基础,基于多方程联立模型,对每一个方程,内生变量对其他全部内生变量以及滞后值进行回归,进而估计得到全部内生变量的动态关系。
1 数据生成
rm(list = ls())
library(vars)
N = 1000
set.seed(1)
a0 = 0.2;a1 = 0.3;a2 = 0.5;a3 = -0.3
b0 = 0.1;b1 = 0.2;b2 = -0.4;b3 = -0.1
e1 = rnorm((N-2),0,2)
e2 = rnorm((N-2),0,1)
y1 = double(N)
y2 = double(N)
y1[1] = 10;y1[2] = 5
y2[1] = 6;y2[2] = 7
for(i in 3:N){
y1[i] =a0*y1[i-1]+a1*y1[i-2]+a2*y2[i-1]+a3*y2[i-2]+e1[i]
y2[i] =b0*y1[i-1]+b1*y1[i-2]+b2*y2[i-1]+b3*y2[i-2]+e2[i]
}
data = data.frame(y1 = y1[1:998] ,y2 = y2[1:998])
par(mfrow = c(2,1))
plot(data$y1)
plot(data$y2)
2 VAR估计
2.1 滞后阶数选择
options(digits = 4)
VARselect(data, lag.max = 5, type="const")
# $selection
# AIC(n) HQ(n) SC(n) FPE(n)
# 2 2 2 2
#
# $criteria
# 1 2 3 4 5
# AIC(n) 1.838 1.544 1.551 1.557 1.562
# HQ(n) 1.849 1.563 1.577 1.591 1.604
# SC(n) 1.867 1.594 1.620 1.646 1.671
# FPE(n) 6.282 4.684 4.717 4.745 4.770
各准则表明滞后2阶模型拟合最佳(数据生成过程也是选用的2阶),各内生变量滞后2阶使用极大似然估计
# VAR回归
fit = VAR(data, p = 2)
# 查看回归系数,与原始参数ab差异不大
coef(fit)
# $y1
# Estimate Std. Error t value Pr(>|t|)
# y1.l1 0.15221 0.03019 5.0420 5.475e-07
# y2.l1 0.46642 0.05632 8.2815 3.909e-16
# y1.l2 0.27895 0.02845 9.8064 9.908e-22
# y2.l2 -0.34428 0.05841 -5.8944 5.151e-09
# const -0.02022 0.06555 -0.3084 7.578e-01
#
# $y2
# Estimate Std. Error t value Pr(>|t|)
# y1.l1 0.09278 0.01519 6.1077 1.450e-09
# y2.l1 -0.39337 0.02834 -13.8807 3.662e-40
# y1.l2 0.21234 0.01431 14.8350 4.122e-45
# y2.l2 -0.05327 0.02939 -1.8126 7.020e-02
# const -0.01695 0.03299 -0.5137 6.076e-01
查看相关回归结果
summary(fit)
# VAR Estimation Results:
# =========================
# Endogenous variables: y1, y2
# Deterministic variables: const
# Sample size: 996
# Log Likelihood: -3585.432
# Roots of the characteristic polynomial:
# 0.639 0.639 0.579 0.246
# Call:
# VAR(y = data, p = 2)
#
#
# Estimation results for equation y1:
# ===================================
# y1 = y1.l1 + y2.l1 + y1.l2 + y2.l2 + const
#
# Estimate Std. Error t value Pr(>|t|)
# y1.l1 0.1522 0.0302 5.04 5.5e-07 ***
# y2.l1 0.4664 0.0563 8.28 3.9e-16 ***
# y1.l2 0.2790 0.0284 9.81 < 2e-16 ***
# y2.l2 -0.3443 0.0584 -5.89 5.2e-09 ***
# const -0.0202 0.0656 -0.31 0.76
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
#
# Residual standard error: 2.07 on 991 degrees of freedom
# Multiple R-Squared: 0.239, Adjusted R-squared: 0.236
# F-statistic: 77.9 on 4 and 991 DF, p-value: <2e-16
#
#
# Estimation results for equation y2:
# ===================================
# y2 = y1.l1 + y2.l1 + y1.l2 + y2.l2 + const
#
# Estimate Std. Error t value Pr(>|t|)
# y1.l1 0.0928 0.0152 6.11 1.4e-09 ***
# y2.l1 -0.3934 0.0283 -13.88 < 2e-16 ***
# y1.l2 0.2123 0.0143 14.83 < 2e-16 ***
# y2.l2 -0.0533 0.0294 -1.81 0.07 .
# const -0.0169 0.0330 -0.51 0.61
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
#
# Residual standard error: 1.04 on 991 degrees of freedom
# Multiple R-Squared: 0.287, Adjusted R-squared: 0.284
# F-statistic: 99.7 on 4 and 991 DF, p-value: <2e-16
#
# Covariance matrix of residuals:
# y1 y2
# y1 4.2793 -0.0104
# y2 -0.0104 1.0835
#
# Correlation matrix of residuals:
# y1 y2
# y1 1.00000 -0.00482
# y2 -0.00482 1.00000
2.2 相关检验
# 白噪音检验
serialtest <- serial.test(fit)
serialtest$serial
plot(serialtest)
# Portmanteau Test (asymptotic)
#
# data: Residuals of VAR object fit
# Chi-squared = 51, df = 56, p-value = 0.7
p=0.7>0.05表明不存在自相关问题;
# 系统平稳性检验,超出红线表明不平稳
fit.sta <- stability(fit, type = "OLS-CUSUM",
h = 2, dynamic = FALSE, rescale = TRUE)
plot(fit.sta)
# 单位根模长
roots(fit)
# 0.6391 0.6391 0.5787 0.2464
# 正态性检验JB检验,也包括峰度和偏度检验
norm.test <- normality.test(fit,multivariate.only = TRUE)
norm.test
# $JB
#
# JB-Test (multivariate)
#
# data: Residuals of VAR object fit
# Chi-squared = 0.2, df = 4, p-value = 1
#
#
# $Skewness
#
# Skewness only (multivariate)
#
# data: Residuals of VAR object fit
# Chi-squared = 0.18, df = 2, p-value = 0.9
#
#
# $Kurtosis
#
# Kurtosis only (multivariate)
#
# data: Residuals of VAR object fit
# Chi-squared = 0.016, df = 2, p-value = 1
# 正态性假设对VAR模型影响不大但对预测值的估计区间有影响
# 区间估计依赖正态假设
plot(norm.test)
# 格兰杰因果检验
grangertest(y1 ~ y2, order = 2, data = data)
grangertest(y2 ~ y1, order = 2, data = data)
# 也可以使用vars包自带的causality
# Granger causality test
#
# Model 1: y1 ~ Lags(y1, 1:2) + Lags(y2, 1:2)
# Model 2: y1 ~ Lags(y1, 1:2)
# Res.Df Df F Pr(>F)
# 1 991
# 2 993 -2 72.5 <2e-16 ***
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Granger causality test
#
# Model 1: y2 ~ Lags(y2, 1:2) + Lags(y1, 1:2)
# Model 2: y2 ~ Lags(y2, 1:2)
# Res.Df Df F Pr(>F)
# 1 991
# 2 993 -2 139 <2e-16 ***
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
3 IRF估计
# 脉冲响应分析
IRF = irf(fit,n.ahead = 10,ortho = TRUE, ci = 0.95)
plot(IRF)
自带图形排版僵硬,循环作图自定义排版
par(mfrow = c(2,2))
for(i in 1:2){
for(j in 1:2){
# 变量
p = as.data.frame(IRF$irf[[i]])[,j]
l = as.data.frame(IRF$Lower[[i]])[,j]
u = as.data.frame(IRF$Upper[[i]])[,j]
# 或者调用ggplot2这里略
plot(p,type = "l",lty = 1,lwd = 1,col = "green",
ylim = c(min(l)-0.1,max(u)+0.1),
main = paste(IRF$response[j],"from",IRF$impulse[i]),
ylab = "response",xlab = "Lag order")
lines(l,type = "l",lty = 1,lwd = 1,col = "red")
lines(u,type = "l",lty = 1,lwd = 1,col = "blue")
abline(h=0,col='blue',lwd = 1)
grid()
}
}
4 方差分解
# 各变量对预测方差的贡献度
fit.fevd <- fevd(fit, n.ahead = 10)
fit.fevd
plot(fit.fevd )
# $y1
# y1 y2
# [1,] 1.0000 0.00000
# [2,] 0.9489 0.05110
# [3,] 0.9137 0.08627
# [4,] 0.9028 0.09719
# [5,] 0.9001 0.09988
# [6,] 0.9008 0.09922
# [7,] 0.9006 0.09940
# [8,] 0.9003 0.09968
# [9,] 0.9002 0.09983
# [10,] 0.9001 0.09987
#
# $y2
# y1 y2
# [1,] 2.323e-05 1.0000
# [2,] 2.919e-02 0.9708
# [3,] 1.307e-01 0.8693
# [4,] 1.312e-01 0.8688
# [5,] 1.477e-01 0.8523
# [6,] 1.471e-01 0.8529
# [7,] 1.472e-01 0.8528
# [8,] 1.477e-01 0.8523
# [9,] 1.477e-01 0.8523
# [10,] 1.479e-01 0.8521
5 模型预测
fit.prd <- predict(fit, n.ahead = 20, ci = 0.95)
fit.prd$fcst
# $y1
# fcst lower upper CI
# [1,] -1.25546 -5.310 2.799 4.054
# [2,] -0.20962 -4.419 4.000 4.209
# [3,] -0.35981 -4.894 4.175 4.534
# [4,] -0.22593 -4.833 4.381 4.607
# [5,] -0.06079 -4.679 4.558 4.618
# [6,] -0.14528 -4.779 4.489 4.634
# [7,] -0.03710 -4.672 4.597 4.634
# [8,] -0.06823 -4.705 4.569 4.637
# [9,] -0.05039 -4.688 4.587 4.638
# [10,] -0.04000 -4.678 4.598 4.638
# [11,] -0.04870 -4.686 4.589 4.638
# [12,] -0.03807 -4.676 4.600 4.638
# [13,] -0.04261 -4.680 4.595 4.638
# [14,] -0.04024 -4.678 4.598 4.638
# [15,] -0.03983 -4.678 4.598 4.638
# [16,] -0.04061 -4.678 4.597 4.638
# [17,] -0.03958 -4.677 4.598 4.638
# [18,] -0.04015 -4.678 4.598 4.638
# [19,] -0.03984 -4.678 4.598 4.638
# [20,] -0.03987 -4.678 4.598 4.638
#
# $y2
# fcst lower upper CI
# [1,] -0.23397 -2.274 1.806 2.040
# [2,] -0.08153 -2.307 2.143 2.225
# [3,] -0.25844 -2.631 2.114 2.373
# [4,] 0.01117 -2.363 2.385 2.374
# [5,] -0.10494 -2.509 2.300 2.405
# [6,] -0.02988 -2.440 2.380 2.410
# [7,] -0.02599 -2.439 2.387 2.413
# [8,] -0.03942 -2.453 2.374 2.414
# [9,] -0.01426 -2.428 2.400 2.414
# [10,] -0.02840 -2.443 2.386 2.414
# [11,] -0.01943 -2.434 2.395 2.414
# [12,] -0.02080 -2.435 2.393 2.414
# [13,] -0.02160 -2.436 2.393 2.414
# [14,] -0.01938 -2.434 2.395 2.414
# [15,] -0.02095 -2.435 2.393 2.414
# [16,] -0.01991 -2.434 2.394 2.414
# [17,] -0.02022 -2.434 2.394 2.414
# [18,] -0.02022 -2.434 2.394 2.414
# [19,] -0.02004 -2.434 2.394 2.414
# [20,] -0.02021 -2.434 2.394 2.414
plot(fit.prd)
fanchart(fit.prd ,colors = 2,names = "y1")
fanchart(fit.prd ,colors = 3,names = "y2")