# python二维随机游走_利用python进行时间序列分析——从随机游走到GARCH模型（二）...

Autoregressive Models - AR(p)

# Simulate an AR(1) process with alpha = 0.6

np.random.seed(1)

n_samples = int(1000)

a = 0.6

x = w = np.random.normal(size=n_samples)

for t in range(n_samples):

x[t] = a*x[t-1] + w[t]

tsplot(x, lags=30)

# Fit an AR(p) model to simulated AR(1) model with alpha = 0.6

mdl = smt.AR(x).fit(maxlag=30, ic='aic', trend='nc')

%time est_order = smt.AR(x).select_order(maxlag=30, ic='aic', trend='nc')

#ic : 有四个选择 {‘aic’,’bic’,’hqic’,’t-stat’}

#trend：是否包含常数项， ‘c’ - include constant. ‘nc’ - no constant.

true_order = 1

print('\nalpha estimate: {:3.5f} | best lag order = {}'.format(mdl.params[0], est_order))

print('\ntrue alpha = {} | true order = {}'.format(a, true_order))

Wall time: 19.8 s

alpha estimate: 0.58227 | best lag order = 1

true alpha = 0.6 | true order = 1

# Simulate an AR(2) process

n = int(1000)

alphas = np.array([.666, -.333])

betas = np.array([0.])

# Python requires us to specify the zero-lag value which is 1

# Also note that the alphas for the AR model must be negated

# We also set the betas for the MA equal to 0 for an AR(p) model

ar = np.r_[1, -alphas] #np.r_是按列连接两个矩阵，就是把两矩阵上下相加，要求列数相等，类似于pandas中的concat()

ma = np.r_[1, betas] #np.c_是按行连接两个矩阵，就是把两矩阵左右相加，要求行数相等，类似于pandas中的merge()

ar2 = smt.arma_generate_sample(ar=ar, ma=ma, nsample=n)

tsplot(ar2, lags=30)

# Fit an AR(p) model to simulated AR(2) process

max_lag = 10

mdl = smt.AR(ar2).fit(maxlag=max_lag, ic='aic', trend='nc')

est_order = smt.AR(ar2).select_order(maxlag=max_lag, ic='aic', trend='nc')

true_order = 2

print('\ncoef estimate: {:3.4f} {:3.4f} | best lag order = {}'.format(mdl.params[0],mdl.params[1], est_order))

print('\ntrue coefs = {} | true order = {}'.format([.666,-.333], true_order))

# coef estimate: 0.6291 -0.3196 | best lag order = 2

# true coefs = [0.666, -0.333] | true order = 2

coef estimate: 0.6760 -0.3393 | best lag order = 2

true coefs = [0.666, -0.333] | true order = 2

# Select best lag order for MSFT returns

max_lag = 30

mdl = smt.AR(lrets.MSFT).fit(maxlag=max_lag, ic='aic', trend='nc')

est_order = smt.AR(lrets.MSFT).select_order(maxlag=max_lag, ic='aic', trend='nc')

print('best estimated lag order = {}'.format(est_order))

best estimated lag order = 23

AR(p)模型的特征根及平稳性检验

，其中

AR(p)模型的定阶

Moving Average Models - MA(q)

MA(q)模型与AR(p)模型非常相似。不同之处在于，MA(q)模型是对过去的白噪声误差项的线性组合，而不是过去观测的线性组合。MA模型的动机是我们可以直接通过拟合误差项的模型来观察误差过程中的“冲击”。在一个AR(p)模型中，通过在一系列过去的观察中使用ACF间接观察到这些冲击。MA(q)模型的公式是：

，方差是

。让我们使用beta=0.6模拟这个过程。

# Simulate an MA(1) process

n = int(1000)

# set the AR(p) alphas equal to 0

alphas = np.array([0.])

betas = np.array([0.6])

# add zero-lag and negate alphas

ar = np.r_[1, -alphas]

ma = np.r_[1, betas]

ma1 = smt.arma_generate_sample(ar=ar, ma=ma, nsample=n)

tsplot(ma1, lags=30)

ACF函数表明滞后1是显着的，这表明MA(1)模型可能适合我们的模拟系列。但PACF在滞后2，3和4时的显着性不知道什么鬼，当ACF仅在滞后1时显示重要性。无论我们努力去拟合MA(1)模型模拟出来的数据。

# Fit the MA(1) model to our simulated time series

# Specify ARMA model with order (p, q)

max_lag = 30

mdl = smt.ARMA(ma1, order=(0, 1)).fit(maxlag=max_lag, method='mle', trend='nc')

print(mdl.summary())

ARMA Model Results

==============================================================================

Dep. Variable: y No. Observations: 1000

Model: ARMA(0, 1) Log Likelihood -1390.513

Method: mle S.D. of innovations 0.972

Date: Tue, 03 Apr 2018 AIC 2785.025

Time: 20:55:43 BIC 2794.841

Sample: 0 HQIC 2788.756

==============================================================================

coef std err z P>|z| [0.025 0.975]

------------------------------------------------------------------------------

ma.L1.y 0.5874 0.026 22.762 0.000 0.537 0.638

Roots

=============================================================================

Real Imaginary Modulus Frequency

-----------------------------------------------------------------------------

MA.1 -1.7024 +0.0000j 1.7024 0.5000

-----------------------------------------------------------------------------

# Simulate MA(3) process with betas 0.6, 0.4, 0.2

n = int(1000)

alphas = np.array([0.])

betas = np.array([0.6, 0.4, 0.2])

ar = np.r_[1, -alphas]

ma = np.r_[1, betas]

ma3 = smt.arma_generate_sample(ar=ar, ma=ma, nsample=n)

tsplot(ma3, lags=30)

# Fit MA(3) model to simulated time series

max_lag = 30

mdl = smt.ARMA(ma3, order=(0, 3)).fit(

maxlag=max_lag, method='mle', trend='nc')

print(mdl.summary())

ARMA Model Results

==============================================================================

Dep. Variable: y No. Observations: 1000

Model: ARMA(0, 3) Log Likelihood -1427.038

Method: mle S.D. of innovations 1.008

Date: Tue, 03 Apr 2018 AIC 2862.075

Time: 21:02:38 BIC 2881.706

Sample: 0 HQIC 2869.536

==============================================================================

coef std err z P>|z| [0.025 0.975]

------------------------------------------------------------------------------

ma.L1.y 0.6025 0.031 19.322 0.000 0.541 0.664

ma.L2.y 0.4060 0.034 11.806 0.000 0.339 0.473

ma.L3.y 0.1683 0.031 5.420 0.000 0.107 0.229

Roots

=============================================================================

Real Imaginary Modulus Frequency

-----------------------------------------------------------------------------

MA.1 -0.1714 -1.6856j 1.6943 -0.2661

MA.2 -0.1714 +1.6856j 1.6943 0.2661

MA.3 -2.0700 -0.0000j 2.0700 -0.5000

-----------------------------------------------------------------------------

# Fit MA(3) to SPY returns

max_lag = 30

Y = lrets.SPY

mdl = smt.ARMA(Y, order=(0, 3)).fit(

maxlag=max_lag, method='mle', trend='nc')

print(mdl.summary())

tsplot(mdl.resid, lags=max_lag)

ARMA Model Results

==============================================================================

Dep. Variable: SPY No. Observations: 2013

Model: ARMA(0, 3) Log Likelihood 5756.953

Method: mle S.D. of innovations 0.014

Date: Tue, 03 Apr 2018 AIC -11505.905

Time: 21:05:56 BIC -11483.476

Sample: 01-04-2007 HQIC -11497.672

- 12-31-2014

==============================================================================

coef std err z P>|z| [0.025 0.975]

------------------------------------------------------------------------------

ma.L1.SPY -0.0959 0.022 -4.314 0.000 -0.139 -0.052

ma.L2.SPY -0.0737 0.023 -3.256 0.001 -0.118 -0.029

ma.L3.SPY 0.0274 0.022 1.260 0.208 -0.015 0.070

Roots

=============================================================================

Real Imaginary Modulus Frequency

-----------------------------------------------------------------------------

MA.1 -2.8909 -0.0000j 2.8909 -0.5000

MA.2 2.7906 -2.2012j 3.5543 -0.1063

MA.3 2.7906 +2.2012j 3.5543 0.1063

-----------------------------------------------------------------------------

MA(q)模型：

,

Autoregressive Moving Average Models - ARMA(p, q)

1. AR(p)模型试图捕捉(解释)交易市场中经常观察到的动量和均值回复效应。

2. MA(q)模型尝试捕捉(解释)在白噪声条件下观察到的冲击效应。这些冲击效应可以被认为是影响观察过程的意外事件，例如，恐怖袭击

ARMA模型的弱点在于忽视了大多数金融时间序列中的波动聚集效应。

# Simulate an ARMA(2, 2) model with alphas=[0.5,-0.25] and betas=[0.5,-0.3]

max_lag = 30

n = int(5000) # lots of samples to help estimates

burn = int(n/10) # number of samples to discard before fit

alphas = np.array([0.5, -0.25])

betas = np.array([0.5, -0.3])

ar = np.r_[1, -alphas]

ma = np.r_[1, betas]

#burnin: to reduce the effect of initial conditions, burnin observations at the beginning of the sample are dropped

arma22 = smt.arma_generate_sample(ar=ar, ma=ma, nsample=n, burnin=burn)

_ = tsplot(arma22, lags=max_lag)

mdl = smt.ARMA(arma22, order=(2, 2)).fit(maxlag=max_lag, method='mle', trend='nc', burnin=burn)

print(mdl.summary())

ARMA Model Results

==============================================================================

Dep. Variable: y No. Observations: 5000

Model: ARMA(2, 2) Log Likelihood -7076.176

Method: mle S.D. of innovations 0.996

Date: Tue, 03 Apr 2018 AIC 14162.352

Time: 21:50:44 BIC 14194.938

Sample: 0 HQIC 14173.773

==============================================================================

coef std err z P>|z| [0.025 0.975]

------------------------------------------------------------------------------

ar.L1.y 0.4730 0.051 9.338 0.000 0.374 0.572

ar.L2.y -0.2645 0.015 -17.489 0.000 -0.294 -0.235

ma.L1.y 0.5224 0.052 10.089 0.000 0.421 0.624

ma.L2.y -0.2699 0.047 -5.684 0.000 -0.363 -0.177

Roots

=============================================================================

Real Imaginary Modulus Frequency

-----------------------------------------------------------------------------

AR.1 0.8943 -1.7267j 1.9446 -0.1739

AR.2 0.8943 +1.7267j 1.9446 0.1739

MA.1 -1.1867 +0.0000j 1.1867 0.5000

MA.2 3.1219 +0.0000j 3.1219 0.0000

-----------------------------------------------------------------------------

# Simulate an ARMA(3, 2) model with alphas=[0.5,-0.25,0.4] and betas=[0.5,-0.3]

max_lag = 30

n = int(5000)

burn = 2000

alphas = np.array([0.5, -0.25, 0.4])

betas = np.array([0.5, -0.3])

ar = np.r_[1, -alphas]

ma = np.r_[1, betas]

arma32 = smt.arma_generate_sample(ar=ar, ma=ma, nsample=n, burnin=burn)

_ = tsplot(arma32, lags=max_lag)

# pick best order by aic

# smallest aic value wins

best_aic = np.inf

best_order = None

best_mdl = None

rng = range(5)

for i in rng:

for j in rng:

try:

tmp_mdl = smt.ARMA(arma32, order=(i, j)).fit(method='mle', trend='nc')

tmp_aic = tmp_mdl.aic

if tmp_aic < best_aic:

best_aic = tmp_aic

best_order = (i, j)

best_mdl = tmp_mdl

except: continue

print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))

print(best_mdl.summary())

tsplot(best_mdl.resid, lags=max_lag)

aic: 14149.61736 | order: (4, 4)

ARMA Model Results

==============================================================================

Dep. Variable: y No. Observations: 5000

Model: ARMA(4, 4) Log Likelihood -7065.809

Method: mle S.D. of innovations 0.994

Date: Tue, 03 Apr 2018 AIC 14149.617

Time: 22:13:45 BIC 14208.272

Sample: 0 HQIC 14170.175

==============================================================================

coef std err z P>|z| [0.025 0.975]

------------------------------------------------------------------------------

ar.L1.y -0.3127 0.138 -2.262 0.024 -0.584 -0.042

ar.L2.y 0.1931 0.075 2.588 0.010 0.047 0.339

ar.L3.y 0.0893 0.043 2.092 0.036 0.006 0.173

ar.L4.y 0.3522 0.055 6.421 0.000 0.245 0.460

ma.L1.y 1.3075 0.139 9.387 0.000 1.035 1.581

ma.L2.y 0.0407 0.091 0.449 0.654 -0.137 0.218

ma.L3.y -0.2056 0.083 -2.467 0.014 -0.369 -0.042

ma.L4.y 0.0849 0.040 2.136 0.033 0.007 0.163

Roots

=============================================================================

Real Imaginary Modulus Frequency

-----------------------------------------------------------------------------

AR.1 -1.1110 -0.0000j 1.1110 -0.5000

AR.2 1.2661 -0.0000j 1.2661 -0.0000

AR.3 -0.2043 -1.4059j 1.4206 -0.2730

AR.4 -0.2043 +1.4059j 1.4206 0.2730

MA.1 -1.1086 -0.0815j 1.1116 -0.4883

MA.2 -1.1086 +0.0815j 1.1116 0.4883

MA.3 2.3193 -2.0371j 3.0869 -0.1147

MA.4 2.3193 +2.0371j 3.0869 0.1147

-----------------------------------------------------------------------------

# Fit ARMA model to SPY returns

best_aic = np.inf

best_order = None

best_mdl = None

rng = range(5) # [0,1,2,3,4,5]

for i in rng:

for j in rng:

try:

tmp_mdl = smt.ARMA(lrets['SPY'], order=(i, j)).fit(

method='mle', trend='nc'

)

tmp_aic = tmp_mdl.aic

if tmp_aic < best_aic:

best_aic = tmp_aic

best_order = (i, j)

best_mdl = tmp_mdl

except: continue

print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))

print(best_mdl.summary())

tsplot(best_mdl.resid, lags=30)

aic: -11519.12958 | order: (4, 3)

ARMA Model Results

==============================================================================

Dep. Variable: SPY No. Observations: 2013

Model: ARMA(4, 3) Log Likelihood 5767.565

Method: mle S.D. of innovations 0.014

Date: Wed, 04 Apr 2018 AIC -11519.130

Time: 11:07:29 BIC -11474.271

Sample: 01-04-2007 HQIC -11502.664

- 12-31-2014

==============================================================================

coef std err z P>|z| [0.025 0.975]

------------------------------------------------------------------------------

ar.L1.SPY -0.0572 0.152 -0.377 0.707 -0.355 0.240

ar.L2.SPY -0.4674 0.142 -3.288 0.001 -0.746 -0.189

ar.L3.SPY 0.6737 0.168 4.005 0.000 0.344 1.003

ar.L4.SPY 0.0213 0.036 0.588 0.557 -0.050 0.092

ma.L1.SPY -0.0381 0.148 -0.257 0.797 -0.329 0.253

ma.L2.SPY 0.4126 0.114 3.612 0.000 0.189 0.637

ma.L3.SPY -0.7205 0.142 -5.084 0.000 -0.998 -0.443

Roots

=============================================================================

Real Imaginary Modulus Frequency

-----------------------------------------------------------------------------

AR.1 -0.3724 -0.9396j 1.0107 -0.3101

AR.2 -0.3724 +0.9396j 1.0107 0.3101

AR.3 1.4228 -0.0000j 1.4228 -0.0000

AR.4 -32.3409 -0.0000j 32.3409 -0.5000

MA.1 -0.3758 -0.9523j 1.0238 -0.3098

MA.2 -0.3758 +0.9523j 1.0238 0.3098

MA.3 1.3242 -0.0000j 1.3242 -0.0000

-----------------------------------------------------------------------------

ACF和PACF没有显示出明显的自相关性。QQ和概率图显示残差大致为正态且厚尾。但是，这个模型的残差看起来不像白噪声！看模型没有捕捉到的明显的条件异方差(条件波动性)的突出区域。

AR和MA模型都是ARMA(p,q)的特殊形式。

ARMA建模步骤：(摘自Chen Li老师ppt)

Autoregressive Integrated Moving Average Models - ARIMA(p, d, q)

ARIMA是ARMA模型的延伸。如前所述，我们的许多时间序列不是平稳的，但是它们可以通过差分来实现平稳。我们看到了一个例，当我们对高斯随机游走进行一阶差分后证明它等于白噪声。换言之，我们通过一阶差分将非平稳机游走其转化为平稳的白噪声。

# Fit ARIMA(p, d, q) model to SPY Returns

# pick best order and final model based on aic

best_aic = np.inf

best_order = None

best_mdl = None

pq_rng = range(5) # [0,1,2,3,4]

d_rng = range(2) # [0,1]

for i in pq_rng:

for d in d_rng:

for j in pq_rng:

try:

tmp_mdl = smt.ARIMA(lrets.SPY, order=(i,d,j)).fit(method='mle', trend='nc')

tmp_aic = tmp_mdl.aic

if tmp_aic < best_aic:

best_aic = tmp_aic

best_order = (i, d, j)

best_mdl = tmp_mdl

except: continue

print('aic: {:6.5f} | order: {}'.format(best_aic, best_order))

# ARIMA model resid plot

print(best_mdl.summary())

_ = tsplot(best_mdl.resid, lags=30)

#aic: -11519.12958 | order: (4, 0, 3)

# Create a 21 day forecast of SPY returns with 95%, 99% CI

n_steps = 21

f, err95, ci95 = best_mdl.forecast(steps=n_steps) # 95% CI

_, err99, ci99 = best_mdl.forecast(steps=n_steps, alpha=0.01) # 99% CI

idx = pd.date_range(data.index[-1], periods=n_steps, freq='D')

fc_95 = pd.DataFrame(np.column_stack([f, ci95]),

index=idx, columns=['forecast', 'lower_ci_95', 'upper_ci_95'])

fc_99 = pd.DataFrame(np.column_stack([ci99]),

index=idx, columns=['lower_ci_99', 'upper_ci_99'])

fc_all = fc_95.combine_first(fc_99)

# Plot 21 day forecast for SPY returns

plt.style.use('bmh')

fig = plt.figure(figsize=(9,7))

ax = plt.gca()

ts = lrets.SPY.iloc[-500:].copy()

ts.plot(ax=ax, label='Spy Returns')

# in sample prediction

pred = best_mdl.predict(ts.index[0], ts.index[-1])

pred.plot(ax=ax, style='r-', label='In-sample prediction')

styles = ['b-', '0.2', '0.75', '0.2', '0.75']

fc_all.plot(ax=ax, style=styles)

plt.fill_between(fc_all.index, fc_all.lower_ci_95, fc_all.upper_ci_95, color='gray', alpha=0.7)

plt.fill_between(fc_all.index, fc_all.lower_ci_99, fc_all.upper_ci_99, color='gray', alpha=0.2)

plt.title('{} Day SPY Return Forecast\nARIMA{}'.format(n_steps, best_order))

plt.legend(loc='best', fontsize=10)

• 0
点赞
• 1
收藏
• 0
评论
06-27
11-29 746
11-18 5733
10-08 9130
12-23 9205
02-29 1万+

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助