import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime,timedelta
from time import time
读取数据
# Load the catfish series from CSV: column 0 is parsed as dates and used as
# the index. `read_csv(squeeze=True)` was deprecated in pandas 1.4 and removed
# in pandas 2.0, so the remaining single column is collapsed to a Series
# explicitly with DataFrame.squeeze('columns').
cat_fish = pd.read_csv(
    './data/catfish.csv',
    parse_dates=[0],
    index_col=0,
).squeeze('columns')
cat_fish.head()
Date
1986-01-01 9034
1986-02-01 9596
1986-03-01 10558
1986-04-01 9002
1986-05-01 9239
Name: Total, dtype: int64
ADF检验
检验序列的平稳性,原假设(零假设)是序列存在单位根,即序列非平稳
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the raw series, with the lag order
# selected by AIC.
adf_result = adfuller(
    cat_fish,
    autolag='AIC',
)
# Element 1 of the returned tuple is the p-value.
p_value = adf_result[1]
print('p_value:', p_value)
p_value: 0.48866351571884103
p 值约为 0.49,大于 0.05,不能拒绝原假设,因此序列非平稳
确定性因素分解
$x_t = T_t + S_t + C_t + I_t$
from statsmodels.tsa.seasonal import seasonal_decompose
累加模型
# Additive decomposition of the series using a seasonal period of 12.
decompose_model = seasonal_decompose(cat_fish, model='additive', period=12)

# Four stacked panels sharing one x axis.
fig, axarr = plt.subplots(4, sharex=True)
fig.set_size_inches(5.5, 5.5)

# (series, line colour, panel title) for each panel, top to bottom.
panels = [
    (cat_fish, 'b', 'origin'),
    (pd.Series(data=decompose_model.trend, index=cat_fish.index),
     'r', 'Trend component '),
    (pd.Series(data=decompose_model.seasonal, index=cat_fish.index),
     'g', 'Seasonal component '),
    (pd.Series(data=decompose_model.resid, index=cat_fish.index),
     'k', 'Irregular variations'),
]
for panel_ax, (panel_series, panel_colour, panel_title) in zip(axarr, panels):
    panel_series.plot(ax=panel_ax, color=panel_colour, linestyle='-')
    panel_ax.set_title(panel_title)

plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=2.0)
plt.xticks(rotation=10)
plt.show()
上图中的origin是原始序列,Trend是趋势因素,seasonal是季节性波动因素,Irregular是随机因素
# Re-run the ADF test on the decomposition residuals; NaN entries are
# dropped first.
residuals = decompose_model.resid.dropna()
adf_result = adfuller(residuals, autolag='AIC')
# Element 1 of the returned tuple is the p-value.
print('p_value:', adf_result[1])
p_value: 4.789237508659252e-16
提取出 $T_t$、$S_t$ 因素后,残差序列变得平稳
累乘模型
# Multiplicative decomposition of the series using a seasonal period of 12.
decompose_model = seasonal_decompose(cat_fish, model='multiplicative', period=12)

# Four stacked panels sharing one x axis.
fig, axarr = plt.subplots(4, sharex=True)
fig.set_size_inches(5.5, 5.5)

# (series, line colour, panel title) for each panel, top to bottom.
panels = [
    (cat_fish, 'b', 'origin'),
    (pd.Series(data=decompose_model.trend, index=cat_fish.index),
     'r', 'Trend component '),
    (pd.Series(data=decompose_model.seasonal, index=cat_fish.index),
     'g', 'Seasonal component '),
    (pd.Series(data=decompose_model.resid, index=cat_fish.index),
     'k', 'Irregular variations'),
]
for panel_ax, (panel_series, panel_colour, panel_title) in zip(axarr, panels):
    panel_series.plot(ax=panel_ax, color=panel_colour, linestyle='-')
    panel_ax.set_title(panel_title)

plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=2.0)
plt.xticks(rotation=10)
plt.show()

# ADF test on the residuals of the multiplicative model; NaN entries are
# dropped first. Element 1 of the returned tuple is the p-value.
residuals = decompose_model.resid.dropna()
adf_result = adfuller(residuals, autolag='AIC')
print('p_value:', adf_result[1])
p_value: 8.424411975282345e-14
使用x_13_arima模型进行确定性分析
x13拓展软件下载地址:https://www.census.gov/srd/www/x13as/x13down_pc.html
from statsmodels.tsa.x13 import x13_arima_analysis
import os

# Directory containing the unpacked X-13ARIMA-SEATS binary. The X13PATH
# environment variable takes precedence; the original hard-coded download
# location is kept as the fallback.
x13_dir = os.environ.get('X13PATH', r'D:/Downloads/x13as/')

# Hand the binary location to statsmodels through `x12path` rather than
# os.chdir(): changing the process working directory is a global side effect
# that breaks relative paths such as './data/catfish.csv'.
model = x13_arima_analysis(endog=cat_fish, trading=True, x12path=x13_dir)
model.plot()
plt.show()
指数平滑预测
from statsmodels.tsa.api import ExponentialSmoothing,SimpleExpSmoothing
# Build the example series for the exponential-smoothing sections.
# The annual indexes use the YearEnd offset object instead of the 'A' string
# alias: the alias is deprecated in recent pandas releases, while the offset
# object yields the same year-end DatetimeIndex on old and new versions alike.
year_end = pd.offsets.YearEnd()

data = [446.6565, 454.4733, 455.663 , 423.6322, 456.2713, 440.5881, 425.3325, 485.1494, 506.0482, 526.792 , 514.2689, 494.211 ]
index = pd.date_range(start='1996', end='2008', freq=year_end)
oildata = pd.Series(data, index)

data = [17.5534, 21.86 , 23.8866, 26.9293, 26.8885, 28.8314, 30.0751, 30.9535, 30.1857, 31.5797, 32.5776, 33.4774, 39.0216, 41.3864, 41.5966]
index = pd.date_range(start='1990', end='2005', freq=year_end)
air = pd.Series(data, index)

data = [263.9177, 268.3072, 260.6626, 266.6394, 277.5158, 283.834 , 290.309 , 292.4742, 300.8307, 309.2867, 318.3311, 329.3724, 338.884 , 339.2441, 328.6006, 314.2554, 314.4597, 321.4138, 329.7893, 346.3852, 352.2979, 348.3705, 417.5629, 417.1236, 417.7495, 412.2339, 411.9468, 394.6971, 401.4993, 408.2705, 414.2428]
index = pd.date_range(start='1970', end='2001', freq=year_end)
livestock2 = pd.Series(data, index)

data = [407.9979 , 403.4608, 413.8249, 428.105 , 445.3387, 452.9942, 455.7402]
index = pd.date_range(start='2001', end='2008', freq=year_end)
livestock3 = pd.Series(data, index)

# Quarterly series: quarter-start dates with the fiscal year anchored on October.
data = [41.7275, 24.0418, 32.3281, 37.3287, 46.2132, 29.3463, 36.4829, 42.9777, 48.9015, 31.1802, 37.7179, 40.4202, 51.2069, 31.8872, 40.9783, 43.7725, 55.5586, 33.8509, 42.0764, 45.6423, 59.7668, 35.1919, 44.3197, 47.9137]
index = pd.date_range(start='2005', end='2010-Q4', freq='QS-OCT')
aust = pd.Series(data, index)
简单指数平滑
(无趋势,无季节性)
# Simple exponential smoothing with a fixed smoothing level (alpha).
# Alpha is a weight and must lie in [0, 1]; a value of 5 is outside that
# range, so 0.5 is used.
model1 = SimpleExpSmoothing(oildata).fit(smoothing_level=0.5)
# Forecast 4 steps ahead and name the result after the alpha actually used.
oil_pred = model1.forecast(4).rename(r'$\alpha=%s$'%model1.model.params['smoothing_level'])
plt.plot(oildata, marker='o', color='black',label = 'origin')
plt.plot(oil_pred, marker='o', color='blue',label = 'prediction')
plt.legend()
# Render this figure on its own, as the earlier plotting sections do, so
# later plots do not draw onto the same axes when run as a script.
plt.show()
<matplotlib.legend.Legend at 0x18ee8c762e0>
Holt两参数指数平滑
有长期趋势,无季节性
from statsmodels.tsa.holtwinters import Holt

# Holt's two-parameter method with fixed level and trend smoothing weights.
# `smoothing_trend` is the current name of the keyword formerly spelled
# `smoothing_slope`, which statsmodels deprecated in 0.12.
model2 = Holt(air).fit(smoothing_level=0.8, smoothing_trend=0.2)
# Forecast 5 steps beyond the end of the observed series.
air_pred = model2.forecast(5)
plt.plot(air, marker='o', color='black',label = 'origin')
plt.plot(air_pred, marker='o', color='blue',label = 'prediction')
plt.legend()
# Render this figure on its own, as the earlier plotting sections do, so
# later plots do not draw onto the same axes when run as a script.
plt.show()
<matplotlib.legend.Legend at 0x18ee735d1f0>
Holt-Winters三参数指数平滑
有季节性的时间序列
# Holt-Winters smoothing with additive trend and additive seasonality;
# seasonal_periods=4 treats every 4 observations as one seasonal cycle.
model3 = ExponentialSmoothing(
    aust,
    seasonal_periods=4,
    trend='add',
    seasonal='add',
).fit()
# Forecast 8 steps beyond the end of the observed series.
aust_pred = model3.forecast(8)
plt.plot(aust, marker='o', color='black',label = 'origin')
plt.plot(aust_pred, marker='o', color='blue',label = 'prediction')
plt.legend()
# Render this figure on its own, as the earlier plotting sections do, so
# the plot is actually displayed when run as a script.
plt.show()
<matplotlib.legend.Legend at 0x18ee6b0d430>