模型介绍
https://blog.csdn.net/u012735708/article/details/82460962
实例介绍
给定某航班乘客人数的月度变化数据(AirPassengers.csv),对其取对数后使用ARIMA模型进行拟合与预测
代码
import pandas as pd
import numpy as np
from statsmodels.tsa.arima_model import ARIMA
import matplotlib as mpl
import matplotlib.pyplot as plt
import warnings
import matplotlib.patches as mpatches
from statsmodels.tools.sm_exceptions import HessianInversionWarning
def extend(a, b):
    """Stretch the interval ``(a, b)`` outward by 5% of its span at each end.

    Returns:
        The pair ``(1.05*a - 0.05*b, 1.05*b - 0.05*a)``.
    """
    lower = 1.05 * a - 0.05 * b
    upper = 1.05 * b - 0.05 * a
    return lower, upper
def date_parser(date):
    """Parse a ``'YYYY-MM'`` label into a ``datetime`` for use as a time index.

    Args:
        date: Month label such as ``'1949-01'``.

    Returns:
        A ``datetime.datetime`` for the first day of that month (midnight).

    Raises:
        ValueError: If ``date`` does not match the ``'%Y-%m'`` format.
    """
    # ``pd.datetime`` was only a re-export of the standard-library class and
    # is no longer available in current pandas releases, so go to the
    # standard library directly.
    from datetime import datetime

    return datetime.strptime(date, '%Y-%m')
if __name__ == '__main__':
    # Script entry point: load the passenger series, fit an ARIMA(p, d, q)
    # model on its natural logarithm, then plot and save one of three views
    # selected by ``show``.

    # Silence statsmodels' HessianInversionWarning.
    warnings.filterwarnings(action='ignore', category=HessianInversionWarning)

    # Display width used when printing pandas objects.
    pd.set_option('display.width', 100)
    # suppress=True prints floats in fixed-point instead of scientific notation.
    np.set_printoptions(linewidth=100, suppress=True)

    # Use the 'Month' column as the index and convert it to datetimes with
    # date_parser (labels are expected in 'YYYY-MM' form).
    # NOTE(review): recent pandas deprecates the ``date_parser`` argument in
    # favour of ``date_format`` -- revisit when the pandas requirement changes.
    data = pd.read_csv('AirPassengers.csv', header=0, parse_dates=['Month'],
                       date_parser=date_parser, index_col=['Month'])
    # Rename the value column to a plain identifier.
    data.rename(columns={'#Passengers': 'Passengers'}, inplace=True)
    print(data.dtypes)

    # SimHei is selected so the Chinese labels render; unicode_minus is turned
    # off so tick labels use an ASCII '-' (presumably because the CJK font
    # lacks the Unicode minus glyph -- TODO confirm).
    mpl.rcParams['font.sans-serif'] = [u'SimHei']
    mpl.rcParams['axes.unicode_minus'] = False

    # ``np.float`` was just an alias of the builtin ``float`` and has been
    # removed from current NumPy, so use the builtin.
    x = data['Passengers'].astype(float)
    # Natural logarithm of the series.
    x = np.log(x)
    print(x.head(10))

    show = 'prime'  # which figure to draw: 'diff', 'ma' or 'prime'
    d = 1
    diff = x - x.shift(periods=d)        # difference against the value d steps back
    ma = x.rolling(window=12).mean()     # 12-point rolling mean
    xma = x - ma                         # log series minus its rolling mean

    # Time-series model: ARIMA with AR order p, differencing d, MA order q.
    # NOTE(review): this relies on the legacy ``statsmodels.tsa.arima_model``
    # API imported at the top of the file (``fit(disp=...)``).
    p = 2
    q = 2
    model = ARIMA(endog=x, order=(p, d, q))
    arima = model.fit(disp=-1)  # disp < 0: do not print fitting progress
    prediction = arima.fittedvalues
    print(type(prediction))

    # The fitted values are treated as increments: accumulate them and add the
    # first observation to get back to the log scale. ``.iloc[0]`` selects by
    # position explicitly; ``x[0]`` on a datetime-indexed Series depends on a
    # deprecated positional fallback.
    y = prediction.cumsum() + x.iloc[0]
    mse = ((x - y) ** 2).mean()
    rmse = np.sqrt(mse)

    plt.figure(facecolor='w')
    if show == 'diff':
        plt.plot(x, 'r-', lw=2, label=u'原始数据')
        plt.plot(diff, 'g-', lw=2, label=u'%d阶差分' % d)
        title = u'乘客人数变化曲线 - 取对数'
    elif show == 'ma':
        plt.plot(xma, 'g-', lw=2, label=u'ln原始数据 - ln滑动平均数据')
        plt.plot(prediction, 'r-', lw=2, label=u'预测数据')
        title = u'滑动平均值与MA预测值'
    else:
        plt.plot(x, 'r-', lw=2, label=u'原始数据')
        plt.plot(y, 'g-', lw=2, label=u'预测数据')
        title = u'对数乘客人数与预测值(AR=%d, d=%d, MA=%d):RMSE=%.4f' % (p, d, q, rmse)
    plt.legend(loc='upper left')
    # Pass the on/off flag positionally: its keyword name differs between
    # matplotlib versions (``b`` in older releases, ``visible`` in newer ones).
    plt.grid(True, ls=':')
    plt.title(title, fontsize=18)
    # ``pad`` must be given by keyword in current matplotlib.
    plt.tight_layout(pad=2)
    # The figure is saved under the plot title.
    plt.savefig('%s.png' % title)
    plt.show()
实验结果
运行结果:
Passengers int64
dtype: object
Month
1949-01-01 4.718499
1949-02-01 4.770685
1949-03-01 4.882802
1949-04-01 4.859812
1949-05-01 4.795791
1949-06-01 4.905275
1949-07-01 4.997212
1949-08-01 4.997212
1949-09-01 4.912655
1949-10-01 4.779123
Name: Passengers, dtype: float64
show = 'prime'
show = 'ma'
show = 'diff'