[数据挖掘] 44 时间序列预测3 自相关与ARIMA模型 基于回归的预测 美国铁路(美铁)客运公司数据_哔哩哔哩_bilibili
from statsmodels.graphics import tsaplots
# 序列自相关模型绘图
tsaplots.plot_acf(train_df['1991-01-01':'1993-01-01'].Ridership,lags=23)
# plt.show()
# 趋势与季节性效应的残差序列自相关
formula = 'Ridership~trend + np.square(trend) + C(Month)'
ridership_lm_trendseason = smf.ols(formula=formula, data=train_df_season).fit()
ridership_lm_trendseason.predict(train_df_season)
residual2 = train_df_season['Ridership'] - ridership_lm_trendseason.predict(train_df_season)
print(residual2[-1]) #12.107558346749784
tsaplots.plot_acf(residual2,lags=(len(residual2[:14])-1))
plt.title('趋势与季节性效应模型的残差序列自相关')
# plt.show()
def singleGraphLayout(ax, ylim, train_ts, test_ts):
if test_ts is not None:
ax.set_xlim('1990', '2004-6')
ax.set_ylim(*ylim)
ax.set_xlabel('Time')
one_month = pd.Timedelta('31 days')
xtrain = (min(train_ts.index), max(train_ts.index) - one_month)
xtest = (min(test_ts.index) + one_month, max(test_ts.index) - one_month)
xtv = xtrain[1] + 0.5 * (xtest[0] - xtrain[1])
ypos = 0.9 * ylim[1] + 0.1 * ylim[0]
ax.add_line(plt.Line2D(xtrain, (ypos, ypos), color='black', linewidth=0.5))
ax.add_line(plt.Line2D(xtest, (ypos, ypos), color='black', linewidth=0.5))
ax.axvline(x=xtv, ymin=0, ymax=1, color='black', linewidth=0.5)
ypos = 0.925 * ylim[1] + 0.075 * ylim[0]
ax.text('1995', ypos, 'Training')
ax.text('2002-3', ypos, 'test')
else:
one_month = pd.Timedelta('31 days')
xtrain = (min(train_df.index), max(train_df.index) - one_month)
xtest = (min(test_df.index) + one_month, max(test_df.index) - one_month)
xtv = xtrain[1] + 0.5 * (xtest[0] - xtrain[1])
ax.set_xlim('1990', '2004-6')
ax.set_ylim(-250, 250)
ax.set_xlabel('Time')
one_month = pd.Timedelta('31 days')
xtrain = (min(train_ts.index), max(train_ts.index) - one_month)
ypos = 0.9 * 250 + 0.1 * (-250)
ax.add_line(plt.Line2D(xtrain, (ypos, ypos), color='black', linewidth=0.5))
ax.axvline(x=xtv, ymin=0, ymax=1, color='black', linewidth=0.5)
ypos = 0.925 * ylim[1] + 0.075 * ylim[0]
ax.text('1995', 0.9 * 250 - 12.5, 'Training')
ax.text('2002-3', 0.9 * 250 - 12.5, 'test')
def graphLayout(axes, train_ts, test_ts):
if test_ts is not None:
singleGraphLayout(axes[0], [1300, 2550], train_ts, test_ts)
singleGraphLayout(axes[1], [-550, 550], train_ts, test_ts)
test_ts.plot(y='Ridership', ax=axes[0], color='C0', linestyle='dashed', linewidth=0.75)
train_ts.plot(y='Ridership', ax=axes[0], color='C0', linewidth=0.75)
axes[1].axhline(y=0, xmin=0, xmax=1, color='black', linewidth=0.5)
axes[0].set_xlabel('')
axes[0].set_ylabel('Ridership (in 000s)')
axes[1].set_ylabel('Forecast Errors')
if axes[0].get_legend():
axes[0].get_legend().remove()
else:
singleGraphLayout(axes, [-550, 550], train_ts, test_ts=None)
axes.set_ylabel('Forecast Errors')
# 加入自相关信息提高预测准确性
formula = 'Ridership~trend + np.square(trend) + C(Month)'
train_lm_trendseason = smf.ols(formula=formula, data=train_df_season).fit()
train_res_arima = ARIMA(train_lm_trendseason.resid, order=(1, 0, 0), freq='MS', trend='n').fit()
forecast = train_res_arima.get_forecast(1) # 预测未来1个时间点
forecast_mean = forecast.predicted_mean[0] # 获取预测值的均值
conf_int = forecast.conf_int() # 获取预测值的置信区间
conf_int_low, conf_int_high = conf_int.iloc[0,0], conf_int.iloc[0,1] # 获取置信区间上下限
# print(forecast)
print(conf_int)
print(f'Forecast {forecast_mean:.3f} [{conf_int_low:.3f}, {conf_int_high:.3f}]')
print(pd.DataFrame({'coef': train_res_arima.params, 'std err': train_res_arima.bse}))
# print(train_res_arima.summary())
ridership_trendseason = ridership_df
ridership_trendseason['Month'] = ridership_trendseason.index.month
train_df_trendseason = ridership_trendseason[:nTrain]
valid_df_trendseason = ridership_trendseason[nTrain:]
# 提取训练集的残差序列
fitted_values = train_res_arima.fittedvalues # 拟合的值
pred_residuals = train_res_arima.resid # 预测的残差
actual_rresiduals = train_df_trendseason['Ridership'] - train_lm_trendseason.fittedvalues # 实际的残差
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(9, 3.725), dpi=200)
actual_rresiduals.plot(ax=axes, color='green')
pred_residuals.plot(ax=axes,color='purple')
graphLayout(axes, train_df_season, test_ts=None)
plt.show()
#预测残差之间的自相关性
tsaplots.plot_acf(pred_residuals,lags=(len(residual2[:14])-1))
plt.show()