先把代码写上
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import statsmodels
from matplotlib.pylab import rcParams
from pylab import *
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.arima_model import ARIMA
def test_stationarity(timeseries):
# 决定起伏统计
rolmean=timeseries.rolling(window=12).mean()
rol_weighted_mean = timeseries.ewm(com=12).mean() # 对size个数据进行加权移动平均
rolstd = timeseries.rolling(window=12).std() # 偏离原始值多少
# 画出起伏统计
orig = plt.plot(timeseries, color='blue', label='Original')
mean = plt.plot(rolmean, color='red', label='Rolling Mean')
weighted_mean = plt.plot(rol_weighted_mean, color='green', label='weighted Mean')
std = plt.plot(rolstd, color='black', label='Rolling Std')
plt.legend(loc='best')
plt.title('Rolling Mean & Standard Deviation')
# plt.show()
# 进行df测试
print ('Result of Dickry-Fuller test')
dftest = adfuller(timeseries, autolag='AIC')
dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of observations Used'])
for key, value in dftest[4].items():
dfoutput['Critical value(%s)' % key] = value
print (dfoutput)
def decompose(timeseries):
# 返回包含三个部分 trend(趋势部分) , seasonal(季节性部分) 和residual (残留部分)
decomposition = seasonal_decompose(timeseries)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid
plt.subplot(411)
plt.plot(ts_log, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
plt.tight_layout()
return trend, seasonal, residual
if __name__=="__main__":
rcParams['figure.figsize'] = 20, 10
mpl.rcParams['font.sans-serif'] = ['SimHei']
os.chdir(u'D:\exercise')
data=pd.read_excel("weather data.xlsx",usecols=[0,1])
data["Date"]=pd.to_datetime(data["Date"],format="%d/%m/%Y")
data["Date"] = data["Date"].apply(lambda x: str(x)[0:7])
data["Date"] = pd.to_datetime(data["Date"], format="%Y-%m")
data2=data.groupby(data["Date"]).mean()
data2.columns=["AVG"]
# data2=pd.DataFrame()
# data2["Date"]=data["Date"]
# data2["AVG"]=mean["Avg"]
# data.columns=["DATE","AVG"]
# data2.set_index(["Date"])
# print(data2.index)
# print (data2.head())
# plt.subplot(121)
# plt.legend(loc='best')
# plt.plot(data2)
# plt.title("气温趋势")
# plt.show()
# plt.plot(data2)
# plt.show()
# test_stationarity(data2)
# plt.show()
ts_log = np.log(data2["AVG"])
# print(ts_log)
# plt.subplot(121)
# moving_avg = ts_log.rolling(window=12).mean()
# ts_log_moving_avg_diff = ts_log - moving_avg
# ts_log_moving_avg_diff.dropna(inplace=True)
# test_stationarity(ts_log_moving_avg_diff)
# plt.plot(ts_log, color='blue')
# plt.plot(moving_avg, color='red')
# plt.show()
# ts=data2["Avg"]
# print (ts)
# print(data2.head())
# plt.subplot(122)
# expweighted_avg = ts_log.ewm(halflife=12).mean()
# ts_log_ewma_diff = ts_log - expweighted_avg
# test_stationarity(ts_log_ewma_diff)
# plt.show()
ts_log_diff = ts_log - ts_log.shift()
ts_log_diff.dropna(inplace=True)
# test_stationarity(ts_log_diff)
# plt.show()
# 消除了trend 和seasonal之后,只对residual部分作为想要的时序数据进行处理
# trend, seasonal, residual = decompose(ts_log)
# residual.dropna(inplace=True)
# test_stationarity(residual)
# plt.show()
lag_acf = acf(ts_log_diff, nlags=20)
lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')
# Plot ACF:
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
plt.title('Autocorrelation Function')
# Plot PACF:
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()
plt.show()
model = ARIMA(ts_log, order=(2, 1, 0))
results_AR = model.fit(disp=-1)
plt.subplot(311)
plt.plot(ts_log_diff)
plt.plot(results_AR.fittedvalues, color='red')
plt.title('RSS: %.4f' % sum((results_AR.fittedvalues - ts_log_diff) ** 2))
model = ARIMA(ts_log, order=(0, 1, 2))
results_MA = model.fit(disp=-1)
plt.subplot(312)
plt.plot(ts_log_diff)
plt.plot(results_MA.fittedvalues, color='red')
plt.title('RSS: %.4f' % sum((results_MA.fittedvalues - ts_log_diff) ** 2))
model = ARIMA(ts_log, order=(2, 1, 2))
results_ARIMA = model.fit(disp=-1)
plt.subplot(313)
plt.plot(ts_log_diff)
plt.plot(results_ARIMA.fittedvalues, color='red')
plt.title('RSS: %.4f' % sum((results_ARIMA.fittedvalues - ts_log_diff) ** 2))
plt.show()
最终结果展示