时间序列



import pandas as pd, numpy as np

import matplotlib.pyplot as plt

from statsmodels.graphics.tsaplots import plot_pacf, plot_acf

from matplotlib.ticker import MaxNLocator

def acf_pacf(series, lags=40, title=None, figsize=(10,6)):

    #ACF and PACF plots

    # 求自相关和偏自相关函数

    # series 输入的时间序列

    # lags 自相关和偏自相关函数的滞后取值范围

    fig = plt.figure(figsize=figsize)

    ax1 = fig.add_subplot(211)

    ax1.set_xlabel('lags')

    ax1.set_ylabel('Autocorrelation')

    # x坐标为整数

    ax1.xaxis.set_major_locator(MaxNLocator(integer=True))

    plot_acf(series.dropna(), lags=lags, ax=ax1, title='ACF of %s'%title)

    ax2 = fig.add_subplot(212)

    ax2.set_xlabel('lags')

    ax2.set_ylabel('Partial Autocorrelation')

    ax2.xaxis.set_major_locator(MaxNLocator(integer=True))

    plot_pacf(series.dropna(), lags=lags, ax=ax2, title='PACF of %s'%title)

    fig.tight_layout()

    

def test_stationarity(timeseries, window=10):

    # 求移动平均和移动标准差

    # window为选取的时间窗口个数

    rolmean = timeseries.rolling(window=window, center=False).mean()

    rolstd = timeseries.rolling(window=window, center=False).std()

 

    # 将以周重取样后的时间序列、移动平均和移动标准差制成折线图

#    orig = plt.plot(timeseries, color='blue', label='Original')

#    mean = plt.plot(rolmean, color='red', label='Rolling Mean')

#    std = plt.plot(rolstd, color='black', label='Rolling Std')

#    plt.legend(loc='best')

#    plt.title('Rolling Mean & Standard Deviation')

#    plt.show(block=False)

 

    # Augmented Dickey-Fuller检验测试时间序列稳定性:

    print('Results of Augmented Dickey-Fuller Test:')

    # 使用减小AIC的办法估算ADF测试所需的滞后数

    dftest = adfuller(timeseries, autolag='AIC')

    # ADF测试结果、显著性概率、所用的滞后数和所用的观测数打印出来

    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value',

                                             'Num Lags Used',

                                             'Num Observations Used'])

    for key, value in dftest[4].items():

        dfoutput['Critical Value (%s)' % key] = value

#    print(dfoutput)    

    

from statsmodels.tsa.seasonal import seasonal_decompose

from statsmodels.stats.diagnostic import acorr_ljungbox

def decompose_plot(series, title=''):

    decomposition = seasonal_decompose(series)

    trend = decomposition.trend

    seasonal = decomposition.seasonal

    residual = decomposition.resid

    fig = decomposition.plot()

    fig.set_size_inches(10,6)

    fig.suptitle(title)

    fig.tight_layout()

    fig2 = acf_pacf(residual, title='Residuals', figsize=(10,4))

 

# 读取CSV

df = pd.read_csv('D:/jingjihu/portland-oregon-average-monthly-.csv')

#print(df.head(), '\nindex type:\n', type(df.index))

 

# '%Y-%m'格式转换CSV的日期, 为了方便处理, 将时间段转为timestamp

df['Month'] = pd.to_datetime(df['Month'], format='%Y-%m')

 

# 索引并resample为月

indexed_df = df.set_index('Month')

ts = indexed_df['riders']

ts = ts.resample('M').sum()

print(ts.head(), '\nindex type:\n', type(ts.index))

 

ts.plot(title='Monthly Num. of Ridership')

test_stationarity(ts)

decomposition = seasonal_decompose(ts)  

fig = plt.figure()  

fig = decomposition.plot()  

fig.set_size_inches(10, 6)

acf_pacf(decomposition.resid, title='Residuals')

ts_sdiff = ts - ts.shift(12)

print(ts_sdiff.shape)

test_stationarity(ts_sdiff.dropna())

 

decomposition = seasonal_decompose(ts_sdiff.dropna(), freq=12)  

fig = plt.figure()  

fig = decomposition.plot()  

fig.set_size_inches(10, 6)

acf_pacf(decomposition.resid, title='Residuals')

ts_diff_of_sdiff = ts_sdiff - ts_sdiff.shift(1)

test_stationarity(ts_diff_of_sdiff.dropna())

decomposition = seasonal_decompose(ts_diff_of_sdiff.dropna(), freq=12)  

fig = plt.figure()  

fig = decomposition.plot()  

fig.set_size_inches(10, 6)

acf_pacf(decomposition.resid, title='Residuals')

ts_log = np.log(ts)

ts_log_sdiff = ts_log - ts_log.shift(12)

ts_diff_of_log_sdiff = ts_log_sdiff - ts_log_sdiff.shift(1)

test_stationarity(ts_diff_of_log_sdiff.dropna())

decomposition = seasonal_decompose(ts_diff_of_log_sdiff.dropna(), freq=12)  

fig = plt.figure()  

fig = decomposition.plot()  

fig.set_size_inches(10, 6)

acf_pacf(decomposition.resid, title='Residuals')

from statsmodels.tsa.statespace.sarimax import SARIMAX

# SARIMA的参数: order = p, d, q, seasonal_order=P, D, Q, season的周期=12

model = SARIMAX(ts, order=(0,1,0),seasonal_order=(0,1,1,12))

# 已拟合的模型

fitted = model.fit()  

# ARIMA的结果

print(fitted.summary())

 

 比较拟合的数据和原始数据,

#ts.plot()

fitted.fittedvalues.plot(color='red')

 

# 拟合的数据和原始数据的残差

residuals = pd.DataFrame(fitted.resid)a

 

acf_pacf(residuals, title='Residuals', figsize=(10,4))

 

# 计算拟残差平方和(RSS)

plt.title('RSS: %.4f'% sum((fitted.fittedvalues-ts)**2))

 

# 残差的核密度估计

residuals.plot(kind='kde')

print('\n\nResiduals Summary:\n', residuals.describe())

#

plt.show()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值