时间序列实现-上海宝山气温分析

先把代码写上

#!/usr/bin/python3
# -*- coding: utf-8 -*-
import  pandas as pd
import  numpy as np
import  matplotlib.pyplot as plt
import  os
import  statsmodels
from matplotlib.pylab import rcParams
from pylab import *
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.arima_model import ARIMA

def test_stationarity(timeseries):
    # 决定起伏统计
    rolmean=timeseries.rolling(window=12).mean()
    rol_weighted_mean = timeseries.ewm(com=12).mean()  # 对size个数据进行加权移动平均
    rolstd = timeseries.rolling(window=12).std()  # 偏离原始值多少
    # 画出起伏统计
    orig = plt.plot(timeseries, color='blue', label='Original')
    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
    weighted_mean = plt.plot(rol_weighted_mean, color='green', label='weighted Mean')
    std = plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    # plt.show()
    # 进行df测试
    print ('Result of Dickry-Fuller test')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of observations Used'])
    for key, value in dftest[4].items():
        dfoutput['Critical value(%s)' % key] = value
    print (dfoutput)


def decompose(timeseries):
    # 返回包含三个部分 trend(趋势部分) , seasonal(季节性部分) 和residual (残留部分)
    decomposition = seasonal_decompose(timeseries)

    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid

    plt.subplot(411)
    plt.plot(ts_log, label='Original')
    plt.legend(loc='best')
    plt.subplot(412)
    plt.plot(trend, label='Trend')
    plt.legend(loc='best')
    plt.subplot(413)
    plt.plot(seasonal, label='Seasonality')
    plt.legend(loc='best')
    plt.subplot(414)
    plt.plot(residual, label='Residuals')
    plt.legend(loc='best')
    plt.tight_layout()

    return trend, seasonal, residual
if __name__=="__main__":
    rcParams['figure.figsize'] = 20, 10
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    os.chdir(u'D:\exercise')

    data=pd.read_excel("weather data.xlsx",usecols=[0,1])
    data["Date"]=pd.to_datetime(data["Date"],format="%d/%m/%Y")
    data["Date"] = data["Date"].apply(lambda x: str(x)[0:7])
    data["Date"] = pd.to_datetime(data["Date"], format="%Y-%m")
    data2=data.groupby(data["Date"]).mean()
    data2.columns=["AVG"]
    # data2=pd.DataFrame()
    # data2["Date"]=data["Date"]
    # data2["AVG"]=mean["Avg"]
    # data.columns=["DATE","AVG"]

    # data2.set_index(["Date"])
    # print(data2.index)
    # print (data2.head())
    # plt.subplot(121)
    # plt.legend(loc='best')
    # plt.plot(data2)
    # plt.title("气温趋势")
    # plt.show()


    # plt.plot(data2)
    # plt.show()
    # test_stationarity(data2)
    # plt.show()

    ts_log = np.log(data2["AVG"])
    # print(ts_log)
    # plt.subplot(121)
    # moving_avg = ts_log.rolling(window=12).mean()
    # ts_log_moving_avg_diff = ts_log - moving_avg
    # ts_log_moving_avg_diff.dropna(inplace=True)
    # test_stationarity(ts_log_moving_avg_diff)
    # plt.plot(ts_log, color='blue')
    # plt.plot(moving_avg, color='red')
    # plt.show()
    # ts=data2["Avg"]
    # print (ts)
    # print(data2.head())
    # plt.subplot(122)
    # expweighted_avg = ts_log.ewm(halflife=12).mean()
    # ts_log_ewma_diff = ts_log - expweighted_avg
    # test_stationarity(ts_log_ewma_diff)
    # plt.show()
    ts_log_diff = ts_log - ts_log.shift()
    ts_log_diff.dropna(inplace=True)
    # test_stationarity(ts_log_diff)
    # plt.show()
    # 消除了trend 和seasonal之后,只对residual部分作为想要的时序数据进行处理
    # trend, seasonal, residual = decompose(ts_log)
    # residual.dropna(inplace=True)
    # test_stationarity(residual)
    # plt.show()
    lag_acf = acf(ts_log_diff, nlags=20)
    lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')
    # Plot ACF:
    plt.subplot(121)
    plt.plot(lag_acf)
    plt.axhline(y=0, linestyle='--', color='gray')
    plt.axhline(y=-1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
    plt.axhline(y=1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
    plt.title('Autocorrelation Function')

    # Plot PACF:
    plt.subplot(122)
    plt.plot(lag_pacf)
    plt.axhline(y=0, linestyle='--', color='gray')
    plt.axhline(y=-1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
    plt.axhline(y=1.96 / np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
    plt.title('Partial Autocorrelation Function')
    plt.tight_layout()
    plt.show()
    model = ARIMA(ts_log, order=(2, 1, 0))
    results_AR = model.fit(disp=-1)
    plt.subplot(311)
    plt.plot(ts_log_diff)
    plt.plot(results_AR.fittedvalues, color='red')
    plt.title('RSS: %.4f' % sum((results_AR.fittedvalues - ts_log_diff) ** 2))
    model = ARIMA(ts_log, order=(0, 1, 2))
    results_MA = model.fit(disp=-1)
    plt.subplot(312)
    plt.plot(ts_log_diff)
    plt.plot(results_MA.fittedvalues, color='red')
    plt.title('RSS: %.4f' % sum((results_MA.fittedvalues - ts_log_diff) ** 2))
    model = ARIMA(ts_log, order=(2, 1, 2))
    results_ARIMA = model.fit(disp=-1)
    plt.subplot(313)
    plt.plot(ts_log_diff)
    plt.plot(results_ARIMA.fittedvalues, color='red')
    plt.title('RSS: %.4f' % sum((results_ARIMA.fittedvalues - ts_log_diff) ** 2))
    plt.show()

 

最终结果展示

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值