数据探查
import pandas as pd
import numpy as np
import matplotlib. pylab as plt
from matplotlib. pylab import rcParams
# Default figure size (width, height) in inches for the plots drawn in this script.
rcParams['figure.figsize'] = (15, 6)
# Location of the AirPassengers CSV file read by the loading step.
path = '/Users/***/workspace/Analytics_Vidhya/Articles/Time_Series_Analysis/AirPassengers.csv'
def dateparse(dates):
    """Parse 'YYYY-MM' month label(s) into pandas datetime value(s).

    The previous implementation called ``pd.datetime.strptime``; the
    ``pd.datetime`` alias was deprecated in pandas 1.0 and removed in 2.0,
    so it raised ``AttributeError`` on current pandas.

    Args:
        dates: A single 'YYYY-MM' string, or a list/array/Index of them.

    Returns:
        A ``pandas.Timestamp`` for a single string, or a ``DatetimeIndex``
        for a list-like input. Each value falls on the first day of the month.

    Raises:
        ValueError: If a value does not match the '%Y-%m' format.
    """
    return pd.to_datetime(dates, format='%Y-%m')
# Load the CSV using the 'Month' column as the index. The dates are converted
# after reading rather than via read_csv's `date_parser=` argument, which is
# deprecated since pandas 2.0; this form works on both old and new pandas.
data = pd.read_csv(path, index_col='Month')
# Convert the 'YYYY-MM' labels to a DatetimeIndex (first day of each month).
data.index = pd.to_datetime(data.index, format='%Y-%m')
# Preview the first six rows (only displayed in an interactive session/notebook).
data.head(6)
            #Passengers
Month
1949-01-01          112
1949-02-01          118
1949-03-01          132
1949-04-01          129
1949-05-01          121
1949-06-01          135
from statsmodels. tsa. stattools import adfuller
def test_stationarity ( timeseries) :
rolmean = timeseries. rolling( window= 12 ) . mean( )
rolstd = timeseries. rolling( 12 ) . std( )
fig = plt. figure( )
fig. add_subplot( )
orig = plt. plot( timeseries, color= 'blue' , label= 'Original' )
mean = plt. plot( rolmean, color= 'red' , label= 'rolling mean' )
std = plt. plot( rolstd, color= 'black' , label= 'Rolling standard deviation' )
plt. legend( loc= 'best' )
plt. title( 'Rolling Mean & Standard Deviation' )
plt. show( block= False )
print ( 'Results of Dickey-Fuller Test:' )
dftest = adfuller( timeseries, autolag= 'AIC' )
dfoutput = pd. Series( dftest[ 0 : 4 ] , index= [ 'Test Statistic