import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import datetime
import warnings
warnings.filterwarnings("ignore")
import statsmodels.api as sm
from statsmodels.graphics.api import qqplot
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller as ADF
## Configure fonts so Chinese text renders correctly instead of as garbled glyphs
mpl.rcParams.update({
    'font.sans-serif': [u'simHei'],
    'axes.unicode_minus': False,
})

# Load the sales workbook and print a summary of its columns.
data = pd.read_excel("天猫销售数据.xlsx")
data.info()

# NOTE(review): "Date" is presumably already parsed as datetime by read_excel;
# an earlier revision converted it from "%Y/%m/%d" strings with strptime.
# Confirm against the data.info() output, because later steps slice and
# predict using date strings.
indexed = data.set_index("Date")  # use Date as the index
# Keep only the Orders column; downstream code refers to this Series as `df`.
df = pd.Series(indexed.Orders, index=indexed.index)

# Plot the raw order counts over time.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df)
ax.set(
    title="Orders of Date",
    ylabel="Orders",
    xlabel="Date",
)
plt.show()
## Difference the series and check stationarity
def diff(timeseries):
    """Run the ADF test on the first and second differences of a series.

    Parameters
    ----------
    timeseries :
        Object exposing pandas-style ``.diff()`` (the script passes the
        Orders Series).

    Returns
    -------
    list
        ``[adf_first_difference, adf_second_difference]``, where each element
        is whatever ``ADF`` returns for that differenced series.
    """
    # Drop the NaN that differencing leaves at the start instead of filling it
    # with 0. A zero-filled value is a fabricated observation, and differencing
    # the padded series again turns the first "second difference" into
    # x[1] - x[0], which is not a second difference at all.
    time_diff1 = timeseries.diff(1).dropna()  # first-order difference
    time_diff2 = time_diff1.diff(1).dropna()  # second-order difference
    return [ADF(time_diff1), ADF(time_diff2)]
# Run the stationarity check on the Orders series. As a plain script statement
# the return value is discarded; it is only echoed in a notebook/REPL.
diff(df)
# Output recorded by the original author from one run (first entry: first
# difference, second entry: second difference). Values depend on the data file
# and library versions. Tuple layout, per the statsmodels adfuller docs --
# confirm for the installed version: (test statistic, p-value, lags used,
# observations used, critical values, best information criterion).
#[(-3.683025128820224,
# 0.004358356299291195,
# 10,
# 138,
# {'1%': -3.47864788917503,
# '5%': -2.882721765644168,
# '10%': -2.578065326612056},
# 1423.5325819802563),
# (-9.388939765399352,
# 6.641377737915045e-16,
# 9,
# 139,
# {'1%': -3.4782936965183815,
# '5%': -2.882567574015525,
# '10%': -2.5779830117488745},
# 1434.1670882621088)]
#
def autocorr(time_series,lags):
    """Show the ACF (upper panel) and PACF (lower panel) of a series.

    Parameters
    ----------
    time_series :
        Series handed unchanged to the statsmodels plotting helpers.
    lags :
        Forwarded as the ``lags`` argument of both plots.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    canvas = plt.figure(figsize=(12, 8))
    # (subplot position, plotting function): ACF on top, PACF underneath.
    panels = (
        (211, sm.graphics.tsa.plot_acf),
        (212, sm.graphics.tsa.plot_pacf),
    )
    for position, draw in panels:
        draw(time_series, lags=lags, ax=canvas.add_subplot(position))
    plt.show()
# First-order difference for visual ACF/PACF inspection. The leading NaN is
# dropped rather than replaced by 0 so that no fabricated observation enters
# the correlation estimates.
time_diff1 = df.diff(1).dropna()
autocorr(time_diff1, 30)

# Search ARMA(p, q) orders with p, q <= 7 and report the orders that minimise
# AIC and BIC.
# NOTE(review): the search runs on the undifferenced series `df`, while the
# models fitted afterwards use d=1 -- confirm whether `time_diff1` was intended.
# NOTE(review): "nc" is the legacy spelling of "no constant"; recent statsmodels
# releases document trend="n". Confirm against the installed version.
data_eva = sm.tsa.arma_order_select_ic(
    df, ic=["aic", "bic"], trend="nc", max_ar=7, max_ma=7
)
print("data_AIC", data_eva.aic_min_order)
print("data_BIC", data_eva.bic_min_order)
# Output recorded by the original author (apparently from two separate runs):
#
#data_AIC (7, 7)
#data_BIC (1, 1)
#data_AIC (1, 3)
#data_BIC (1, 1)
def _fit_and_report(label, order):
    """Fit SARIMAX(order) on `df`, print AIC/BIC/HQIC under `label`, return the fit."""
    fitted = sm.tsa.SARIMAX(df, order=order).fit()
    print(label, fitted.aic, fitted.bic, fitted.hqic)
    return fitted


# Fit the candidate (p, d, q) models and print their information criteria.
# NOTE(review): the variable names / printed labels do not match the fitted
# orders (e.g. "arma_77" is order (3, 1, 1)); they are kept because later code
# refers to `arma_77`.
arma_77 = _fit_and_report("arma_77", (3, 1, 1))
arma_71 = _fit_and_report("arma_71", (1, 1, 7))
arma_11 = _fit_and_report("arma_11", (1, 1, 1))
arma_35 = _fit_and_report("arma_35", (3, 1, 5))
# Built-in residual diagnostic panels for the selected model.
arma_77.plot_diagnostics(figsize=(12, 8))

# Durbin-Watson test on the residuals.
# A statistic close to 2 (autocorrelation coefficient near 0) indicates that
# the residuals are not autocorrelated.
resid = arma_77.resid
print(sm.stats.durbin_watson(resid.values))

# ACF / PACF of the residuals.
fig = plt.figure(figsize=(16, 12))
ax1 = fig.add_subplot(211)
sm.graphics.tsa.plot_acf(resid, lags=15, ax=ax1)  # autocorrelation
ax2 = fig.add_subplot(212)
sm.graphics.tsa.plot_pacf(resid, lags=15, ax=ax2)  # partial autocorrelation

# Table of residual autocorrelations with the Q statistic and p-value per lag
# (qstat=True makes acf() return those two extra arrays).
# One constant drives both the acf() call and the lag column, so they cannot
# drift apart.
Q_TABLE_LAGS = 20
acf, q, p = sm.tsa.acf(resid.values.squeeze(), nlags=Q_TABLE_LAGS, qstat=True)
# Use a dedicated name: the original reused `data`, overwriting the DataFrame
# loaded from the workbook.
q_rows = np.c_[range(1, Q_TABLE_LAGS + 1), acf[1:], q, p]  # acf[0] is lag 0
table = pd.DataFrame(q_rows, columns=["lag", "AC", "Q", "P-value"])
print(table.set_index("lag"))
# Predict orders for 2020-07-01 .. 2020-07-23 with the selected model.
pre = arma_77.predict("2020-07-01", "2020-07-23", dynamic=True)

# Plot the forecast curve on top of the recent history.
fig, ax = plt.subplots(figsize=(12, 8))
# `.ix` was removed in pandas 1.0; `.loc` performs the same label-based slice.
ax = df.loc["2020-04-05":].plot(ax=ax)
# Reuse `pre` instead of predicting a second time. The original second call
# passed `ax=` / `plot_insample=`, which are plotting options of the old
# `plot_predict` API, not of `predict`.
pre.plot(ax=ax, style="r-.")
# Title text: "Sales forecast for the next 10 days".
# NOTE(review): the predicted range above spans 23 days -- confirm the wording.
plt.title("未来10天的销售额预测", fontsize=20)
plt.show()
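# Residue from the source web page (not code):
# "python按时间坐标预测销量" -- "Forecasting sales along a time axis with Python"
# "最新推荐文章于 2023-09-26 13:57:23 发布" -- "latest recommended article published 2023-09-26 13:57:23"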