Sales Forecasting 03 (Preliminary Data Processing: an ARIMA Baseline)

Import the libraries

import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
plt.style.use("fivethirtyeight")
plt.rcParams["font.sans-serif"] = ["Microsoft YaHei"]
plt.rcParams["axes.unicode_minus"] = False
from sklearn.preprocessing import OneHotEncoder

import statsmodels.api as sm 
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from pandas.tseries.offsets import DateOffset


from statsmodels.tsa.arima.model import ARIMA

import warnings
warnings.filterwarnings("ignore")

Load the data

path_train = "../preocess_data/train_data_o.csv"
path_test = "../data/test_data.csv"
data = pd.read_csv(path_train)
data_test = pd.read_csv(path_test)
# unify the date column: the test set only carries "日期"
data["运营日期"] = pd.to_datetime(data["运营日期"])
data_test["运营日期"] = pd.to_datetime(data_test["日期"])
data.drop(["行ID", "日期"], axis=1, inplace=True)
data_test.drop(["行ID", "日期"], axis=1, inplace=True)

Encode the 折扣 (discount) column

# 折扣 is binary, so drop="if_binary" yields a single 0/1 column
enc = OneHotEncoder(drop="if_binary")
enc.fit(data["折扣"].values.reshape(-1, 1))
enc.transform(data["折扣"].values.reshape(-1, 1)).toarray()
enc.transform(data_test["折扣"].values.reshape(-1, 1)).toarray()
array([[0.],
       [0.],
       [0.],
       ...,
       [1.],
       [0.],
       [0.]])
data["折扣"] = enc.transform(data["折扣"].values.reshape(-1,1)).toarray()
data_test["折扣"]  = enc.transform(data_test["折扣"].values.reshape(-1,1)).toarray()

Derive date features

def time_derivation(t, col="运营日期"):
    t["year"] = t[col].dt.year
    t["month"] = t[col].dt.month
    t["day"] = t[col].dt.day
    t["quarter"] = t[col].dt.quarter
    # Series.dt.weekofyear was removed in pandas 2.0; isocalendar() is the replacement
    t["weekofyear"] = t[col].dt.isocalendar().week.astype(int)
    t["dayofweek"] = t[col].dt.dayofweek + 1  # Monday=1 ... Sunday=7
    t["weekend"] = (t["dayofweek"] > 5).astype(int)
    return t

data_train  = time_derivation(data)
data_test_  = time_derivation(data_test)
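A quick check of the derived columns on a toy frame (illustrative only, not the project data):

# 2018-01-01 is a Monday and 2018-01-06/07 a Saturday/Sunday, so with the
# Monday=1..Sunday=7 convention the weekend flag fires on the last two rows.
toy = pd.DataFrame({"运营日期": pd.to_datetime(["2018-01-01", "2018-01-06", "2018-01-07"])})
print(time_derivation(toy)[["运营日期", "dayofweek", "weekend"]])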

Explore store by store

A first attempt on store 1

# training set
data_train_1 = data_train[data_train["商店ID"] == 1]
# test set
data_test_1 = data_test_[data_test_["商店ID"] == 1]
plt.figure(figsize=(16, 8))
plt.plot(data_train_1["运营日期"], data_train_1["销量"])
plt.xlabel("日期", fontsize=20)
plt.ylabel("销量", fontsize=20)
plt.title("1号店的销量", fontsize=20)
[Figure: daily sales of store 1 ("1号店的销量")]

data_train_1
|  | 商店ID | 商店类型 | 位置 | 地区 | 节假日 | 折扣 | 销量 | 运营日期 | year | month | day | quarter | weekofyear | dayofweek | weekend |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | S1 | L3 | R1 | 1 | 1.0 | 7011.84 | 2018-01-01 | 2018 | 1 | 1 | 1 | 1 | 1 | 0 |
| 607 | 1 | S1 | L3 | R1 | 0 | 1.0 | 42369.00 | 2018-01-02 | 2018 | 1 | 2 | 1 | 1 | 2 | 0 |
| 1046 | 1 | S1 | L3 | R1 | 0 | 1.0 | 50037.00 | 2018-01-03 | 2018 | 1 | 3 | 1 | 1 | 3 | 0 |
| 1207 | 1 | S1 | L3 | R1 | 0 | 1.0 | 44397.00 | 2018-01-04 | 2018 | 1 | 4 | 1 | 1 | 4 | 0 |
| 1752 | 1 | S1 | L3 | R1 | 0 | 1.0 | 47604.00 | 2018-01-05 | 2018 | 1 | 5 | 1 | 1 | 5 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 186569 | 1 | S1 | L3 | R1 | 0 | 1.0 | 33075.00 | 2019-05-27 | 2019 | 5 | 27 | 2 | 22 | 1 | 0 |
| 187165 | 1 | S1 | L3 | R1 | 0 | 1.0 | 37317.00 | 2019-05-28 | 2019 | 5 | 28 | 2 | 22 | 2 | 0 |
| 187391 | 1 | S1 | L3 | R1 | 0 | 1.0 | 44652.00 | 2019-05-29 | 2019 | 5 | 29 | 2 | 22 | 3 | 0 |
| 187962 | 1 | S1 | L3 | R1 | 0 | 1.0 | 42387.00 | 2019-05-30 | 2019 | 5 | 30 | 2 | 22 | 4 | 0 |
| 188113 | 1 | S1 | L3 | R1 | 1 | 1.0 | 39843.78 | 2019-05-31 | 2019 | 5 | 31 | 2 | 22 | 5 | 0 |

516 rows × 15 columns

Hypothesis testing: stationarity (ADF)

**Null hypothesis:** the series is non-stationary; if the null cannot be rejected, the series contains a unit root.
**Alternative hypothesis:** the series is stationary; rejecting the null means there is no unit root.

  • p > 0.05: fail to reject the null; the series has a unit root and is non-stationary.
  • p <= 0.05: reject the null; the series has no unit root and is stationary.
from statsmodels.tsa.stattools import adfuller
data_train_1["销量"].std()
11816.030434190745
result = adfuller(data_train_1["销量"])
result 
(-4.0896913250202225,
 0.0010072301346594798,
 13,
 502,
 {'1%': -3.4434437319767452,
  '5%': -2.8673146875484368,
  '10%': -2.569845688481135},
 10594.579494253907)
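The tuple is easier to read once unpacked (field order per the statsmodels `adfuller` documentation):

# adfuller returns: test statistic, p-value, lags used, number of
# observations, critical values, and the information-criterion optimum
stat, pvalue, usedlag, nobs, critical_values, icbest = result
print("ADF stat {:.3f} vs 5% critical value {:.3f}".format(stat, critical_values["5%"]))
# -4.090 < -2.867, so the unit-root null is rejected at the 5% level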
def adfuller_test(data):
    from statsmodels.tsa.stattools import adfuller
    result = adfuller(data)
    labels = ["ADF Test Statistic", "P-Value"]

    for value, label in zip(result, labels):
        print(label + ":" + str(value))

    if result[1] <= 0.05:
        print("Reject the null: the series is stationary!")
    else:
        print("Fail to reject the null: the series is non-stationary!")

    return result[1]

p_value = adfuller_test(data_train_1["销量"])
p_value
ADF Test Statistic:-4.0896913250202225
P-Value:0.0010072301346594798
Reject the null: the series is stationary!

0.0010072301346594798
def diff_test(data, col="销量"):
    columns = ["D1","D2","D3","D4","D5","D6","D7","D8","D9","D10","D11","D12","D24"]

    pvalues = []
    stds = []

    for idx, degree in enumerate([*range(1, 13), 24]):
        print("{}-step difference".format(degree))

        # shift by the actual step size; the original shift(idx+1) made the
        # "D24" column a 13-step difference, which is what the recorded
        # output below reflects
        data[columns[idx]] = data[col] - data[col].shift(degree)

        pvalue_ = adfuller_test(data[columns[idx]].dropna())
        std_ = data[columns[idx]].std()

        pvalues.append(pvalue_)
        stds.append(std_)

        print("Std of the differenced series: {}".format(std_))
        print("\n")
    return pvalues, stds

pvalues, stds = diff_test(data_train_1, col="销量")
1-step difference
ADF Test Statistic:-8.464898318877633
P-Value:1.5318720959824456e-13
Reject the null: the series is stationary!
Std of the differenced series: 13817.551491473438


2-step difference
ADF Test Statistic:-8.522739847666854
P-Value:1.0895357544082096e-13
Reject the null: the series is stationary!
Std of the differenced series: 15742.483969864334


3-step difference
ADF Test Statistic:-8.735227057175196
P-Value:3.113347686828788e-14
Reject the null: the series is stationary!
Std of the differenced series: 17859.203983779495


4-step difference
ADF Test Statistic:-9.58524793091614
P-Value:2.1057694542062046e-16
Reject the null: the series is stationary!
Std of the differenced series: 16987.96156086255


5-step difference
ADF Test Statistic:-8.960763076444596
P-Value:8.236684531392401e-15
Reject the null: the series is stationary!
Std of the differenced series: 17033.86992851215


6-step difference
ADF Test Statistic:-7.769437143794817
P-Value:9.003228411011962e-12
Reject the null: the series is stationary!
Std of the differenced series: 15529.629177726565


7-step difference
ADF Test Statistic:-7.083109863637503
P-Value:4.611558193984965e-10
Reject the null: the series is stationary!
Std of the differenced series: 17024.943907771467


8-step difference
ADF Test Statistic:-7.869282346208565
P-Value:5.035687145492254e-12
Reject the null: the series is stationary!
Std of the differenced series: 17695.94594762459


9-step difference
ADF Test Statistic:-7.26305598879107
P-Value:1.662101581895993e-10
Reject the null: the series is stationary!
Std of the differenced series: 18068.592304247017


10-step difference
ADF Test Statistic:-8.276441746470041
P-Value:4.642809018487743e-13
Reject the null: the series is stationary!
Std of the differenced series: 16707.40284171362


11-step difference
ADF Test Statistic:-6.82747605010546
P-Value:1.931318243001963e-09
Reject the null: the series is stationary!
Std of the differenced series: 15272.56312237816


12-step difference
ADF Test Statistic:-7.204131545505776
P-Value:2.3239728071593555e-10
Reject the null: the series is stationary!
Std of the differenced series: 13708.569562249815


24-step difference
ADF Test Statistic:-6.063721465758527
P-Value:1.1955709996228333e-07
Reject the null: the series is stationary!
Std of the differenced series: 15386.586684555326


data_train_1["D12"].plot()
[Figure: line plot of the D12 series]

fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(data_train_1["D12"].dropna(),lags=40,ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(data_train_1["D12"].dropna(),lags=40,ax=ax2)


[Figure: ACF and PACF of D12 (40 lags)]



diff = [data_train_1["销量"],
        data_train_1["销量"].diff().dropna(),
        data_train_1["销量"].diff().diff().dropna(),
        data_train_1["销量"].diff().diff().diff().dropna()]

titles = ["d=0", "d=1", "d=2", "d=3"]

fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(8, 14))

for idx, ts, ax, title in zip(range(4), diff, [ax1, ax2, ax3, ax4], titles):
    print("Std at difference order {}: {}".format(idx, ts.std()))
    plot_acf(ts, ax=ax, title=title)
Std at difference order 0: 11816.030434190745
Std at difference order 1: 13817.551491473438
Std at difference order 2: 22637.790854681327
Std at difference order 3: 41052.92772437857

[Figures: ACF plots at d=0, d=1, d=2, d=3]

As the plots and standard deviations above show, d = 0 is appropriate: the variance grows with every additional difference, so the original series is in no danger of over-differencing.
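A common rule of thumb (added here as a supplement, not from the original analysis) is that a strongly negative lag-1 autocorrelation, roughly below -0.5, signals over-differencing. A minimal check over the same `diff` list built above:

# Flag likely over-differencing via the lag-1 autocorrelation of each series
for d, ts in enumerate(diff):
    r1 = ts.autocorr(lag=1)  # pandas Series.autocorr
    flag = "  <- possible over-differencing" if r1 < -0.5 else ""
    print("d={}: lag-1 autocorr = {:.3f}{}".format(d, r1, flag))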

data_train__1 = data_train_1[["运营日期", "销量", 'D1', 'D2', 'D3', 'D4', 'D5', 'D6',
                              'D7', 'D8', 'D9', 'D10', 'D11', 'D12', 'D24']].copy()
data_train__1
|  | 运营日期 | 销量 | D1 | D2 | D3 | D4 | D5 | D6 | D7 | D8 | D9 | D10 | D11 | D12 | D24 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2018-01-01 | 7011.84 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 607 | 2018-01-02 | 42369.00 | 35357.16 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1046 | 2018-01-03 | 50037.00 | 7668.00 | 43025.16 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1207 | 2018-01-04 | 44397.00 | -5640.00 | 2028.00 | 37385.16 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1752 | 2018-01-05 | 47604.00 | 3207.00 | -2433.00 | 5235.00 | 40592.16 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 186569 | 2019-05-27 | 33075.00 | 8040.00 | -7479.00 | 375.00 | -8715.00 | -3678.00 | -3537.00 | 9579.00 | -35658.00 | -19802.82 | -23178.00 | -14544.00 | -1383.00 | 14415.00 |
| 187165 | 2019-05-28 | 37317.00 | 4242.00 | 12282.00 | -3237.00 | 4617.00 | -4473.00 | 564.00 | 705.00 | 13821.00 | -31416.00 | -15560.82 | -18936.00 | -10302.00 | 2859.00 |
| 187391 | 2019-05-29 | 44652.00 | 7335.00 | 11577.00 | 19617.00 | 4098.00 | 11952.00 | 2862.00 | 7899.00 | 8040.00 | 21156.00 | -24081.00 | -8225.82 | -11601.00 | -2967.00 |
| 187962 | 2019-05-30 | 42387.00 | -2265.00 | 5070.00 | 9312.00 | 17352.00 | 1833.00 | 9687.00 | 597.00 | 5634.00 | 5775.00 | 18891.00 | -26346.00 | -10490.82 | -13866.00 |
| 188113 | 2019-05-31 | 39843.78 | -2543.22 | -4808.22 | 2526.78 | 6768.78 | 14808.78 | -710.22 | 7143.78 | -1946.22 | 3090.78 | 3231.78 | 16347.78 | -28889.22 | -13034.04 |

516 rows × 15 columns

# NOTE: this turns the integer row labels into epoch-nanosecond timestamps,
# which is why the indices below all show 1970-01-01; reindexing by
# "运营日期" would be cleaner
data_train__1.index = pd.DatetimeIndex(data_train__1.index.values)
arima_0 = ARIMA(endog=data_train__1["销量"], order=(1, 0, 1))
arima_0 = arima_0.fit()
# the original call passed `endogd=516`, a typo for `end` that statsmodels
# silently ignored, so the dynamic forecast actually covered rows 456-515
data_train__1["arima"] = arima_0.predict(start=456, end=515, dynamic=True)
arima_0 = ARIMA(endog=data_train__1["销量"], seasonal_order=(5, 1, 3, 3))
arima_0 = arima_0.fit()
data_train__1["arima"] = arima_0.predict(start=456, end=515, dynamic=True)
# out-of-sample: the `endogd=577` keyword was the same typo, which is why
# only a single forecast step (index 516) came back below
arima_0.predict(start=516, end=516)
516    26816.992973
dtype: float64
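For a genuine multi-step out-of-sample forecast with uncertainty bands, statsmodels results objects also expose `get_forecast`; a minimal sketch, where the 61-step horizon mirrors the `start=516, end=577` range intended above:

# Forecast 61 steps past the training sample and pull point forecasts
# plus 95% prediction intervals.
fc = arima_0.get_forecast(steps=61)
print(fc.predicted_mean.head())          # point forecasts
print(fc.conf_int(alpha=0.05).head())    # lower/upper 95% bounds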
plt.figure(figsize=(12,7))
plt.plot(data_train__1["运营日期"],data_train__1["销量"],label="真实值")
plt.plot(data_train__1["运营日期"],data_train__1["arima"],label="预测值")
plt.xlabel("ds")
plt.ylabel("销量")
plt.title("ARIMA的验证")
plt.legend()
[Figure: ARIMA dynamic forecast (预测值) vs. actual sales (真实值)]

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    # aggregate SMAPE: summed absolute errors scaled by the summed magnitudes
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.sum(np.abs(y_true - y_pred) * 2) / np.sum(np.abs(y_true) + np.abs(y_pred))

def arima_smape(y_true, y_pred):
    # eval-style wrapper: (metric name, value, is_higher_better)
    smape_val = symmetric_mean_absolute_percentage_error(y_true, y_pred)
    return 'SMAPE', smape_val, False
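A quick sanity check on toy numbers (values chosen here purely for illustration):

# Perfect predictions give SMAPE 0; over- and under-forecasts of the same
# size are penalized symmetrically.
print(arima_smape([100, 200], [100, 200]))  # ('SMAPE', 0.0, False)
print(arima_smape([100, 200], [110, 190]))  # ('SMAPE', 0.0666..., False)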
data_train__1[["运营日期", "arima"]]
|  | 运营日期 | arima |
|---|---|---|
| 1970-01-01 00:00:00.000000000 | 2018-01-01 | NaN |
| 1970-01-01 00:00:00.000000607 | 2018-01-02 | NaN |
| 1970-01-01 00:00:00.000001046 | 2018-01-03 | NaN |
| 1970-01-01 00:00:00.000001207 | 2018-01-04 | NaN |
| 1970-01-01 00:00:00.000001752 | 2018-01-05 | NaN |
| ... | ... | ... |
| 1970-01-01 00:00:00.000186569 | 2019-05-27 | 39525.339871 |
| 1970-01-01 00:00:00.000187165 | 2019-05-28 | 40588.767755 |
| 1970-01-01 00:00:00.000187391 | 2019-05-29 | 36283.064445 |
| 1970-01-01 00:00:00.000187962 | 2019-05-30 | 36961.130673 |
| 1970-01-01 00:00:00.000188113 | 2019-05-31 | 36744.677331 |

516 rows × 2 columns

np.array(data_train__1.tail(60)["销量"])
array([29886.  , 25941.  , 26334.  , 15522.  , 15056.34, 36531.  ,
       36753.  , 40422.  , 43416.  , 17151.  , 26004.  , 24963.84,
       22871.25, 29687.04, 29430.  , 27664.2 , 34278.  , 34278.  ,
       51531.  , 48954.45, 49641.  , 17910.  , 31206.  , 27207.  ,
       14484.  , 56808.  , 27330.  , 36516.  , 15531.  , 61656.  ,
       34710.  , 52362.  , 66564.  , 79806.  , 64491.  , 66288.  ,
       22731.  , 22276.38, 37437.  , 53016.  , 44736.  , 34479.  ,
       18660.  , 34458.  , 47619.  , 56253.  , 52877.82, 68733.  ,
       23496.  , 36612.  , 36753.  , 41790.  , 32700.  , 40554.  ,
       25035.  , 33075.  , 37317.  , 44652.  , 42387.  , 39843.78])
np.array(data_train__1.tail(60)["arima"])
array([34356.47414133, 30091.17999724, 30376.52066402, 25228.70141222,
       34244.25905574, 32995.51895358, 37726.00344591, 40044.53290539,
       42214.3534088 , 34202.13819704, 35451.01651558, 34338.58535838,
       31275.42293545, 29675.36232876, 29506.96478827, 28477.909069  ,
       33900.40136957, 34297.3848415 , 35255.19304449, 39942.21217251,
       41116.3200547 , 35621.55813049, 35888.26578326, 35328.00500634,
       30599.00844142, 29755.72777736, 29110.97021646, 28915.11380027,
       33544.52883198, 34129.90655345, 34610.741271  , 39822.63505499,
       40867.2933485 , 36014.714246  , 36260.94281065, 35881.50575625,
       30702.6187137 , 29872.99671427, 29123.33802667, 28896.08000654,
       33185.78296174, 33751.32898899, 34267.22060165, 39685.9774967 ,
       40729.80605295, 36178.33135918, 36617.97940246, 36327.50562255,
       30948.14619832, 30019.91310149, 29228.82155029, 28809.38738525,
       32836.70491725, 33341.59736765, 33974.7119752 , 39525.33987072,
       40588.76775503, 36283.06444474, 36961.13067342, 36744.67733075])
from sklearn.metrics import mean_squared_error
# RMSE over the last 60 days
mean_squared_error(np.array(data_train__1.tail(60)["销量"]).reshape(-1, 1),
                   np.array(data_train__1.tail(60)["arima"]).reshape(-1, 1)) ** 0.5
14166.52822810936
arima_smape(np.array(data_train__1.tail(60)["销量"]).reshape(-1,1),
             np.array(data_train__1.tail(60)["arima"]).reshape(-1,1))
('SMAPE', 0.2935515805745867, False)

Clearly, the forecast fails: over the last 60 days the dynamic prediction yields an RMSE of about 14167 and a SMAPE of about 0.29.
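One likely cause (an observation added here, not in the original post): with `dynamic=True`, every prediction after `start` is fed back as input, so a low-order ARMA collapses toward the series mean within a few steps. A sketch contrasting it with one-step-ahead prediction:

# One-step-ahead predictions use the true history up to t-1 at every step,
# so they track the series far more closely than the dynamic forecast.
one_step = arima_0.predict(start=456, end=515, dynamic=False)
dyn = arima_0.predict(start=456, end=515, dynamic=True)
print("one-step std:", one_step.std())
print("dynamic  std:", dyn.std())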

print(arima_0.summary(alpha = 0.05))
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                     销量   No. Observations:                  516
Model:                 ARIMA(1, 0, 1)   Log Likelihood               -5543.620
Date:                Mon, 05 Dec 2022   AIC                          11095.239
Time:                        19:40:59   BIC                          11112.224
Sample:                             0   HQIC                         11101.895
                                - 516                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        3.47e+04    745.604     46.537      0.000    3.32e+04    3.62e+04
ar.L1          0.3295      0.128      2.566      0.010       0.078       0.581
ma.L1         -0.0161      0.148     -0.109      0.914      -0.307       0.275
sigma2      1.243e+08      0.001    8.4e+10      0.000    1.24e+08    1.24e+08
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                 0.76
Prob(Q):                              0.93   Prob(JB):                         0.68
Heteroskedasticity (H):               1.48   Skew:                             0.02
Prob(H) (two-sided):                  0.01   Kurtosis:                         3.18
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number  1e+27. Standard errors may be unstable.
arima_0.resid
1970-01-01 00:00:00.000000000   -27686.602325
1970-01-01 00:00:00.000000607    16390.813526
1970-01-01 00:00:00.000001046    13075.357759
1970-01-01 00:00:00.000001207     4855.537661
1970-01-01 00:00:00.000001752     9788.351222
                                     ...     
1970-01-01 00:00:00.000186569     1375.328883
1970-01-01 00:00:00.000187165     3175.598301
1970-01-01 00:00:00.000187391     9141.968919
1970-01-01 00:00:00.000187962     4556.391375
1970-01-01 00:00:00.000188113     2685.556382
Length: 516, dtype: float64

Ljung-Box test: checks whether the residuals are serially correlated. The model is acceptable when Prob(Q) is above 0.05; otherwise consider a different hyperparameter combination.
**Heteroskedasticity test:** checks whether the residual variance is stable. The model is acceptable when Prob(H) is above 0.05; otherwise consider a different hyperparameter combination.
Jarque-Bera test: a normality test, here applied to whether the residuals are normally distributed. The model is acceptable when Prob(JB) is above 0.05; otherwise consider a different hyperparameter combination.
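The same diagnostics can be recomputed directly from the residuals; a minimal sketch with statsmodels (version >= 0.13 assumed for the DataFrame return of `acorr_ljungbox`):

from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.stats.stattools import jarque_bera

resid = arima_0.resid
# Ljung-Box at lag 1, matching the "Ljung-Box (L1) (Q)" row of the summary
lb = acorr_ljungbox(resid, lags=[1])
print("Prob(Q):", float(lb["lb_pvalue"].iloc[0]))

# Jarque-Bera normality test on the residuals
jb_stat, jb_pvalue, skew, kurt = jarque_bera(resid)
print("Prob(JB):", jb_pvalue, "skew:", skew, "kurtosis:", kurt)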

Model selection

data_arima = data_train__1["销量"].copy()
arimas = [ARIMA(endog=data_arima, order=(1, 0, 1)),
          ARIMA(endog=data_arima, order=(1, 0, 2)),
          ARIMA(endog=data_arima, order=(2, 0, 1)),
          ARIMA(endog=data_arima, order=(2, 0, 2)),
          ARIMA(endog=data_arima, order=(1, 0, 3)),
          ARIMA(endog=data_arima, order=(3, 0, 1)),
          ARIMA(endog=data_arima, order=(3, 0, 2)),
          ARIMA(endog=data_arima, order=(2, 0, 3)),
          ARIMA(endog=data_arima, order=(3, 0, 3))]
sarimas = [ARIMA(endog = data_arima,seasonal_order=(1,0,1,12))
          ,ARIMA(endog = data_arima,seasonal_order=(1,0,2,12))
           
          ,ARIMA(endog = data_arima,seasonal_order=(2,0,1,12))
          ,ARIMA(endog = data_arima,seasonal_order=(2,0,2,12))
           
          ,ARIMA(endog = data_arima,seasonal_order=(1,0,3,12))
          ,ARIMA(endog = data_arima,seasonal_order=(3,0,1,12))
          ,ARIMA(endog = data_arima,seasonal_order=(3,0,2,12))
          ,ARIMA(endog = data_arima,seasonal_order=(2,0,3,12))
          ,ARIMA(endog = data_arima,seasonal_order=(3,0,3,12))
           
          ,ARIMA(endog = data_arima,seasonal_order=(1,0,1,4))
          ,ARIMA(endog = data_arima,seasonal_order=(1,0,2,4))
          ,ARIMA(endog = data_arima,seasonal_order=(2,0,1,4))
          ,ARIMA(endog = data_arima,seasonal_order=(2,0,2,4))
          ,ARIMA(endog = data_arima,seasonal_order=(1,0,3,4))
          ,ARIMA(endog = data_arima,seasonal_order=(3,0,1,4))
          ,ARIMA(endog = data_arima,seasonal_order=(3,0,2,4))
          ,ARIMA(endog = data_arima,seasonal_order=(2,0,3,4))
          ,ARIMA(endog = data_arima,seasonal_order=(3,0,3,4))     ,
          ]



sarimas_ = [
             ARIMA(endog = data_arima,seasonal_order=(4,0,1,12))
            ,ARIMA(endog = data_arima,seasonal_order=(4,0,2,12))
            ,ARIMA(endog = data_arima,seasonal_order=(4,0,3,12))
            ,ARIMA(endog = data_arima,seasonal_order=(4,0,4,12))
           
            ,ARIMA(endog = data_arima,seasonal_order=(4,0,1,3))
            ,ARIMA(endog = data_arima,seasonal_order=(4,0,2,3))
            ,ARIMA(endog = data_arima,seasonal_order=(4,0,3,3))
            ,ARIMA(endog = data_arima,seasonal_order=(4,0,4,3))
    
            ,ARIMA(endog = data_arima,seasonal_order=(4,0,1,4))
            ,ARIMA(endog = data_arima,seasonal_order=(4,0,2,4))
            ,ARIMA(endog = data_arima,seasonal_order=(4,0,3,4))
            ,ARIMA(endog = data_arima,seasonal_order=(4,0,4,4))
          ]

sarimas__ = [
             ARIMA(endog = data_arima,seasonal_order=(5,0,1,12))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,2,12))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,3,12))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,4,12))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,5,12))
           
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,1,3))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,2,3))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,3,3))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,4,3))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,5,3))
    
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,1,4))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,2,4))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,3,4))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,4,4))
            ,ARIMA(endog = data_arima,seasonal_order=(5,0,5,4))
          ]


sarimas___ = [
             ARIMA(endog = data_arima,seasonal_order=(5,1,1,12))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,2,12))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,3,12))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,4,12))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,5,12))
           
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,1,3))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,2,3))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,3,3))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,4,3))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,5,3))
    
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,1,4))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,2,4))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,3,4))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,4,4))
            ,ARIMA(endog = data_arima,seasonal_order=(5,1,5,4))
          ]
def plot_summary(models, summary=True, model_name=None):
    for idx, model in enumerate(models):
        m = model.fit()
        if summary:
            print("\n")
            if model_name:
                print(model_name[idx])
            print(m.summary(alpha=0.01))
        else:
            # again `end=515` replaces the ignored `endogd` typo
            data_train__1["arima"] = m.predict(start=456, end=515, dynamic=True)
            data_train__1[["销量", "arima"]].plot(figsize=(12, 2), title="Model {}".format(idx))

plot_summary(arimas, False)


[Figures: dynamic in-sample forecasts, actual vs. ARIMA, for Models 0-8]
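The grids above are only eyeballed through plots; an information criterion gives a more direct ranking. A hedged sketch that refits the non-seasonal candidates and sorts them by AIC (lower is better, BIC shown as a tiebreaker):

# Fit every candidate once and rank by AIC; the index refers back to the
# position in the `arimas` list defined above.
scores = []
for idx, model in enumerate(arimas):
    fitted = model.fit()
    scores.append((idx, fitted.aic, fitted.bic))

for idx, aic, bic in sorted(scores, key=lambda s: s[1]):
    print("Model {}: AIC={:.1f}  BIC={:.1f}".format(idx, aic, bic))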

Judging from the plots above, forecasting the next two months directly with ARIMA is clearly difficult. At this point both ARIMA and Prophet have been tried on daily sales (and the basics of each have been worked through), but neither result is satisfying, so daily-sales forecasting can be pushed further in these directions:

  1. Predict future steps from a window of historical data, approach it through feature construction, or combine the two.
  2. Treat it as a machine-learning problem.
  3. Treat it as a deep-learning problem.

A thought: the data here turned out to be stationary, but thinking from the machine-learning side, in ARIMA one usually chooses for d a differencing order that shows no over-differencing, has a small variance, and carries little noise. If small variance is what we are after, then from the perspective of variance filtering, is it worth considering stripping out the time-series structure altogether?
