《商务与经济统计》Python实现笔记(五)

时间序列分析

移动平均

import pandas as pd
import numpy as np


data = pd.DataFrame([17,21,19,23,18,16,20,18,22,20,15,22],columns=['sale'])
data["rolling_mean"] = data.rolling(3).mean().shift(1)#选前3个时间单位的平均值,预测值对应下一个样本,所以将结果下移一行,与观察值相对应


data['mse'] = (abs(data["sale"]-data["rolling_mean"]))**2 #计算预测差平方和
data['mse'].mean()
data

指数平滑(一次指数平滑)

from statsmodels.tsa.holtwinters import SimpleExpSmoothing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data = pd.DataFrame([17,21,19,23,18,16,20,18,22,20,15,22],columns=['sale'])

fit1 = SimpleExpSmoothing(data).fit(smoothing_level=0.2,optimized=False)#指定平滑常数α=0.2

# fit1 = SimpleExpSmoothing(data).fit() #自行优化出参数

data['fit'] = fit1.fittedvalues #拟合,拟合值与观察值已对应
data['mse'] = (abs(data["sale"]-data["fit"]))**2 #计算预测差平方和

data['mse'].sum()/(len(data)-1)#计算平方和的平均值,因第一项为实际值,所以分母为样本量-1

Holt线性指数平滑(二次指数平滑)

在一次指数平滑的结果上再次进行指数平滑

from statsmodels.tsa.holtwinters import  Holt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data = pd.DataFrame([21.6,22.9,25.5,21.9,23.9,27.5,31.5,29.7,28.6,31.4],columns=['sale'])
fit1 = Holt(data).fit()


data['fit'] = fit1.fittedvalues #拟合,拟合值与观察值已对应
data['mse'] = (abs(data["sale"]-data["fit"]))**2 #计算预测差平方和
data['mse'].mean()

fit2.forecast(1)#预测后一个值

Holt-Winters 方法(三次指数平滑)

趋势和季节的加乘法模型

from statsmodels.tsa.holtwinters import ExponentialSmoothing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data = pd.DataFrame([4.8,4.4,6,6.5,5.8,5.2,6.8,7.4,6,5.6,7.5,7.8,6.3,5.9,8,8.4],columns=['sale'])

#Box-Cox变换可以明显地改善数据的正态性、对称性和方差相等性
#damped 添加阻尼系数
fit1 = ExponentialSmoothing(data, seasonal_periods=4, trend='add', seasonal='add').fit(use_boxcox=True)
fit2 = ExponentialSmoothing(data, seasonal_periods=4, trend='add', seasonal='mul').fit(use_boxcox=True)
fit3 = ExponentialSmoothing(data, seasonal_periods=4, trend='add', seasonal='add', damped=True).fit(use_boxcox=True)
fit4 = ExponentialSmoothing(data, seasonal_periods=4, trend='add', seasonal='mul', damped=True).fit(use_boxcox=True)


data['fit1'] = fit1.fittedvalues
data['fit2'] = fit2.fittedvalues
data['fit3'] = fit3.fittedvalues
data['fit4'] = fit4.fittedvalues


l1, = plt.plot(list(fit1.fittedvalues) + list(fit1.forecast(5)), marker='^')
l2, = plt.plot(list(fit2.fittedvalues) + list(fit2.forecast(5)), marker='*')
l3, = plt.plot(list(fit3.fittedvalues) + list(fit3.forecast(5)), marker='.')
l4, = plt.plot(list(fit4.fittedvalues) + list(fit4.forecast(5)), marker='.')

l5, = plt.plot(data.values, marker='.')
plt.legend(handles = [l1, l2, l3, l4, l5], labels = ["aa", "am", "aa damped", "am damped","data"], loc = 'best', prop={'size': 7})

plt.show()

data['mse1'] = (abs(data["sale"]-data["fit1"]))**2 #计算预测差平方和
data['mse2'] = (abs(data["sale"]-data["fit2"]))**2
data['mse3'] = (abs(data["sale"]-data["fit3"]))**2
data['mse4'] = (abs(data["sale"]-data["fit4"]))**2
data.mean()[-4:]

ARIMA (p,d,q)模型

import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from statsmodels.tsa.stattools import adfuller as ADF
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA

filename = r"C:\Users\liuhao\Desktop\python_work\Python数据分析与挖掘实战\chapter5\demo\data\arima_data.xls"
data = pd.read_excel(filename,index_col=0)

# 做平稳性检测,输出的第二个值为 p值,大于0.05则为非平稳序列,此处 p = 0.998
ADF(data['销量']) 

# 画自相关图和偏自相关图,判断拖尾和截尾情况
# plot_acf(data).show()
# plot_pacf(data).show()


#一阶拆分
D_data = data.diff().dropna()

#再次做平稳性检测,p = 0.022
ADF(D_data[u'销量']) 
# plot_acf(D_data).show()
# plot_pacf(D_data).show()

#自动定阶,此处求得 (0,1)
(p,q) =(sm.tsa.arma_order_select_ic(D_data,max_ar=3,max_ma=3,ic='bic')['bic_min_order']) 
# 需要设定自动取阶的 p和q 的最大值,即函数里面的max_ar,和max_ma,一般不超过 length/10
# ic 参数表示选用的选取标准,这里设置的为bic,当然也可以用aic

model = ARIMA(data, (0,1,1)).fit() #此处拟合原始数据,非拆分后的数据
# model.summary2() # 给出一份模型报告

# 残差检验
resid = model.resid
resid_test = ADF(resid)

model.forecast(5)
# 作为后期5天的预测,返回预测结果、标准误差、置信区间。

data["fit"] = model.predict(start=str("2015-01-02"), end=str("2015-02-06"), dynamic = False,typ='levels')
# 预测样本数据,第一个观测值无法预测
# typ:str {‘linear’, ‘levels’},"linear"返回的是差分后的拟合值,"levels"返回的是原始拟合值

data['mape'] = (abs(data["销量"]-data["fit"]))/data["销量"] # 预测精度,绝对误差百分数

时间序列分解法

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data = pd.DataFrame([4.8,4.1,6,6.5,5.8,5.2,6.8,7.4,6,5.6,7.5,7.8,6.3,5.9,8,8.4],columns=['sale'])

rolling = data['sale'].rolling(4).mean()
data['trend'] = rolling.rolling(2).mean().shift(-2) #中心化移动平均数
data['s*i'] = data['sale']/data['trend'] #季节-不规则值

index = ['1','2','3','4'] * 4
data['seasonal'] = index #引入季度序列
group = data['s*i'].groupby(data['seasonal'])
seasonal = group.mean() #求各季度的季节-不规则值的平均值,即为季节指数

# 填充季节指数序列
seasonal.index = list(range(4))
for i in range(4,16):
    seasonal[i] = seasonal[i-4]

# 消除季节影响
data['t*i'] = data['sale']/seasonal
data['seasonal'] = seasonal
plt.plot(data['t*i'])
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值