代码一:
import warnings

import pandas as pd
from sklearn.metrics import r2_score
from statsmodels.tsa.arima_model import ARIMA
def evaluate_arima_model(X, arima_order):
    """Walk-forward validate one ARIMA order and return its out-of-sample R2.

    The first 66% of ``X`` seeds the training history. Each remaining
    observation is forecast one step ahead by a model refitted on everything
    seen so far, after which the true value is appended to the history.

    Args:
        X: Observations in time order; must support ``len`` and slicing.
        arima_order: ``(p, d, q)`` tuple forwarded to ``ARIMA(order=...)``.

    Returns:
        ``r2_score`` of the one-step forecasts against the held-out tail.
    """
    # prepare training dataset
    train_size = int(len(X) * 0.66)
    train, test = X[:train_size], X[train_size:]
    history = list(train)
    # make predictions
    predictions = []
    for observed in test:
        # NOTE(review): ``fit(disp=0)`` is the legacy ``arima_model`` call
        # style -- confirm the installed statsmodels still provides it.
        model_fit = ARIMA(history, order=arima_order).fit(disp=0)
        # Element 0 of the forecast result is used as the predicted value.
        predictions.append(model_fit.forecast()[0])
        # Reveal the true value before forecasting the next step.
        history.append(observed)
    # calculate out of sample error
    return r2_score(test, predictions)
# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA orders and print the one with the highest R2.

    Every ``(p, d, q)`` combination is scored with ``evaluate_arima_model``.
    Each successful score is printed, followed by the best configuration.

    Args:
        dataset: Array of observations; must support ``astype`` (it is cast
            to ``float32`` before scoring).
        p_values: Candidate ``p`` values.
        d_values: Candidate ``d`` values (iterated once per ``p``).
        q_values: Candidate ``q`` values (iterated once per ``(p, d)``).

    Returns:
        None. Results are reported on standard output. If no order could be
        evaluated the final line reports ``None`` with a score of ``-inf``.
    """
    dataset = dataset.astype('float32')
    # R2 is maximised, so the running best must start below every attainable
    # score; starting at +inf would make ``r2 > best_score`` never true.
    best_score, best_cfg = float("-inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    r2 = evaluate_arima_model(dataset, order)
                except Exception:
                    # Best-effort search: skip orders that cannot be
                    # evaluated, but let KeyboardInterrupt/SystemExit through.
                    continue
                if r2 > best_score:
                    best_score, best_cfg = r2, order
                print('ARIMA%s R2=%.3f' % (order, r2))
    print('Best ARIMA%s R2=%.3f' % (best_cfg, best_score))
# load dataset
# Only the first worksheet is used, so read just that one instead of loading
# four sheets and discarding three.
sheet = pd.read_excel('F:\\123123.xlsx', sheet_name=0, header=0, index_col=0)
series = sheet["XXX"]
# evaluate parameters
p_values = [0, 1, 2, 4, 6, 8, 10, 12, 14, 16]
d_values = range(0, 5)
q_values = range(0, 5)
# Suppress warnings for the whole grid search so they do not drown out the
# per-order score lines.
warnings.filterwarnings("ignore")
evaluate_models(series.values, p_values, d_values, q_values)
代码二:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
from pandas import DataFrame
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from pandas import concat
from numpy import concatenate
from sklearn.metrics import mean_squared_error
from math import sqrt
from matplotlib import pyplot
# Read the first four worksheets, using the '日期' column as the index.
sheet = pd.read_excel('F:\\123123.xlsx', sheet_name=[0, 1, 2, 3], index_col=u'日期')
# Keep only the "XXX" column of the first sheet as a one-column DataFrame
# (the column name's closing quote was missing, which is a syntax error).
data = sheet[0][["XXX"]].copy()
# Plot the raw time series.
# Use the SimHei font so CJK labels render, and disable the Unicode minus
# sign so negative tick labels still display with that font.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
data.plot()
plt.show()
# Plot the log-transformed time series.
log_data = np.log(data)
log_data.plot()
# ``plt.show`` must be called; the bare attribute reference displays nothing.
plt.show()
# Autocorrelation plot of the original series.
from statsmodels.graphics.tsaplots import plot_acf
acf_figure = plot_acf(data)
acf_figure.show()
# ADF test on the original series' "XXX" column.
from statsmodels.tsa.stattools import adfuller as ADF
adf_original = ADF(data[u'XXX'])
print('原始序列的ADF检测结果为:', adf_original)
# Difference the series and drop the leading NaN row that diff() creates.
D_data = data.diff().dropna()
D_data.columns = [u'XXX']
D_data.plot()
plt.show()
# ACF and PACF plots of the differenced series.
plot_acf(D_data).show()
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show()
# The label announces the differenced series, so the ADF test must run on
# D_data rather than on the undifferenced data.
print('差分序列的ADF检测结果为:', ADF(D_data[u'XXX']))
# Ljung-Box test at lag 1 on the differenced series.
from statsmodels.stats.diagnostic import acorr_ljungbox
print('差分序列的白噪声检测结果为:', acorr_ljungbox(D_data, lags=1))
from statsmodels.tsa.arima_model import ARIMA
print("开始建立时间序列")
# The (p, d, q) order here should be chosen from the preceding ADF analysis.
model = ARIMA(data, (22, 0, 1)).fit()
# predict() with no arguments predicts over all existing data, so the result
# can be compared against the real values.
pr = model.predict()
# model.forecast(5) would instead forecast the next 5 days directly.
# r2_score takes (y_true, y_pred): observed values first, predictions second.
r2 = r2_score(data, pr)
print('R2: %.3f' % r2)
# predict() also accepts dates; this predicts 2019-8-22 through 2019-8-28.
print(model.predict('2019-8-22', '2019-8-28'))
890

被折叠的 条评论
为什么被折叠?



