CCI-predict-code
Copyright © CCI-P-group
1. Statistical models (R)
library("DMwR")
library("Metrics")
library("tseries")
library("forecast")
library("TTR")
library("fUnitRoots")
cci = read.csv("E:\\Data\\CCI.CSV.csv", header = F)
attach(cci)
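## cci is read without a header; column 1 is assumed to be a date index and
## column 2 the monthly CCI value, which is why [, -1] drops the first column below.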
## Time series analysis
cc = ts(cci, frequency = 12, start = c(1990, 1))
cc = cc[, -1]
tsdisplay(cc, main = "CCI", xlab = "Time", ylab = "CCI")
decom = decompose(cc)
plot(decom)
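## fUnitRoots is loaded above but never called; as a sketch, an ADF unit-root
## test on the raw series motivates the differencing used later (the lag order
## of 12 is an assumption for monthly data):
adfTest(cc, lags = 12, type = "c")   ## null hypothesis: the series has a unit root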
## Split into training and test sets
tr = cci[1:204, ]   ## rows 1-204: Jan 1990 - Dec 2006 (renamed to avoid masking base::c)
train = ts(tr, frequency = 12, start = c(1990, 1))
train = train[, -1]
tsdisplay(train, main = "Train Set", xlab = "Time", ylab = "CCI")
test = cci[205:295, ]   ## rows 205-295: Jan 2007 - Jul 2014, 91 observations
test = test[, -1]
test = ts(test, frequency = 12, start = c(2007, 1))
tsdisplay(test, main = "Test Set", xlab = "Time", ylab = "CCI")
## Simple exponential smoothing
fore1 = HoltWinters(train, beta = FALSE, gamma = FALSE)
fore2 = forecast(fore1, h = 91)
plot(fore2)
Box.test(fore2$residuals, lag=20, type="Ljung-Box")
rmse(test, fore2$mean)
regr.eval(test, fore2$mean)   ## regr.eval(trues, preds) returns mae, mse, rmse, mape
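## Equivalently, forecast::accuracy() reports RMSE, MAE and MAPE in one call
## (a sketch; it should agree with the figures above up to rounding):
accuracy(fore2, test)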
plot(fore2, ylim = c(4000, 10000), main = "Out-of-Sample Forecast from First-Order ES", xlab = "Time", ylab = "CCI")
lines(cc) ## dark blue: 80% prediction interval; light blue: 95% (forecast defaults)
## Holt exponential smoothing
fore3 = HoltWinters(train, alpha = 0.999, beta = 0.020, gamma=FALSE, l.start=4680, b.start=5)
fore4 = forecast(fore3, h = 91)
plot(fore4)
Box.test(fore4$residuals, lag = 20, type = "Ljung-Box")
rmse(test, fore4$mean)
regr.eval(test, fore4$mean)
plot(fore4, ylim = c(4000, 10000), main = "Out-of-Sample Forecast from Holt ES", xlab = "Time", ylab = "CCI")
lines(cc)
plot(fore4, xlim = c(2007, 2015), main = "Zoomed Out-of-Sample Forecast from Holt ES", xlab = "Time", ylab = "CCI") ## zoomed view of the test period
lines(cc)
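## The smoothing parameters above are hard-coded; as an alternative sketch,
## HoltWinters() can estimate alpha and beta itself (fore3b is a new name,
## not used elsewhere):
fore3b = HoltWinters(train, gamma = FALSE)
fore3b$alpha
fore3b$beta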
## Holt-Winters exponential smoothing
fore5 = HoltWinters(train, alpha = 0.88690, beta = 0.020, gamma = 0.999)
fore6 = forecast(fore5, h = 91)
plot(fore6)
Box.test(fore6$residuals, lag = 20, type="Ljung-Box")
rmse(test, fore6$mean)
regr.eval(test, fore6$mean)
plot(fore6, ylim = c(4000, 10000), main = "Out-of-Sample Forecast from Holt-Winters ES", xlab = "Time", ylab = "CCI")
lines(cc)
plot(fore6, xlim = c(2007, 2015), main = "Zoomed Out-of-Sample Forecast from Holt-Winters ES", xlab = "Time", ylab = "CCI") ## zoomed view of the test period
lines(cc)
## Holt exponential smoothing: one-step-ahead forecasts
a = 0             ## running sum of squared errors
aaa = 0           ## running sum of absolute percentage errors
d = numeric(91)   ## the 91 one-step forecasts
for (i in 1:91) {
  ccc = cci[1:(204 + i - 1), ]   ## expanding window: all rows before the target month
  train1 = ts(ccc, frequency = 12, start = c(1990, 1))
  train1 = train1[, -1]
  fore7 = HoltWinters(train1, alpha = 0.999, beta = 0.020, gamma = FALSE)
  fore8 = forecast(fore7, h = 1)
  d[i] = fore8$mean
  test1 = cci[205 + i - 1, ]
  test1 = test1[, -1]
  a = a + (fore8$mean - test1)^2      ## squared error of this one-step forecast
  aa = regr.eval(test1, fore8$mean)
  aaa = aaa + aa[4]                   ## 4th element of regr.eval output is the APE
}
rmse = sqrt(a / 91)   ## root mean squared error over the 91 one-step forecasts
rmse
mape = aaa / 91
mape
d = ts(d, frequency = 12, start = c(2007,1))
plot(cc, main = "One-Step Forecast from Holt ES", xlab = "Time", ylab = "CCI")
lines(d, col = 4, lwd = 1)
## zoomed view of the test period
plot(d, col = 4, main = "Zoomed One-Step Forecast from Holt ES", xlab = "Time", ylab = "CCI")
lines(cc)
## Holt-Winters exponential smoothing: one-step-ahead forecasts
a = 0             ## running sum of squared errors
aaa = 0           ## running sum of absolute percentage errors
e = numeric(91)   ## the 91 one-step forecasts
for (i in 1:91) {
  cccc = cci[1:(204 + i - 1), ]   ## expanding window: all rows before the target month
  train2 = ts(cccc, frequency = 12, start = c(1990, 1))
  train2 = train2[, -1]
  fore9 = HoltWinters(train2, alpha = 0.88690, beta = 0.010, gamma = 0.999)
  fore10 = forecast(fore9, h = 1)
  e[i] = fore10$mean
  test2 = cci[205 + i - 1, ]
  test2 = test2[, -1]
  a = a + (fore10$mean - test2)^2
  aa = regr.eval(test2, fore10$mean)
  aaa = aaa + aa[4]
}
rmse = sqrt(a / 91)
rmse
mape = aaa / 91
mape
e = ts(e, frequency = 12, start = c(2007, 1))
plot(cc, main = "One-Step Forecast from Holt-Winters ES", xlab = "Time", ylab = "CCI")
lines(e, col = 4, lwd = 1)
## zoomed view of the test period
plot(e, col = 4, main = "Zoomed One-Step Forecast from Holt-Winters ES", xlab = "Time", ylab = "CCI")
lines(cc)
## Differencing and stationarity checks
dtrain = diff(train, differences = 2)   ## second difference (named dtrain to avoid masking base::diff)
plot.ts(dtrain, main = "Second-Order Difference")
acf(dtrain, lag.max = 20, main = "ACF of Second Difference")
acf(dtrain, lag.max = 20, plot = FALSE)
pacf(dtrain, lag.max = 20, main = "PACF of Second Difference")
pacf(dtrain, lag.max = 20, plot = FALSE)
auto.arima(dtrain)
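## auto.arima() can also handle the differencing internally; a sketch on the
## undifferenced training series, with d = 2 matching the manual differencing above:
auto.arima(train, d = 2, seasonal = FALSE)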
## ARIMA model
fore11 = arima(train, order = c(2, 2, 2))
fore12 = forecast(fore11, h = 91)
plot(fore12)
acf(fore12$residuals, lag.max = 20, main = "ACF of ARIMA(2,2,2) Residuals")
Box.test(fore12$residuals, lag = 20, type="Ljung-Box")
rmse(test, fore12$mean)
regr.eval(test, fore12$mean)
plot(fore12, ylim = c(4000, 10000), main = "Out-of-Sample Forecast from ARIMA(2,2,2)", xlab = "Time", ylab = "CCI")
lines(cc)
plot(fore12, xlim = c(2007, 2015), main = "Zoomed Out-of-Sample Forecast from ARIMA(2,2,2)", xlab = "Time", ylab = "CCI") ## zoomed view of the test period
lines(cc)
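## forecast::checkresiduals() bundles the residual plot, ACF and Ljung-Box test
## above into a single call (a sketch; requires forecast >= 8.0):
checkresiduals(fore12)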
## ARIMA: one-step-ahead forecasts
a = 0             ## running sum of squared errors
aaa = 0           ## running sum of absolute percentage errors
f = numeric(91)   ## the 91 one-step forecasts
for (i in 1:91) {
  ccccc = cci[1:(204 + i - 1), ]   ## expanding window: all rows before the target month
  train3 = ts(ccccc, frequency = 12, start = c(1990, 1))
  train3 = train3[, -1]
  fore13 = arima(train3, order = c(2, 2, 2))
  fore14 = forecast(fore13, h = 1)
  f[i] = fore14$mean
  test3 = cci[205 + i - 1, ]
  test3 = test3[, -1]
  a = a + (fore14$mean - test3)^2
  aa = regr.eval(test3, fore14$mean)
  aaa = aaa + aa[4]
}
rmse = sqrt(a / 91)
rmse
mape = aaa / 91
mape
f = ts(f, frequency = 12, start = c(2007, 1))
plot(cc, main = "One-Step Forecast from ARIMA(2,2,2)", xlab = "Time", ylab = "CCI")
lines(f, col = 4, lwd = 1)
plot(f, col = 4, main = "Zoomed One-Step Forecast from ARIMA(2,2,2)", xlab = "Time", ylab = "CCI") ## zoomed view of the test period
lines(cc)
## Seasonal ARIMA model
auto.arima(train, trace = TRUE)
fore15 = arima(train, order = c(2, 2, 2), seasonal = c(1, 0, 1))
fore16 = forecast(fore15, h = 91)
acf(fore16$residuals, lag.max = 20, main = "ACF of Seasonal ARIMA Residuals")
Box.test(fore16$residuals, lag = 20, type="Ljung-Box")
plot(fore16)
rmse(test, fore16$mean)
regr.eval(test, fore16$mean)
plot(fore16, ylim = c(4000, 10000), main = "Out-of-Sample Forecast from Seasonal ARIMA(2,2,2)(1,0,1)", xlab = "Time", ylab = "CCI")
lines(cc)
plot(fore16, xlim = c(2007, 2015), main = "Zoomed Out-of-Sample Forecast from Seasonal ARIMA(2,2,2)(1,0,1)", xlab = "Time", ylab = "CCI") ## zoomed view of the test period
lines(cc)
## Seasonal ARIMA: one-step-ahead forecasts
a = 0             ## running sum of squared errors
aaa = 0           ## running sum of absolute percentage errors
g = numeric(91)   ## the 91 one-step forecasts
for (i in 1:91) {
  cccccc = cci[1:(204 + i - 1), ]   ## expanding window: all rows before the target month
  train4 = ts(cccccc, frequency = 12, start = c(1990, 1))
  train4 = train4[, -1]
  fore17 = arima(train4, order = c(2, 2, 2), seasonal = c(1, 0, 1))
  fore18 = forecast(fore17, h = 1)
  g[i] = fore18$mean
  test4 = cci[205 + i - 1, ]
  test4 = test4[, -1]
  a = a + (fore18$mean - test4)^2
  aa = regr.eval(test4, fore18$mean)
  aaa = aaa + aa[4]
}
rmse = sqrt(a / 91)
rmse
mape = aaa / 91
mape
g = ts(g, frequency = 12, start = c(2007, 1))
plot(cc, main = "One-Step Forecast from Seasonal ARIMA(2,2,2)(1,0,1)", xlab = "Time", ylab = "CCI")
lines(g, col = 4, lwd = 1)
plot(g, col = 4, main = "Zoomed One-Step Forecast from Seasonal ARIMA(2,2,2)(1,0,1)", xlab = "Time", ylab = "CCI") ## zoomed view of the test period
lines(cc)
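## A compact side-by-side comparison of the five out-of-sample fits (a sketch
## using forecast::accuracy() on the objects fitted above):
sapply(list(SES = fore2, Holt = fore4, HW = fore6, ARIMA = fore12, SARIMA = fore16),
       function(f) accuracy(f, test)["Test set", c("RMSE", "MAPE")])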
2. LSTM for prediction (Python)
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
from matplotlib import pyplot
import numpy
import pandas as pd
# frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag=1):
    df = DataFrame(data)
    columns = [df.shift(i) for i in range(1, lag+1)]
    columns.append(df)
    df = concat(columns, axis=1)
    df.fillna(0, inplace=True)
    return df
# create a differenced series
def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return Series(diff)
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]
# scale train and test data to [-1, 1]
def scale(train, test):
    # fit scaler on the training data only
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)
    # transform train
    train = train.reshape(train.shape[0], train.shape[1])
    train_scaled = scaler.transform(train)
    # transform test
    test = test.reshape(test.shape[0], test.shape[1])
    test_scaled = scaler.transform(test)
    return scaler, train_scaled, test_scaled
# inverse scaling for a forecasted value
def invert_scale(scaler, X, value):
    new_row = [x for x in X] + [value]
    array = numpy.array(new_row)
    array = array.reshape(1, len(array))
    inverted = scaler.inverse_transform(array)
    return inverted[0, -1]
# fit a stateful LSTM network to the training data
def fit_lstm(train, batch_size, nb_epoch, neurons):
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # manual epoch loop so the hidden state can be reset between epochs
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()
    return model
# make a one-step forecast
def forecast_lstm(model, batch_size, X):
    X = X.reshape(1, 1, len(X))
    yhat = model.predict(X, batch_size=batch_size)
    return yhat[0, 0]
# load dataset
df = read_csv("data.csv")
df['time'] = pd.to_datetime(df['time'])
df = df.set_index('time')
# transform data to be stationary
raw_values = df.values
diff_values = difference(raw_values, 1)
# transform data to be supervised learning
supervised = timeseries_to_supervised(diff_values, 1)
supervised_values = supervised.values
# split data into train and test-sets
train, test = supervised_values[0:204], supervised_values[204:]
# transform the scale of the data
scaler, train_scaled, test_scaled = scale(train, test)
# fit the model
lstm_model = fit_lstm(train_scaled, 1, 1000, 128)
lstm_model.summary()
# forecast the entire training dataset to build up state for forecasting
train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1, 1)
lstm_model.predict(train_reshaped, batch_size=1)
# walk-forward validation on the test data
predictions = list()
for i in range(len(test_scaled)):
    # make one-step forecast
    X, y = test_scaled[i, 0:-1], test_scaled[i, -1]
    yhat = forecast_lstm(lstm_model, 1, X)
    # invert scaling
    yhat = invert_scale(scaler, X, yhat)
    # invert differencing
    yhat = inverse_difference(raw_values, yhat, len(test_scaled)+1-i)
    # store forecast
    predictions.append(yhat)
    expected = raw_values[len(train) + i + 1]
    print('Month=%d, Predicted=%f, Expected=%f' % (i+1, yhat, expected))
# report performance
rmse = sqrt(mean_squared_error(raw_values[205:], predictions))
print('Test RMSE: %.3f' % rmse)
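# For comparison with the MAPE reported for the R models (a sketch; flattening
# guards against the (91, 1) shape of raw_values):
preds = numpy.array(predictions).flatten()
actual = raw_values[205:].flatten()
mape = numpy.mean(numpy.abs((actual - preds) / actual)) * 100
print('Test MAPE: %.3f%%' % mape)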
# line plot of observed vs predicted
pyplot.plot(raw_values[205:], label='Test')
pyplot.plot(predictions, label='LSTM predict')
pyplot.legend(loc='best')
pyplot.show()
3. Multivariate LSTM (M-LSTM) for prediction (Python)
from numpy import concatenate
from matplotlib import pyplot
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from math import sqrt
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
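# Quick illustration with a hypothetical 2-variable toy input:
#   series_to_supervised(numpy.array([[1, 10], [2, 20], [3, 30]]), 1, 1)
# returns columns var1(t-1), var2(t-1), var1(t), var2(t), with the leading
# NaN row dropped.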
# load dataset
dataset = read_csv('M-data.csv', header=0, index_col=0)
values = dataset.values
# integer encode direction
#encoder = LabelEncoder()
#values[:,4] = encoder.fit_transform(values[:,4])
# ensure all data is float
values = values.astype('float32')
groups = [0, 1, 2, 3, 4, 5, 6]
i = 1
# plot each column
pyplot.figure()
for group in groups:
    pyplot.subplot(len(groups), 1, i)
    pyplot.plot(values[:, group])
    pyplot.title(dataset.columns[group], y=0.5, loc='right')
    i += 1
pyplot.show()
# normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
# frame as supervised learning
reframed = series_to_supervised(scaled, 1, 1)
# drop the time-t columns of the six exogenous variables, keeping var1(t) as the target
reframed.drop(reframed.columns[[8,9,10,11,12,13]], axis=1, inplace=True)
print(reframed.head())
# split into train and test sets
values = reframed.values
n_train = 204   # first 204 months for training
train = values[:n_train, :]
test = values[n_train:, :]
# split into input and outputs
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
# design network
model = Sequential()
model.add(LSTM(128, input_shape=(train_X.shape[1], train_X.shape[2]), return_sequences=False))
#model.add(Dropout(0.5))
#model.add(LSTM(50))
#model.add(Dropout(0.2))
model.add(Dense(1))
model.summary()
model.compile(loss='mae', optimizer='adam')
# fit network
history = model.fit(train_X, train_y, epochs=800, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False)
# plot history
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()
pyplot.show()
# make a prediction
yhat = model.predict(test_X)
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
# invert scaling for forecast
inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]
# line plot of observed vs predicted
pyplot.plot(inv_y, label='Test')
pyplot.plot(inv_yhat, label='M-LSTM predict')
pyplot.legend(loc='best')
pyplot.show()
# calculate RMSE
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)
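# MAPE, for comparison with the statistical models (a sketch; inv_y and
# inv_yhat are the 1-D numpy arrays computed above):
mape = (abs((inv_y - inv_yhat) / inv_y)).mean() * 100
print('Test MAPE: %.3f%%' % mape)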