这个代码是在之前扒来的别人代码的基础上改的,思路也是设置滑动窗口,只是稍微绕一点。
总之,都是把时间序列转化成监督学习问题,然后利用监督学习的算法进行预测。长短期记忆网络(LSTM)还是蛮好用的,只是需要提前安装 TensorFlow 和 Keras。
from numpy import nan
from numpy import isnan
from pandas import read_csv
from pandas import to_numeric
from keras import optimizers
from math import sqrt
from numpy import split
from numpy import array
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from numpy.random import seed
import numpy as np
from tensorflow import set_random_seed
# Seed TensorFlow's and NumPy's random number generators with the same fixed
# constant (presumably so that repeated training runs are comparable).
set_random_seed(2019)
seed(2019)
def fill_missing(values, one_day=362):
    """Fill NaN cells in place with the value found ``one_day`` rows earlier.

    Cells are visited in row-major order, so a cell that was filled earlier
    in the same pass can act as the source for a later row.

    Args:
        values: 2-D NumPy float array; it is modified in place.
        one_day: Row offset used to look up the replacement value. Defaults
            to 362, the constant that used to be hard-coded here.

    Returns:
        None. ``values`` is mutated.

    Note:
        For ``row < one_day`` the index ``row - one_day`` is negative, so
        NumPy indexes from the end of the array (and raises ``IndexError``
        if that negative index falls outside the array). A cell stays NaN
        when its source cell is NaN.
    """
    n_rows, n_cols = values.shape[0], values.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            if isnan(values[row, col]):
                values[row, col] = values[row - one_day, col]
def split_dataset(data, train_start=12, train_end=257, test_end=362, n_steps=7):
    """Split a series into train/test sets of fixed-length windows.

    Rows ``train_start:train_end`` become the training set and rows
    ``train_end:test_end`` the test set. Each set is then cut into
    consecutive, non-overlapping windows of ``n_steps`` rows. The shapes of
    the two slices are printed before windowing.

    Args:
        data: Array indexed ``[row, feature]``.
        train_start: First row of the training slice (default 12).
        train_end: End of the training slice and start of the test slice
            (default 257).
        test_end: End (exclusive) of the test slice (default 362).
        n_steps: Number of rows per window (default 7).

    Returns:
        Tuple ``(train, test)`` of arrays indexed ``[window, step, feature]``.

    Raises:
        ValueError: If ``n_steps`` is not positive, or if either slice is
            empty or its length is not a multiple of ``n_steps``.
    """
    if n_steps <= 0:
        raise ValueError('n_steps must be a positive integer')

    def _to_windows(block, label):
        # Validate explicitly: an integer section count alone would let a
        # wrong-length slice be cut into windows of the wrong size.
        if len(block) == 0 or len(block) % n_steps:
            raise ValueError(
                '%s slice has %d rows, which is not a positive multiple of %d'
                % (label, len(block), n_steps)
            )
        # numpy.split expects a whole number of sections, so use //.
        return array(split(block, len(block) // n_steps))

    train, test = data[train_start:train_end], data[train_end:test_end]
    print(train.shape)
    print(test.shape)
    return _to_windows(train, 'train'), _to_windows(test, 'test')
def evaluate_forecasts(actual, predicted):
    """Score forecasts with R^2, both overall and per forecast step.

    Args:
        actual: Observed values, indexed ``[sample, step]``.
        predicted: Forecasts, indexed ``[sample, step, 0]``.

    Returns:
        Tuple ``(score, scores)``. ``score`` is ``r2_score`` computed over
        all step columns at once; ``scores`` is a list holding one
        ``r2_score`` per step column, in column order.
    """
    n_steps = actual.shape[1]
    # One R^2 value per forecast step (column).
    scores = [
        r2_score(actual[:, step], predicted[:, step])
        for step in range(n_steps)
    ]
    # Overall R^2 across every step; drop the trailing axis of the forecasts.
    score = r2_score(actual[:, :], predicted[:, :, 0])
    return score, scores
def summarize_scores(name, score, scores):
    """Print a one-line summary of a model's scores.

    The line has the form ``<name>: [<score>] <s1>, <s2>, ...`` with the
    overall score shown to three decimals and each per-step score to one.

    Args:
        name: Label printed at the start of the line.
        score: Overall score.
        scores: Iterable of per-step scores.
    """
    formatted = []
    for value in scores:
        formatted.append('%.1f' % value)
    line = '%s: [%.3f] %s' % (name, score, ', '.join(formatted))
    print(line)
def to_supervised(train, n_input, n_out=7):
    """Turn windowed history into sliding-window (input, output) samples.

    ``train`` is flattened along its first two axes into one continuous
    series. A window then slides over it one row at a time: each sample
    pairs ``n_input`` consecutive rows (all features) with the following
    ``n_out`` values of feature column 0.

    Args:
        train: Array indexed ``[window, step, feature]``.
        n_input: Number of rows in each input sample.
        n_out: Number of future rows in each output sample (default 7).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` is indexed
        ``[sample, n_input, feature]`` and ``y`` ``[sample, n_out]``. Both
        are empty arrays when the series is too short for a single sample.
    """
    # Flatten [window, step, feature] into [row, feature].
    data = train.reshape((train.shape[0] * train.shape[1], train.shape[2]))
    # The last valid sample ends exactly at len(data), so the count is
    # inclusive (+1); a strict "<" comparison would drop that sample.
    n_samples = len(data) - n_input - n_out + 1
    X, y = [], []
    for in_start in range(n_samples):
        in_end = in_start + n_input
        out_end = in_end + n_out
        X.append(data[in_start:in_end, :])
        y.append(data[in_end:out_end, 0])
    return array(X), array(y)
def build_model(train, n_input):
    """Build and fit the LSTM forecasting network on ``train``.

    Samples are produced by ``to_supervised(train, n_input)``. The network
    is an LSTM(16) whose output is repeated once per output step, followed
    by an LSTM(32) returning sequences and two time-distributed dense
    layers (200 units with ReLU, then 1 unit). It is compiled with MSE loss
    and Adam, then fitted for 800 epochs with batch size 50 and no shuffling.

    Args:
        train: Training data indexed ``[window, step, feature]``.
        n_input: Number of rows in each input sample.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    inputs, targets = to_supervised(train, n_input)

    n_timesteps = inputs.shape[1]
    n_features = inputs.shape[2]
    n_outputs = targets.shape[1]

    # The network emits one value per output step, so the targets need a
    # trailing feature axis: [samples, timesteps, 1].
    targets = targets.reshape((targets.shape[0], targets.shape[1], 1))

    model = Sequential()
    model.add(
        LSTM(16, activation='relu', input_shape=(n_timesteps, n_features))
    )
    model.add(RepeatVector(n_outputs))
    model.add(LSTM(32, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(200, activation='relu')))
    model.add(TimeDistributed(Dense(1)))

    optimizer = optimizers.Adam(
        lr=0.15, beta_1=0.9, beta_2=0.999, epsilon=1e-08
    )
    model.compile(loss='mse', optimizer=optimizer)

    model.fit(
        inputs,
        targets,
        epochs=800,
        batch_size=50,
        verbose=2,
        shuffle=False
    )
    return model
def forecast(model, history, n_input):
    """Predict the next output sequence from the most recent observations.

    ``history`` is stacked into an array, flattened along its first two
    axes, and its last ``n_input`` rows are passed to ``model.predict`` as a
    batch of one. The first (only) prediction is rounded to the nearest
    integer values with ``np.rint``, printed, and returned.

    Args:
        model: Object exposing ``predict(x, verbose=...)``.
        history: Sequence of equally shaped 2-D arrays, ``[step, feature]``.
        n_input: Number of trailing rows used as model input.

    Returns:
        The rounded prediction for the single input sample.
    """
    stacked = array(history)
    n_rows = stacked.shape[0] * stacked.shape[1]
    flat = stacked.reshape((n_rows, stacked.shape[2]))
    # Most recent n_input rows, shaped as a batch of one: [1, n_input, n].
    window = flat[-n_input:, :]
    batch = window.reshape((1, window.shape[0], window.shape[1]))
    prediction = model.predict(batch, verbose=2)
    rounded = np.rint(prediction[0])
    print(rounded)
    return rounded
def evaluate_model(train, test, n_input):
    """Fit a model on ``train`` and score it on ``test`` walk-forward.

    The model is fitted once with ``build_model``. For every window in
    ``test`` a forecast is made from the history so far, after which the
    real window is appended to the history for the next forecast.

    Args:
        train: Training data indexed ``[window, step, feature]``.
        test: Test data indexed ``[window, step, feature]``.
        n_input: Number of trailing rows used as model input.

    Returns:
        Tuple ``(score, scores)`` from ``evaluate_forecasts``, comparing
        feature column 0 of ``test`` with the collected forecasts.
    """
    model = build_model(train, n_input)
    print(model.summary())

    # Start from the training windows; grows by one real window per step.
    history = list(train)
    predictions = []
    for observed in test:
        predictions.append(forecast(model, history, n_input))
        # Reveal the true window only after it has been forecast.
        history.append(observed)

    predictions = array(predictions)
    score, scores = evaluate_forecasts(test[:, :, 0], predictions)
    print('=========history----------')
    return score, scores
import pandas as pd
# ---------------------------------------------------------------------------
# Script driver: load the data, split it, train/evaluate the LSTM and plot
# the per-step scores. Everything below runs at import time.
# ---------------------------------------------------------------------------
# Earlier CSV-based loader, kept for reference only:
#dataset = read_csv('G:\\household_power_consumption_days.csv', header=0, infer_datetime_format=True, parse_dates=['datetime'], index_col=['datetime'])
# Load sheets 0-3 of the workbook from a hard-coded local path; only the
# first sheet (key 0) is used below.
sheet = pd.read_excel('F:\\123123.xlsx',sheet_name= [0,1,2,3],header=0, index_col=0)
dataset=sheet[0].astype('float64')
values=dataset.values
# The string literal below holds disabled normalisation code (LabelEncoder on
# column 0 followed by MinMaxScaler). It is an inert expression and is never
# executed.
"""
##归一化
encoder = LabelEncoder()
values[:,0] = encoder.fit_transform(values[:,0])
values = values.astype('float32')
scaler = MinMaxScaler(feature_range=(0,1))
values = scaler.fit_transform(values)
print(values)
"""
# Split into train and test windows and show their shapes.
train, test = split_dataset(values)
print(train.shape)
print('------train--------------')
print(test.shape)
# Evaluate the model and get scores; n_input is the number of trailing rows
# handed to the model for each forecast.
n_input = 14
score, scores = evaluate_model(train, test, n_input)
# Print the overall and per-step scores on one line.
summarize_scores('lstm', score, scores)
# Plot one score per forecast step against the day labels.
days = ['sun', 'mon', 'tue', 'wed', 'thr', 'fri', 'sat']
pyplot.plot(days, scores, marker='o', label='lstm')
pyplot.show()