Multivariate time series forecasting with LSTM

This code is adapted from someone else's code I found earlier. The idea is still a sliding window, just implemented in a slightly more roundabout way.

In short, the approach is to turn the time series into a supervised learning problem and then forecast with a supervised learning algorithm. LSTM (long short-term memory) networks work quite well for this; you just need to have TensorFlow and Keras installed beforehand.
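
As a quick illustration of the sliding-window idea, here is a minimal, self-contained sketch (separate from the actual script below; the function and variable names are made up for the example): the previous n_input time steps of all features become one input sample X, and the next n_out values of the target column become the corresponding output y.

import numpy as np

def series_to_supervised(data, n_input=3, n_out=2):
    X, y = [], []
    for start in range(len(data) - n_input - n_out + 1):
        end = start + n_input
        X.append(data[start:end, :])        # past window, all features
        y.append(data[end:end + n_out, 0])  # future values of the target (first) column
    return np.array(X), np.array(y)

demo = np.arange(20, dtype=float).reshape(10, 2)  # 10 time steps, 2 features
X_demo, y_demo = series_to_supervised(demo)
print(X_demo.shape, y_demo.shape)                 # (6, 3, 2) (6, 2)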

from numpy import nan
from numpy import isnan
from pandas import read_csv
from pandas import to_numeric
from keras import optimizers
from math import sqrt
from numpy import split
from numpy import array
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from numpy.random import seed
import numpy as np
from tensorflow import set_random_seed  # TensorFlow 1.x API; in TF 2.x use tf.random.set_seed
# fix random seeds for reproducibility
set_random_seed(2019)
seed(2019)


# fill missing values with the observation from the same position one period (362 steps) earlier
def fill_missing(values):
    one_day = 362
    for row in range(values.shape[0]):
        for col in range(values.shape[1]):
            if isnan(values[row, col]):
                values[row, col] = values[row - one_day, col]
# split the dataset into train/test sets
def split_dataset(data):
    # split into standard weeks
    train, test = data[12:257], data[257:362]
    print(train.shape)
    print(test.shape)
    # restructure into windows of weekly data
    train = array(split(train, len(train)/7))
    test = array(split(test, len(test)/7))
    return train, test
 
# evaluate one or more weekly forecasts against expected values
def evaluate_forecasts(actual, predicted):
    scores = list()
    # original version: an RMSE score for each day plus an overall RMSE
    # for i in range(actual.shape[1]):
    #     mse = mean_squared_error(actual[:, i], predicted[:, i])
    #     rmse = sqrt(mse)
    #     scores.append(rmse)
    # s = 0
    # for row in range(actual.shape[0]):
    #     for col in range(actual.shape[1]):
    #         s += (actual[row, col] - predicted[row, col])**2
    # score = sqrt(s / (actual.shape[0] * actual.shape[1]))
    # here an R^2 score is calculated for each day instead
    for i in range(actual.shape[1]):
        r2 = r2_score(actual[:, i], predicted[:, i, 0])
        scores.append(r2)
    # overall R^2 across all forecast days
    score = r2_score(actual[:, :], predicted[:, :, 0])
    return score, scores
 
# summarize scores
def summarize_scores(name, score, scores):
    s_scores = ', '.join(['%.1f' % s for s in scores])
    print('%s: [%.3f] %s' % (name, score, s_scores))
 
# convert history into inputs and outputs
def to_supervised(train, n_input, n_out=7):
    # flatten data
    data = train.reshape((train.shape[0]*train.shape[1], train.shape[2]))
    X, y = list(), list()
    in_start = 0
    # step over the entire history one time step at a time
    for _ in range(len(data)):
        # define the end of the input sequence
        in_end = in_start + n_input
        out_end = in_end + n_out
        # ensure we have enough data for this instance
        if out_end < len(data):
            X.append(data[in_start:in_end, :])
            y.append(data[in_end:out_end, 0])
        # move along one time step
        in_start += 1
    return array(X), array(y)
 
# train the model
def build_model(train, n_input):
    # prepare data
    train_x, train_y = to_supervised(train, n_input)
    # define parameters
    verbose, epochs, batch_size = 2, 800, 50
    n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
    # reshape output into [samples, timesteps, features]
    train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))
    # define model
    model = Sequential()
    model.add(LSTM(16, activation='relu', input_shape=(n_timesteps, n_features)))
    model.add(RepeatVector(n_outputs))
    model.add(LSTM(32, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(200, activation='relu')))
    model.add(TimeDistributed(Dense(1)))
    adam = optimizers.Adam(lr=0.15, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss='mse', optimizer=adam)
    # fit network
    model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose, shuffle=False)
    return model

# make a forecast
def forecast(model, history, n_input):
    # flatten data
    data = array(history)
    data = data.reshape((data.shape[0]*data.shape[1], data.shape[2]))
    # retrieve last observations for input data
    input_x = data[-n_input:, :]
    # reshape into [1, n_input, n]
    input_x = input_x.reshape((1, input_x.shape[0], input_x.shape[1]))
    # forecast the next week
    yhat = model.predict(input_x, verbose=2)
    # we only want the vector forecast, rounded to whole numbers
    yhat = yhat[0]
    yhat = np.rint(yhat)
    print(yhat)
    return yhat
 
# evaluate a single model
def evaluate_model(train, test, n_input):
    # fit model
    model = build_model(train, n_input)
    print(model.summary())
    # history is a list of weekly data
    history = [x for x in train]
    # walk-forward validation over each week
    predictions = list()
    for i in range(len(test)):
        # predict the week
        yhat_sequence = forecast(model, history, n_input)
        # store the predictions
        predictions.append(yhat_sequence)
        # get real observation and add to history for predicting the next week
        history.append(test[i, :])
    # evaluate the predictions for each day of each week
    predictions = array(predictions)
    score, scores = evaluate_forecasts(test[:, :, 0], predictions)
    return score, scores
import pandas as pd

# load the data (the original CSV loader is kept for reference)
#dataset = read_csv('G:\\household_power_consumption_days.csv', header=0, infer_datetime_format=True, parse_dates=['datetime'], index_col=['datetime'])
sheet = pd.read_excel('F:\\123123.xlsx', sheet_name=[0, 1, 2, 3], header=0, index_col=0)
dataset = sheet[0].astype('float64')
values = dataset.values
"""
##归一化
encoder = LabelEncoder()
values[:,0] = encoder.fit_transform(values[:,0])
values = values.astype('float32')
scaler = MinMaxScaler(feature_range=(0,1))
values = scaler.fit_transform(values)


print(values)
"""
# split into train and test
train, test = split_dataset(values)
print('------train/test shapes------')
print(train.shape)
print(test.shape)

# evaluate model and get scores
n_input = 14
score, scores = evaluate_model(train, test, n_input)

# summarize scores
summarize_scores('lstm', score, scores)
# plot scores
days = ['sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat']
pyplot.plot(days, scores, marker='o', label='lstm')
pyplot.legend()
pyplot.show()

 
