lstm进行多元时间序列预测

最新推荐文章于 2024-06-04 19:00:35 发布

喝粥也会胖的唐僧

最新推荐文章于 2024-06-04 19:00:35 发布

阅读量5.8k

点赞数 3

分类专栏：机器学习

本文链接：https://blog.csdn.net/zhou_438/article/details/101352859

版权

机器学习专栏收录该内容

32 篇文章 7 订阅

订阅专栏

这个代码是之前扒的别人的代码修改而来的，思路同样是设置滑动窗口，只是稍微绕一点。

总之，都是先把时间序列转化成监督学习问题，然后利用监督学习算法进行预测。长短期记忆网络（LSTM）还是蛮好用的，只是需要提前安装 TensorFlow 和 Keras。

from numpy import nan
from numpy import isnan
from pandas import read_csv
from pandas import to_numeric
from keras import optimizers
from math import sqrt
from numpy import split
from numpy import array
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from numpy.random import seed 
import numpy as np
from tensorflow import set_random_seed 
# Seed both TensorFlow's and NumPy's random generators with the same fixed value
# so that repeated runs start from the same random state.
# NOTE(review): `from tensorflow import set_random_seed` looks like the TF 1.x
# spelling — confirm against the installed TensorFlow version.
set_random_seed(2019)
seed(2019) 


def fill_missing(values):
    """Overwrite NaN cells of ``values`` in place with an earlier observation.

    Each NaN at ``[row, col]`` is replaced by the value stored 362 rows
    earlier in the same column. Cells are visited row by row, so a value
    filled earlier in the scan can itself be reused by a later fill.

    For rows with an index below 362 the source index is negative, which
    NumPy interprets as counting from the end of the array.

    Args:
        values: 2-D array indexed as ``values[row, col]``; modified in place.
            (Assumed to be a float NumPy array — confirm against the caller.)

    Returns:
        None.
    """
    lag = 362
    n_rows, n_cols = values.shape[0], values.shape[1]
    for r in range(n_rows):
        source_row = r - lag
        for c in range(n_cols):
            if not isnan(values[r, c]):
                continue
            values[r, c] = values[source_row, c]
# split a (possibly multivariate) dataset into train/test sets of whole weeks
def split_dataset(data, train_start=12, train_end=257, test_end=362, days_per_week=7):
    """Slice ``data`` into train and test ranges and group each into weeks.

    Rows ``train_start:train_end`` become the training set and rows
    ``train_end:test_end`` the test set. Each slice is then cut into
    consecutive chunks of ``days_per_week`` rows and stacked, giving arrays
    indexed as ``[week, day, column]``.

    Args:
        data: 2-D array-like supporting row slicing, one row per day.
        train_start: index of the first training row.
        train_end: index one past the last training row; also the first
            test row.
        test_end: index one past the last test row.
        days_per_week: number of rows grouped into one window.

    Returns:
        Tuple ``(train, test)`` of stacked weekly arrays.

    Raises:
        ValueError: if either slice is empty or its length is not a multiple
            of ``days_per_week``.
    """
    train, test = data[train_start:train_end], data[train_end:test_end]
    print(train.shape)
    print(test.shape)
    weekly = []
    for part in (train, test):
        n_rows = len(part)
        # Reject ragged input explicitly rather than relying on a float
        # section count being interpreted sensibly by numpy.split.
        if n_rows == 0 or n_rows % days_per_week:
            raise ValueError(
                'expected a non-empty multiple of %d rows, got %d'
                % (days_per_week, n_rows)
            )
        # restructure into windows of weekly data (integer section count)
        weekly.append(array(split(part, n_rows // days_per_week)))
    train, test = weekly
    return train, test
 
# score one or more weekly forecasts against the observed values
def evaluate_forecasts(actual, predicted):
    """Compute R^2 scores of ``predicted`` against ``actual``.

    Args:
        actual: array indexed as ``actual[week, day]``.
        predicted: array indexed as ``predicted[week, day, 0]``; the
            trailing axis is dropped for the overall score.

    Returns:
        Tuple ``(score, scores)`` where ``scores`` is a list holding one
        ``r2_score`` per column of ``actual`` and ``score`` is ``r2_score``
        evaluated on the full 2-D arrays (scikit-learn's default
        multi-output aggregation applies).
    """
    n_days = actual.shape[1]
    # one R^2 value per forecast day (column)
    scores = [r2_score(actual[:, day], predicted[:, day]) for day in range(n_days)]
    # overall R^2 across every day of every week
    score = r2_score(actual[:, :], predicted[:, :, 0])
    return score, scores
 
# print a one-line summary of the scores
def summarize_scores(name, score, scores):
    """Print ``name``, the overall score and the per-item scores on one line.

    Args:
        name: label printed at the start of the line.
        score: overall score, shown with three decimals inside brackets.
        scores: iterable of individual scores, each shown with one decimal
            and separated by ``', '``.

    Returns:
        None.
    """
    formatted = []
    for value in scores:
        formatted.append('%.1f' % value)
    line = '%s: [%.3f] %s' % (name, score, ', '.join(formatted))
    print(line)
 
# convert history into inputs and outputs
def to_supervised(train, n_input, n_out=7):
    """Turn weekly history into sliding-window supervised samples.

    The weekly array is flattened back into one continuous sequence of rows.
    A window then slides over it one row at a time: ``n_input`` consecutive
    rows (all columns) form an input sample and column 0 of the following
    ``n_out`` rows forms the matching target.

    Every window that fits is emitted, including the last one whose target
    ends exactly on the final row.

    Args:
        train: 3-D array indexed as ``[week, day, column]``.
        n_input: number of rows in each input window.
        n_out: number of rows in each target window.

    Returns:
        Tuple ``(X, y)``: ``X`` has shape ``(samples, n_input, columns)`` and
        ``y`` has shape ``(samples, n_out)``. Both are empty arrays when the
        history is shorter than ``n_input + n_out`` rows.
    """
    # flatten data
    data = train.reshape((train.shape[0] * train.shape[1], train.shape[2]))
    X, y = [], []
    # The last usable start is the one whose target window ends exactly at
    # len(data); an exclusive slice end equal to len(data) is still valid.
    last_start = len(data) - n_input - n_out
    for in_start in range(last_start + 1):
        in_end = in_start + n_input
        out_end = in_end + n_out
        X.append(data[in_start:in_end, :])
        y.append(data[in_end:out_end, 0])
    return array(X), array(y)
 
# train the model
def build_model(train, n_input):
    """Build and fit the LSTM forecaster on the weekly training data.

    The training weeks are converted to supervised samples with
    ``to_supervised``. The network is an LSTM(16) whose output is repeated
    once per output step, followed by an LSTM(32) returning sequences and two
    time-distributed dense layers (200 units, then 1). It is compiled with
    MSE loss and an Adam optimizer, then fitted for 800 epochs with batch
    size 50 and no shuffling.

    Args:
        train: weekly training array passed on to ``to_supervised``.
        n_input: number of past rows fed to the model per sample.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # prepare data
    inputs, targets = to_supervised(train, n_input)
    n_timesteps = inputs.shape[1]
    n_features = inputs.shape[2]
    n_outputs = targets.shape[1]
    # reshape output into [samples, timesteps, features]
    targets = targets.reshape((targets.shape[0], n_outputs, 1))

    # define model
    model = Sequential()
    model.add(LSTM(16, activation='relu', input_shape=(n_timesteps, n_features)))
    model.add(RepeatVector(n_outputs))
    model.add(LSTM(32, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(200, activation='relu')))
    model.add(TimeDistributed(Dense(1)))
    optimizer = optimizers.Adam(lr=0.15, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss='mse', optimizer=optimizer)

    # fit network
    fit_options = dict(epochs=800, batch_size=50, verbose=2, shuffle=False)
    model.fit(inputs, targets, **fit_options)
    return model

# make a forecast
def forecast(model, history, n_input):
    """Predict the next output window from the most recent observations.

    ``history`` is flattened into one continuous sequence of rows; the last
    ``n_input`` rows are passed to ``model.predict`` as a single-sample
    batch. The first (only) prediction is rounded to the nearest integer
    with ``np.rint``, printed, and returned.

    Args:
        model: object exposing ``predict(x, verbose=...)``.
        history: sequence of weekly arrays, indexed ``[week][day, column]``.
        n_input: number of trailing rows used as model input.

    Returns:
        The rounded prediction for the single input sample.
    """
    # flatten the weekly history into consecutive rows
    flat = array(history)
    n_rows = flat.shape[0] * flat.shape[1]
    flat = flat.reshape((n_rows, flat.shape[2]))
    # keep only the most recent n_input rows and add a leading batch axis
    window = flat[-n_input:, :]
    batch = window.reshape((1, window.shape[0], window.shape[1]))
    # forecast, keep the single sample's output, round to whole numbers
    rounded = np.rint(model.predict(batch, verbose=2)[0])
    print(rounded)
    return rounded
 
# evaluate a single model
def evaluate_model(train, test, n_input):
    """Fit a model on ``train`` and score it on ``test`` week by week.

    After fitting, each test week is forecast from the history seen so far;
    the true test week is then appended to the history before the next
    forecast. The collected forecasts are scored against column 0 of the
    test data with ``evaluate_forecasts``.

    Args:
        train: weekly training array, indexed ``[week, day, column]``.
        test: weekly test array with the same layout.
        n_input: number of past rows fed to the model per forecast.

    Returns:
        Tuple ``(score, scores)`` as returned by ``evaluate_forecasts``.
    """
    # fit model
    model = build_model(train, n_input)
    print(model.summary())

    # the rolling history starts out as the list of training weeks
    history = list(train)
    predictions = []
    # walk-forward validation: forecast a week, then reveal its true values
    for week_idx in range(len(test)):
        predictions.append(forecast(model, history, n_input))
        history.append(test[week_idx, :])
    # evaluate the forecasts against the first column of the test data
    predictions = array(predictions)
    score, scores = evaluate_forecasts(test[:, :, 0], predictions)
    print('=========history----------')
    return score, scores
import pandas as pd 
# Driver script: load the data, split it into weekly train/test sets, train
# and evaluate the LSTM, then print and plot the per-day scores.
#dataset = read_csv('G:\\household_power_consumption_days.csv', header=0, infer_datetime_format=True, parse_dates=['datetime'], index_col=['datetime'])
# Sheets 0-3 are requested from the workbook (header=0, index_col=0); only
# sheet 0 is used below, cast to float64 and reduced to its raw value array.
sheet = pd.read_excel('F:\\123123.xlsx',sheet_name= [0,1,2,3],header=0, index_col=0)
dataset=sheet[0].astype('float64')
values=dataset.values
# The bare string below holds disabled label-encoding / min-max scaling code;
# as an expression statement it has no effect at runtime.
"""
##归一化
encoder = LabelEncoder()
values[:,0] = encoder.fit_transform(values[:,0])
values = values.astype('float32')
scaler = MinMaxScaler(feature_range=(0,1))
values = scaler.fit_transform(values)


print(values)
"""
# split into train and test sets of whole weeks
train, test = split_dataset(values)

# debug output: shapes of the weekly train and test arrays
print(train.shape)
print('------train--------------')
print(test.shape)

# evaluate model and get scores; each forecast uses the previous 14 rows
n_input = 14
score, scores = evaluate_model(train, test, n_input)

# summarize scores
summarize_scores('lstm', score, scores)
# plot scores, one point per forecast day
# NOTE(review): the labels assume each weekly window starts on Sunday —
# confirm against the spreadsheet's dates.
days = ['sun', 'mon', 'tue', 'wed', 'thr', 'fri', 'sat']
pyplot.plot(days, scores, marker='o', label='lstm')
pyplot.show()

喝粥也会胖的唐僧

关注

3
点赞
踩
42

收藏

觉得还不错? 一键收藏
4
评论
lstm进行多元时间序列预测

这个代码是之前趴的人家代码进行改的，思路也是设置滑动窗口，只是稍微绕一点总之，都是把时间序列，转化成监督学习，然后利用监督学习的算法进行预测，长短记忆网络还是蛮好用的，只是需要提前安装TensorFlow和kerasfrom numpy import nanfrom numpy import isnanfrom pandas import read_csvfrom pandas i...
复制链接

扫一扫