LSTM Keras API predicting multiple outputs
在回归问题中的Keras-LSTM 在监督学习中的样本需要是以下3D格式:
reshape input to be 3D [samples, timesteps, features]
- 准备数据
对于下面数据,样本个数m=10
,每个样本的维度n=2
,变量名称为var1
、var2
。
# 10 samples x 2 features: [[1, 2], [3, 4], ..., [19, 20]]
# (dropped the stray `a =` alias — `a` was never used, and the same
#  expression appears later in the full program without it)
data = np.linspace(1, 20, num=20).reshape((10, 2))
[[ 1. 2.]
[ 3. 4.]
[ 5. 6.]
[ 7. 8.]
[ 9. 10.]
[11. 12.]
[13. 14.]
[15. 16.]
[17. 18.]
[19. 20.]]
- 数据归一化
# Normalize each feature column independently to the [0, 1] range;
# `data` is the (10, 2) array built above.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
# fit_transform learns per-column min/max from `data`, then rescales it
scaled = scaler.fit_transform(data)
- 将数据转换成可用于
监督学习
的数据格式:通过前3个时间步,预测后2个时间步。
LSTM
的输入为n_in=3
, 即3个时间步t-3
、 t-2
、 t-1
的特征长度, 则每个训练样本的特征trainX的长度是n_in x n
;
LSTM
的输出为 n_out=2
,即2个时间步t
、 t+1
的特征长度,则每个样本的标签trainY的长度是n_out x n
。
- 不进行归一化的数据预处理
- 进行归一化的数据预处理
# n_in = 3 input timesteps (t-3, t-2, t-1); 2 features per timestep
n_steps = 3
n_features = 2
# The next two lines are alternatives — the second overwrites the first,
# so keep whichever variant you want to use.
reformed = series_to_supervised(data,n_steps,2) # without MinMaxScaler
reformed = series_to_supervised(scaled,n_steps,2) # with MinMaxScaler
print(reformed.shape) # m: L - (n_in + n_out) + 1, n = (n_in + n_out)*n_features
reformed = reformed.values
train = reformed[::2] # rows 0, 2, 4, ... (1st, 3rd, 5th, ...) for training
test = reformed[1::2] # rows 1, 3, 5, ... (2nd, 4th, 6th, ...) for testing
# First n_steps * n_features = 6 columns are the inputs (X), the rest targets (Y)
train_X = train[:, :6]
train_Y = train[:,6:]
test_X = test[:,:6]
test_Y = test[:,6:]
# Reshape X into the 3D [samples, timesteps, features] layout LSTMs expect
train_X = train_X.reshape((train_X.shape[0], n_steps, n_features))
test_X = test_X.reshape((test_X.shape[0], n_steps, n_features))
print('The training data shape is ', train_X.shape)
print('The testing data shape is ', test_X.shape)
- 程序汇总
import pandas as pd
import numpy as np
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning dataset.

    Each output row pairs the previous ``n_in`` observations
    (columns ``var*(t-n_in) .. var*(t-1)``) with the next ``n_out``
    observations (columns ``var*(t) .. var*(t+n_out-1)``).

    Args:
        data: sequence (list) or 2D array/DataFrame of observations.
        n_in: number of lag timesteps used as input.
        n_out: number of future timesteps used as output.
        dropnan: drop rows with NaN introduced by the shifting.

    Returns:
        DataFrame with (n_in + n_out) * n_vars named columns.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    frame = DataFrame(data)
    pieces = []
    headers = []
    # lagged input columns: t-n_in, ..., t-1
    for lag in range(n_in, 0, -1):
        pieces.append(frame.shift(lag))
        headers.extend('var%d(t-%d)' % (v + 1, lag) for v in range(n_vars))
    # forecast columns: t, t+1, ..., t+(n_out-1)
    for step in range(n_out):
        pieces.append(frame.shift(-step))
        if step == 0:
            headers.extend('var%d(t)' % (v + 1) for v in range(n_vars))
        else:
            headers.extend('var%d(t+%d)' % (v + 1, step) for v in range(n_vars))
    combined = concat(pieces, axis=1)
    combined.columns = headers
    # shifting pads the edges with NaN; usually those rows are unusable
    if dropnan:
        combined.dropna(inplace=True)
    return combined
# --- Demo: frame a (10, 2) series as supervised data for an LSTM ---
data = np.linspace(1, 20, num=20).reshape((10, 2))  # 10 samples, 2 features

# Normalize each feature column to [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(data)

n_steps = 3     # n_in: input timesteps (t-3, t-2, t-1)
n_features = 2  # variables per timestep (var1, var2)

# Frame the raw series first, just to show the resulting shape
reformed = series_to_supervised(data, n_steps, 2)
print(data)
# rows: L - (n_in + n_out) + 1 = 6; cols: (n_in + n_out) * n_features = 10
print(reformed.shape)

# Re-frame from the normalized series for actual training
# (pass `data` instead of `scaled` to skip normalization).
# NOTE: the original had a third, dead call with `data` here that was
# immediately overwritten — removed.
reformed = series_to_supervised(scaled, n_steps, 2)
reformed = reformed.values

train = reformed[::2]   # rows 0, 2, 4, ... (1st, 3rd, 5th, ...) for training
test = reformed[1::2]   # rows 1, 3, 5, ... (2nd, 4th, 6th, ...) for testing

# First n_steps * n_features columns are inputs (X), the rest are targets (Y)
n_inputs = n_steps * n_features
train_X = train[:, :n_inputs]
train_Y = train[:, n_inputs:]
test_X = test[:, :n_inputs]
test_Y = test[:, n_inputs:]

# Reshape X into the 3D [samples, timesteps, features] layout LSTMs expect
train_X = train_X.reshape((train_X.shape[0], n_steps, n_features))
test_X = test_X.reshape((test_X.shape[0], n_steps, n_features))
print('The training data shape is ', train_X.shape)
print('The testing data shape is ', test_X.shape)
- 程序输出
[[ 1. 2.]
[ 3. 4.]
[ 5. 6.]
[ 7. 8.]
[ 9. 10.]
[11. 12.]
[13. 14.]
[15. 16.]
[17. 18.]
[19. 20.]]
(6, 10)
The training data shape is (3, 3, 2)
The testing data shape is (3, 3, 2)