在之前的文章中,我们已经试着写出了最简单的LSTM层,但是在测试方面出了一点小的异常,导致超出测试集的部分无法连续输出预测值。本文在解决超出测试集部分的连续预测问题之外,还将进行模型的调参。
首先是之前的代码部分。
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import MinMaxScaler
# Load the pressure time series and build the normalized training tensor.
# All names defined here (train_data, test_data, scaler, train_window) are
# consumed by the training and prediction code further down the script.
time_series = pd.read_csv('../wntr-data/pressure data.csv')
# Column '5' holds the pressure readings for node 5 (15-minute sampling).
node5_15min = time_series.loc[:,'5']
# print(len(node5_15min))
#
#
# plt.plot(node5_15min[:385])
# plt.show()
# Convert the data to floating point
all_data = node5_15min.values.astype(float)
# Keep only the first 1250 samples (presumably ~13 days at 96 samples/day — TODO confirm)
all_data = all_data[:1250]
# Select the input waveform: hold out the last 16 points as the test set
test_data_size = 16
train_data = all_data[:-test_data_size]
test_data = all_data[-test_data_size:]
# Scale the training data into [0, 1]; the same fitted scaler is reused at the
# end to invert the predictions back to real pressure units.
scaler = MinMaxScaler(feature_range=(0, 1))
train_data_normalized = scaler.fit_transform(train_data .reshape(-1, 1))
train_data = torch.FloatTensor(train_data_normalized).view(-1)
# Length of the recent-history window fed to the model.
train_window = 7
def create_inout_sequences(input_data, tw, samples_per_day=96, history_days=7):
    """Build (sequence, label) training pairs from a 1-D tensor.

    Each sequence concatenates two views of the history:
      * the ``tw`` most recent consecutive samples, and
      * one sample per day at the same time-of-day over the previous
        ``history_days`` days (stride ``samples_per_day``), oldest first.

    The label is the single sample immediately following the window.

    Args:
        input_data: 1-D torch tensor of (normalized) samples.
        tw: size of the consecutive recent-history window.
        samples_per_day: samples per 24 h; default 96 (15-minute data).
        history_days: number of past days sampled at daily stride; default 7.

    Returns:
        List of ``(seq, label)`` tensor pairs; ``seq`` has
        ``tw + history_days`` elements and ``label`` has one.
    """
    # History depth needed so the daily-stride slice never reaches before 0;
    # previously hard-coded as 672 (= 96 * 7).
    history = samples_per_day * history_days
    inout_seq = []
    L = len(input_data)
    for i in range(history, L - tw):
        recent = input_data[i:i + tw]
        # Same time-of-day on each of the previous `history_days` days.
        daily = input_data[i + tw - history:i + tw:samples_per_day]
        seq = torch.cat([recent, daily])
        label = input_data[i + tw:i + tw + 1]
        inout_seq.append((seq, label))
    return inout_seq
# Materialize all (sequence, label) training pairs up front.
train_inout_seq = create_inout_sequences(train_data, train_window)
# print(train_inout_seq[:5])
# Fall back to CPU when CUDA is unavailable so the script still runs
# (previously torch.device('cuda') crashed on CPU-only machines).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class LSTM(nn.Module):
    """Single-layer LSTM with a linear head; emits one scalar per sequence."""

    def __init__(self, input_size=1, hidden_layer_size=64, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        # Submodules are created in this order so seeded weight init is stable.
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        # Initial (h_0, c_0); the training/prediction code overwrites this
        # with device-resident zeros before every forward pass.
        zeros = torch.zeros(1, 1, self.hidden_layer_size)
        self.hidden_cell = (zeros, zeros.clone())

    def forward(self, input_seq):
        """Run the whole sequence through the LSTM, return the last output."""
        seq_len = len(input_seq)
        shaped = input_seq.view(seq_len, 1, -1)  # (seq_len, batch=1, features)
        lstm_out, self.hidden_cell = self.lstm(shaped, self.hidden_cell)
        predictions = self.linear(lstm_out.view(seq_len, -1))
        return predictions[-1]
# Train the model: one optimizer step per (sequence, label) pair.
model = LSTM().to(device)
loss_function = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)
epochs = 101
for i in range(epochs):
    for seq, labels in train_inout_seq:
        seq,labels =seq.to(device),labels.to(device)
        optimizer.zero_grad()
        # Reset (h, c) to device-resident zeros for every sequence so each
        # window is scored independently (stateless training).
        a = torch.zeros(1, 1, model.hidden_layer_size).to(device)
        b = torch.zeros(1, 1, model.hidden_layer_size).to(device)
        model.hidden_cell = (a,b)
        y_pred = model(seq)
        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()
    # Report the loss of the last sequence on epochs 1, 26, 51, 76.
    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')
接下来我们进行窗口的滑动,把训练好的模型用来预测之后的10个数据,并且添加到另一个列表中,train_data只用来调参。
原先的train_data复制一下为data_pred,然后取最后7个值作为顺序的原始时间序列,即第一个输入。第二个输入是[-95, -95-96, -95-96*2, …, -95-96*6],即与预测时刻间隔为24h的前7天的对应值。最后把预测结果添加到data_pred。
# Rolling forecast: repeatedly predict the next value and append it to
# data_pred so later iterations consume earlier predictions (sliding window).
data_pred = train_data[:].tolist()
for i in range(10):
    # Last `train_window` consecutive normalized values ...
    inputs_ =data_pred[-train_window:]
    # ... plus the value roughly one day before the prediction point.
    # NOTE(review): offset is -95 here, but training used a stride-96 layout
    # relative to the label — possible off-by-one; confirm against
    # create_inout_sequences.
    inputs_.append(data_pred[-95])
    test_inputs1 = torch.FloatTensor(inputs_)
    # Same time-of-day samples from days 2..7 back (negative step, newest first).
    # NOTE(review): training built the daily samples oldest-first; the reversed
    # order here may not match the training layout — verify.
    test_inputs2 =torch.FloatTensor(data_pred[-95-96:-95-(96*6)-1:-96])
    seq = torch.cat([test_inputs1,test_inputs2]).to(device)
    with torch.no_grad():
        # Fresh zero (h, c) state for each one-step prediction, as in training.
        a = torch.zeros(1, 1, model.hidden_layer_size).to(device)
        b = torch.zeros(1, 1, model.hidden_layer_size).to(device)
        model.hidden_cell = (a, b)
        data_pred.append(model(seq).item())
# Undo the MinMax scaling to recover pressures in the original units.
actual_predictions = scaler.inverse_transform(np.array(data_pred[-10:] ).reshape(-1, 1))
print(actual_predictions[-5:])
# Plot the 10 forecast values against the ground-truth tail of the series.
plt.plot(test_data[-10:])
plt.plot(actual_predictions)
plt.title('predictdata vs realdata')
plt.ylabel('pressure')
plt.grid(True)
plt.legend(['real','predict'])
plt.show()
终于完成了!之后我们将使用全部数据,并对更深的网络结构进行调整。