LSTM代码部分(关于原理网上资料已经很多,这里只展示代码,数据集为开源ETT数据集)
1. 导入必需的包
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn import metrics
from torch.utils.data import Dataset, DataLoader
2. 定义LSTM网络(这里定义两个网络结构作对比)
# Baseline model
class Lstm(nn.Module):
    """LSTM followed by a single linear layer over every time step.

    The per-step LSTM outputs of the whole window are flattened into one
    vector per sample and projected to ``output_size`` values.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        is_bidirectional: Whether the LSTM runs in both directions.
        output_size: Number of values predicted per sample.
        seq_length: Window length; fixes the input width of ``fc1``, so
            every batch passed to ``forward`` must use this length.
    """

    def __init__(self, input_size, hidden_size, num_layers, is_bidirectional, output_size, seq_length):
        super(Lstm, self).__init__()
        self.bidirectional = is_bidirectional
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=self.bidirectional)
        num_directions = 2 if self.bidirectional else 1
        self.fc1 = nn.Linear(hidden_size * num_directions * seq_length, output_size)

    def forward(self, x):
        """Return predictions of shape (batch, output_size).

        Args:
            x: Input of shape (batch, seq_len, input_size); the batch comes
                first because the LSTM is built with ``batch_first=True``.
        """
        # LSTM inputs:
        #   input (batch, seq_len, input_size)
        #   h_0 / c_0 (num_layers * num_directions, batch, hidden_size)
        #   batch_first does NOT apply to the hidden/cell states: they keep
        #   this layout whether batch_first is True or False. When they are
        #   not supplied they default to zeros.
        # LSTM outputs:
        #   output (batch, seq_len, hidden_size * num_directions)
        #   h_n / c_n (num_layers * num_directions, batch, hidden_size)
        # A last-step-only head would read h_n[-1, :, :] or output[:, -1, :];
        # this model instead feeds every time step to the linear layer.
        out, _ = self.lstm(x)
        # One row per sample: (batch, seq_len * hidden_size * num_directions).
        out = out.flatten(start_dim=1)
        return self.fc1(out)
# Improved model
class opt_Lstm(nn.Module):
    """LSTM whose two directions get separate linear heads.

    In bidirectional mode the forward-direction and backward-direction
    outputs are flattened separately, projected by ``fc1`` and ``fc2``,
    summed, and mapped to the output by ``fc3``. In unidirectional mode a
    single linear layer ``fc`` maps the flattened outputs to the output.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction.
        output_size: Number of values predicted per sample.
        linear_size: Width of the intermediate projection (bidirectional
            mode only).
        num_layers: Number of stacked LSTM layers.
        seq_length: Window length; fixes the input width of the linear
            layers, so every batch must use this length.
        is_bidirectional: Whether the LSTM runs in both directions.
    """

    def __init__(self, input_size, hidden_size, output_size, linear_size, num_layers, seq_length, is_bidirectional):
        super(opt_Lstm, self).__init__()
        self.hidden_size = hidden_size
        self.bidirectional = is_bidirectional
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=self.bidirectional)
        flat_size = seq_length * hidden_size
        if self.bidirectional:
            self.fc1 = nn.Linear(flat_size, linear_size)
            self.fc2 = nn.Linear(flat_size, linear_size)
            self.fc3 = nn.Linear(linear_size, output_size)
        else:
            self.fc = nn.Linear(flat_size, output_size)

    def forward(self, x):
        """Return predictions of shape (batch, output_size).

        Args:
            x: Input of shape (batch, seq_len, input_size).
        """
        out, _ = self.lstm(x)
        if not self.bidirectional:
            return self.fc(out.flatten(start_dim=1))

        # The last axis is the forward states followed by the backward
        # states, each hidden_size wide; split there and flatten each half
        # to one row per sample.
        forward_part = out[:, :, :self.hidden_size].flatten(start_dim=1)
        backward_part = out[:, :, self.hidden_size:].flatten(start_dim=1)
        merged = self.fc1(forward_part) + self.fc2(backward_part)
        return self.fc3(merged)
3. 定义Dataset及数据预处理
# Sliding-window dataset
class get_dataset(Dataset):
    """Min-max scaled sliding windows over a CSV time series.

    Each sample is a window of ``seq_length + 1`` consecutive rows: the
    first ``seq_length`` rows are the model input and the last selected
    feature of the final row is the target.

    Args:
        data_path: CSV file containing a ``date`` column plus value columns.
        seq_length: Number of input time steps per sample.
        features: Integer positions of the value columns to use (counted
            after the ``date`` column is dropped); the last one is the
            prediction target.
        train_split: Fraction of the windows, in chronological order, that
            belongs to the training split.
        mode: ``'train'`` selects the leading split; any other value
            selects the remainder.
    """

    def __init__(self, data_path, seq_length, features, train_split, mode):
        self.mode = mode
        self.data_path = data_path
        self.features = features
        self.seq_length = seq_length
        self.data, self.data_max, self.data_min = self.get_data()
        train_num = int(train_split * len(self.data))
        if self.mode == 'train':
            self.data = self.data[:train_num]
        else:
            self.data = self.data[train_num:]

    def __len__(self):
        """Return the number of windows in this split."""
        return len(self.data)

    def __getitem__(self, index):
        """Return (inputs, target) for one window.

        inputs has shape (seq_length, n_features); target has shape (1,)
        and holds the last feature at the step right after the inputs.
        """
        window = self.data[index]
        return window[:-1, :], window[-1, -1].unsqueeze(0)

    def get_data(self):
        """Load the CSV, scale every column to [0, 1] and build all windows.

        Returns:
            A tuple ``(windows, data_max, data_min)`` where ``windows`` is a
            float32 tensor of shape (n_samples, seq_length + 1, n_features)
            and the other two are the per-column maxima / minima used for
            scaling.
        """
        data = pd.read_csv(self.data_path)
        data.index = pd.to_datetime(data['date'])
        data = data.drop('date', axis=1)

        # NOTE(review): the scaling statistics come from the whole file, so
        # the test period influences how the training data is scaled.
        data_max = data.max()
        data_min = data.min()
        # A constant column has max == min; dividing by 1 instead of 0 maps
        # it to 0.0 rather than filling it with NaN.
        value_range = (data_max - data_min).replace(0, 1)
        data = (data - data_min) / value_range

        window = self.seq_length + 1
        num_sample = len(data) - self.seq_length
        n_features = len(self.features)
        if num_sample <= 0:
            # Too few rows for a single window: fall back to the plain
            # allocation (empty for 0, torch raises for a negative size).
            return torch.zeros(num_sample, window, n_features), data_max, data_min

        values = torch.tensor(data.iloc[:, self.features].values, dtype=torch.float32)
        # unfold gives (n_samples, n_features, window) overlapping views;
        # permute to (n_samples, window, n_features) and copy so that the
        # windows no longer share memory.
        seq_data = values.unfold(0, window, 1).permute(0, 2, 1).contiguous()
        return seq_data, data_max, data_min
4. 定义训练
def train(model, dataset, epochs, optim, loss_function, device, batch_size, shuffle=True):
    """Train ``model`` in place and return the mean loss of every epoch.

    Args:
        model: Module to optimise; expected to already live on ``device``.
        dataset: Dataset yielding ``(input, target)`` pairs.
        epochs: Number of passes over ``dataset``.
        optim: Optimizer built over ``model``'s parameters.
        loss_function: Callable ``(prediction, target) -> scalar loss``.
        device: Device the batches are moved to.
        batch_size: Samples per batch.
        shuffle: Whether the loader reshuffles the data every epoch.

    Returns:
        A list with one entry per epoch: the loss averaged over the
        batches of that epoch (NaN if the loader produced no batches).
    """
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    num_batches = len(data_loader)
    history = []
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)
            optim.zero_grad()
            loss = loss_function(model(x), y)
            loss.backward()
            optim.step()
            running_loss += loss.item()
        # An empty loader would otherwise divide by zero.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        history.append(epoch_loss)
        # Report 1-based progress so the final line reads "epochs / epochs".
        print('epoch / epochs : %d / %d, loss : %.6f' % (epoch + 1, epochs, epoch_loss))
    return history
5. 定义模型保存
def save_model(model, path):
    """Write the parameters (state dict) of ``model`` to ``path``."""
    state = model.state_dict()
    torch.save(state, path)
6. 定义测试
def test(opt, path, dataset, device, batch_size, input_size, hidden_size, output_size, linear_size, num_layers, seq_length, shuffle=False):
    """Rebuild a model, load saved weights and predict over ``dataset``.

    Args:
        opt: If truthy build a bidirectional ``opt_Lstm``, otherwise a
            unidirectional ``Lstm``.
        path: File holding the state dict to load.
        dataset: Dataset yielding ``(input, label)`` pairs.
        device: Device used for inference.
        batch_size: Samples per batch.
        input_size, hidden_size, output_size, linear_size, num_layers,
        seq_length: Model hyper-parameters; they must match the ones used
            when the checkpoint was saved. ``linear_size`` is only used
            when ``opt`` is truthy.
        shuffle: Whether the loader shuffles the samples.

    Returns:
        ``(preds, labels)``: two 1-D numpy arrays of length
        ``len(dataset)``, in loader order. The flat buffers hold one value
        per sample, i.e. they assume ``output_size == 1``.
    """
    if opt:
        model = opt_Lstm(input_size=input_size,
                         hidden_size=hidden_size,
                         output_size=output_size,
                         linear_size=linear_size,
                         num_layers=num_layers,
                         seq_length=seq_length,
                         is_bidirectional=True)
    else:
        model = Lstm(input_size=input_size,
                     hidden_size=hidden_size,
                     num_layers=num_layers,
                     is_bidirectional=False,
                     output_size=output_size,
                     seq_length=seq_length)
    model = model.to(device)
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only
    # host (and vice versa).
    model.load_state_dict(torch.load(path, map_location=device))
    model.eval()

    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    preds, labels = np.zeros(len(dataset)), np.zeros(len(dataset))
    start = 0
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for x, label in data_loader:
            stop = start + len(label)
            preds[start:stop] = model(x.to(device)).cpu().numpy().flatten()
            labels[start:stop] = label.numpy().flatten()
            start = stop
    return preds, labels
7. 定义模型评估指标
def model_eva(pred, label, show=True):
    """Plot predictions against labels and report regression metrics.

    Args:
        pred: 1-D sequence of predicted values.
        label: 1-D sequence of ground-truth values, same length as ``pred``.
        show: If true (default) draw the plotly line chart; pass ``False``
            to compute and print the metrics only.

    Returns:
        A dict with the keys ``'mse'``, ``'r2'``, ``'mape'`` and ``'mae'``.
        ``'mape'`` is computed only over samples whose label exceeds 0.01
        and is NaN when there is no such sample.
    """
    pred = np.asarray(pred, dtype=float)
    label = np.asarray(label, dtype=float)

    if show:
        fig = px.line(title='LSTM模型预测')
        fig.add_scatter(y=label, name='label')
        fig.add_scatter(y=pred, name='pred')
        fig.show()

    error = label - pred
    mse = np.mean(error ** 2)
    r2 = 1 - np.sum(error ** 2) / np.sum((label - np.mean(label)) ** 2)
    mae = np.abs(error).mean()
    # Near-zero labels would blow up the relative error, so leave them out
    # of MAPE. The mask is built from the `label` argument, not from a
    # module-level variable.
    mask = label > 0.01
    mape = np.abs(error[mask] / label[mask]).mean() if mask.any() else float('nan')

    print('MSE : %.6f' % (mse))
    print('R2 : %.6f' % (r2))
    print('MAPE : %.6f' % (mape))
    print('MAE : %.6f' % (mae))
    return {'mse': float(mse), 'r2': float(r2), 'mape': float(mape), 'mae': float(mae)}
8. main函数以及运行结果
# Fix every random seed so that runs are repeatable.
seed = 0
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Data settings.
# NOTE(review): the original script never defined data_path and failed with
# a NameError; 'ETTh1.csv' is an assumed default - point it at your local
# copy of the ETT dataset.
data_path = 'ETTh1.csv'
seq_length = 24
features = [0, 1, 2, 3, 4, 5, 6]  # [HUFL,HULL,MUFL,MULL,LUFL,LULL,OT]
train_split = 0.8

# Training settings.
epochs = 300
lr = 0.005
batch_size = 16

# Model settings.
input_size = len(features)
hidden_size = 32
output_size = 1
linear_size = 16
num_layers = 1
opt = True  # True: bidirectional opt_Lstm, False: unidirectional Lstm
save_path = 'LSTM_ETT.pth'

if opt:
    model = opt_Lstm(input_size=input_size,
                     hidden_size=hidden_size,
                     output_size=output_size,
                     linear_size=linear_size,
                     num_layers=num_layers,
                     seq_length=seq_length,
                     is_bidirectional=True).to(device)
else:
    model = Lstm(input_size=input_size,
                 hidden_size=hidden_size,
                 num_layers=num_layers,
                 is_bidirectional=False,
                 output_size=output_size,
                 seq_length=seq_length).to(device)

optim = torch.optim.SGD(model.parameters(), lr=lr)
loss_function = nn.MSELoss()

dataset_train = get_dataset(data_path, seq_length, features, train_split=train_split, mode='train')
dataset_test = get_dataset(data_path, seq_length, features, train_split=train_split, mode='test')

# Train, save the weights, then reload them for evaluation on the test split.
train(model, dataset_train, epochs, optim, loss_function, device, batch_size, shuffle=True)
save_model(model, save_path)
preds, labels = test(opt, save_path, dataset_test, device, batch_size, input_size, hidden_size,
                     output_size, linear_size, num_layers, seq_length, shuffle=False)
model_eva(preds, labels)