在上一篇文章中提到,LSTNet作者开源代码只能进行单步预测,然而时序预测任务和实际项目中则更多的是需要多步预测,本文详述如何进行多步预测。
更新:添加了训练、测试以及预测部分的完整代码。完整项目代码支持一对一、多对一、多对多的多步时间序列预测(包含测试结果的绘制以及预测),需要整个项目完整代码的读者可以私信。
首先来看一下原作者开源代码的数据构建:
可以看到,每一组输入特征 X[batch_size, sequence_length, channels(features_nums)] 对应一组输出 Y[batch_size, channels(features_nums)],即三维的输入对应二维的输出。通常我们会让输入和输出的维度保持一致;由于原代码做的是单步预测,输出 Y 的第二维 sequence_length 等于 1,因此完整的 shape 应为 Y[128, 1, 137],它与 Y[128, 137] 等价。要进行多步预测,从数据构建的角度看,就需要保证 Y 的 shape 为 Y[batch_size, sequence_length, channels(features_nums)]。例如在原数据格式下,若需要预测未来 5 个时间步,Y 就应该是 Y[128, 5, 137]。
数据改好了,那么则需要对网络进行修改,保证三维的输入X在经过LSTNet网络后得到的输出是我们希望得到的三维Y。
下面贴出我修改过后的多步预测LSTNet网络部分代码:
class LSTNetModule(nn.Module):
    """LSTNet variant that emits a multi-step forecast.

    The network combines four parts:

    * a 1-D convolution over the time axis,
    * a recurrent layer that reads the whole convolution output,
    * a "skip" recurrent layer that reads the convolution output in
      sub-sequences whose steps are ``skip_size`` apart,
    * a linear autoregressive ("highway") layer that maps the last
      ``in_chunk_len`` observations of every series to ``out_chunk_len``
      values.

    The recurrent parts produce one vector of ``target_dim`` values per
    sample. That vector is broadcast over all ``out_chunk_len`` steps and
    added to the autoregressive output.

    Args:
        in_chunk_len: Number of input time steps.
        out_chunk_len: Number of forecast time steps.
        target_dim: Number of input series; also the number of output series.
        skip_size: Distance (in time steps) between the steps of one skip
            sub-sequence.
        channels: Number of convolution output channels.
        kernel_size: Convolution kernel size.
        rnn_cell_type: ``"LSTM"`` or ``"GRU"``.
        rnn_cell_nums: Hidden size of the recurrent layer.
        skip_rnn_cell_type: ``"LSTM"`` or ``"GRU"``.
        skip_rnn_cell_nums: Hidden size of the skip recurrent layer.
        dropout_rate: Dropout probability.
        output_activation: ``None``/empty for no activation, ``"sigmoid"``
            for a sigmoid; any other non-empty value applies tanh.

    Raises:
        KeyError: If a cell type is neither ``"LSTM"`` nor ``"GRU"``.
        ValueError: If ``(in_chunk_len - kernel_size) // skip_size`` is
            smaller than 1, i.e. the window cannot hold one skip group.
    """

    _RNN_CELLS = {"LSTM": nn.LSTM, "GRU": nn.GRU}

    def __init__(self,
                 in_chunk_len: int,
                 out_chunk_len: int,
                 target_dim: int,
                 skip_size: int,
                 channels: int,
                 kernel_size: int,
                 rnn_cell_type: str,
                 rnn_cell_nums: int,
                 skip_rnn_cell_type: str,
                 skip_rnn_cell_nums: int,
                 dropout_rate: float,
                 output_activation: Optional[str] = None
                 ):
        super(LSTNetModule, self).__init__()
        conv_out = in_chunk_len - kernel_size
        conv_skip = conv_out // skip_size
        if conv_skip < 1:
            # Fail fast: with no complete skip group the reshape in forward()
            # cannot succeed and would only raise an opaque shape error.
            raise ValueError(
                "in_chunk_len - kernel_size must be at least skip_size "
                "(got in_chunk_len={}, kernel_size={}, skip_size={})".format(
                    in_chunk_len, kernel_size, skip_size)
            )

        self._in_chunk_len = in_chunk_len
        self._channels = channels
        self._rnn_cell_nums = rnn_cell_nums
        self._skip_rnn_cell_nums = skip_rnn_cell_nums
        self._skip_size = skip_size
        self._output_activation = output_activation
        self._conv_skip = conv_skip

        # Attribute names and creation order are kept stable so that saved
        # state dicts keep loading and seeded initialisation is unchanged.
        # Conv1d expects [N, C, L].
        self._cnn = nn.Conv1d(in_channels=target_dim, out_channels=channels, kernel_size=kernel_size)
        self._dropout = nn.Dropout(p=dropout_rate)
        self._rnn = self._RNN_CELLS[rnn_cell_type](channels, rnn_cell_nums)
        self._skip_rnn = self._RNN_CELLS[skip_rnn_cell_type](channels, skip_rnn_cell_nums)
        self._fc = nn.Linear(
            in_features=rnn_cell_nums + skip_size * skip_rnn_cell_nums,
            out_features=target_dim,
        )
        self._ar_fc = nn.Linear(in_features=in_chunk_len, out_features=out_chunk_len)

    @staticmethod
    def _final_hidden(state):
        """Return the final hidden state; an LSTM yields ``(h_n, c_n)``, a GRU ``h_n``."""
        return state[0] if isinstance(state, tuple) else state

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Compute the forecast.

        Args:
            X: Input of shape ``[N, L, target_dim]``.

        Returns:
            Tensor of shape ``[N, out_chunk_len, target_dim]``.
        """
        # CNN: Conv1d wants channels first, the recurrent layers want time first.
        cnn_out = self._cnn(X.permute(0, 2, 1))  # [N, channels, L - kernel_size + 1]
        cnn_out = F.relu(cnn_out)
        cnn_out = self._dropout(cnn_out)
        cnn_out = cnn_out.permute(0, 2, 1)  # [N, L', channels]

        # RNN over the full convolution output.
        _, rnn_state = self._rnn(cnn_out.permute(1, 0, 2))  # input [L', N, channels]
        rnn_out = self._final_hidden(rnn_state)  # [1, N, rnn_cell_nums]
        rnn_out = self._dropout(rnn_out)
        rnn_out = torch.squeeze(rnn_out, dim=0)  # [N, rnn_cell_nums]

        # Skip-RNN: keep the last conv_skip * skip_size steps and regroup them
        # so each sub-sequence holds steps that are skip_size apart.
        skip_len = self._conv_skip * self._skip_size
        skip_seq = cnn_out[:, -skip_len:, :]
        skip_seq = skip_seq.reshape(-1, self._conv_skip, self._skip_size, self._channels)
        skip_seq = skip_seq.permute(0, 2, 1, 3)  # [N, skip_size, conv_skip, channels]
        skip_seq = skip_seq.reshape(-1, self._conv_skip, self._channels)  # [N * skip_size, conv_skip, channels]
        _, skip_state = self._skip_rnn(skip_seq.permute(1, 0, 2))
        skip_out = self._final_hidden(skip_state)
        skip_out = skip_out.reshape(-1, self._skip_size * self._skip_rnn_cell_nums)
        skip_out = self._dropout(skip_out)

        res = self._fc(torch.cat([rnn_out, skip_out], dim=1))  # [N, target_dim]
        res = torch.unsqueeze(res, dim=1)  # [N, 1, target_dim], broadcast over the horizon

        # Highway: per-series linear map from the last in_chunk_len steps to
        # out_chunk_len steps.
        ar_in = X[:, -self._in_chunk_len:, :].permute(0, 2, 1)  # [N, target_dim, in_chunk_len]
        ar_out = self._ar_fc(ar_in).permute(0, 2, 1)  # [N, out_chunk_len, target_dim]

        out = ar_out + res
        if self._output_activation:
            # torch.sigmoid / torch.tanh replace the deprecated F.sigmoid / F.tanh.
            if self._output_activation == "sigmoid":
                out = torch.sigmoid(out)
            else:
                out = torch.tanh(out)
        return out
下面对网络初始化参数进行说明:
| 参数 | 说明 |
| --- | --- |
| in_chunk_len | 输入长度 |
| out_chunk_len | 输出长度 |
| target_dim | 特征数量 |
| skip_size | 跳过的时间步长 |
| channels | 通道数 |
| kernel_size | 卷积核大小 |
| rnn_cell_type | rnn单元类型(LSTM、GRU) |
| rnn_cell_nums | rnn单元的数量 |
| skip_rnn_cell_type | skip-rnn单元类型(LSTM、GRU) |
| skip_rnn_cell_nums | skip-rnn单元的数量 |
| dropout_rate | dropout比例 |
| output_activation | 输出激活函数 |
训练代码:
def train(model, data, settings):
    """Train ``model`` and return it with the best validated weights loaded.

    Uses Adam and MSE loss. After every epoch the model is evaluated with
    ``vali``; whenever the validation loss improves, the state dict is saved
    to ``weights/LSTNet/checkpoint.pth`` and, if ``settings['scale']`` is
    ``True``, the training scaler is pickled next to it.

    Relies on names defined elsewhere in the file: ``get_data``, ``vali`` and
    the global ``device``.

    Args:
        model: The network to optimise.
        data: Raw data handed to ``get_data``.
        settings: Dict read for ``learning_rate``, ``train_epochs``, ``task``,
            ``pred_len`` and ``scale``.

    Returns:
        ``model``. The best checkpoint of this run is loaded into it when at
        least one checkpoint was saved; otherwise the weights of the last
        epoch are kept.
    """
    train_dataset, train_loader = get_data(data, 'train', settings)
    _, val_loader = get_data(data, 'val', settings)

    path = os.path.join('weights/', 'LSTNet')
    os.makedirs(path, exist_ok=True)
    best_model_path = path + '/' + 'checkpoint.pth'

    time_now = time.time()
    train_steps = len(train_loader)
    model_optim = optim.Adam(model.parameters(), lr=settings['learning_rate'])
    criterion = nn.MSELoss()

    pred_len = settings['pred_len']
    # For 'MS' and 'M' the loss is computed on the last feature column only,
    # otherwise on all columns.
    # NOTE(review): predict() returns every column for 'M'; confirm that
    # restricting the loss to the last column is intended for 'M'.
    f_dim = -1 if settings['task'] in ('MS', 'M') else 0

    # Start from +inf so the first finite validation loss always produces a
    # checkpoint, whatever the scale of the data.
    best_loss = float('inf')
    checkpoint_saved = False

    for epoch in range(settings['train_epochs']):
        iter_count = 0
        train_loss = []
        model.train()
        epoch_time = time.time()
        for i, (batch_x, batch_y) in enumerate(train_loader):
            iter_count += 1
            model_optim.zero_grad()
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)

            outputs = model(batch_x)[:, -pred_len:, f_dim:]
            batch_y = batch_y[:, -pred_len:, f_dim:]
            loss = criterion(outputs, batch_y)
            train_loss.append(loss.item())

            if (i + 1) % 100 == 0:
                print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                speed = (time.time() - time_now) / iter_count
                left_time = speed * ((settings['train_epochs'] - epoch) * train_steps - i)
                print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                iter_count = 0
                time_now = time.time()

            loss.backward()
            model_optim.step()

        print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
        train_loss = np.average(train_loss)
        vali_loss = vali(model, val_loader, criterion, settings)
        # No separate test split is evaluated here; the validation loss is
        # reported in the "Test Loss" slot as well.
        test_loss = vali_loss
        print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
            epoch + 1, train_steps, train_loss, vali_loss, test_loss))

        if vali_loss < best_loss:
            best_loss = vali_loss
            print("Saving model...")
            torch.save(model.state_dict(), best_model_path)
            checkpoint_saved = True
            if settings['scale'] is True:
                path_to_scaler = os.path.join(path, 'LSTNet_scaler.pkl')
                # Context manager guarantees the file is flushed and closed.
                with open(path_to_scaler, 'wb') as scaler_file:
                    pickle.dump(train_dataset.scaler, scaler_file)

    if checkpoint_saved:
        model.load_state_dict(torch.load(best_model_path))
    else:
        # Avoid loading a stale checkpoint from an earlier run (or crashing
        # on a missing file) when this run never improved.
        print("No checkpoint was saved during this run; keeping the current weights.")
    print('==========================Training Done!==========================')
    return model
测试代码:
def test(model, data, settings, test=1):
    """Evaluate ``model`` on the test split, plot samples and print metrics.

    Every 20th batch a plot of the first sample (history followed by ground
    truth vs. history followed by forecast, last column only) is written to
    ``./test_results/LSTNet/<batch index>.jpg`` via ``visual``.

    Relies on names defined elsewhere in the file: ``get_data``, ``visual``,
    ``metric`` and the global ``device``.

    Args:
        model: The network to evaluate.
        data: Raw data handed to ``get_data``.
        settings: Dict read for ``task`` and ``pred_len``.
        test: When truthy, weights are first loaded from
            ``./weights/LSTNet/checkpoint.pth``.

    Returns:
        None. Metrics are printed.
    """
    test_data, test_loader = get_data(data, flag='test', settings=settings)
    if test:
        print('loading model')
        # map_location lets a checkpoint saved on another device be loaded
        # onto the device used here.
        model.load_state_dict(
            torch.load(os.path.join('./weights/' + 'LSTNet', 'checkpoint.pth'), map_location=device))

    folder_path = './test_results/' + 'LSTNet' + '/'
    os.makedirs(folder_path, exist_ok=True)

    pred_len = settings['pred_len']
    # For 'MS' and 'M' only the last feature column is evaluated.
    # NOTE(review): predict() returns every column for 'M'; confirm that
    # evaluating only the last column is intended for 'M'.
    f_dim = -1 if settings['task'] in ('MS', 'M') else 0

    def to_original_scale(array):
        # Flatten batch and time into rows so the inverse transform receives a
        # 2-D array for any batch size, then restore the original shape.
        flat = array.reshape(-1, array.shape[-1])
        return test_data.inverse_transform(flat).reshape(array.shape)

    preds = []
    trues = []
    model.eval()
    with torch.no_grad():
        for i, (batch_x, batch_y) in enumerate(test_loader):
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)

            outputs = model(batch_x)[:, -pred_len:, :]
            batch_y = batch_y[:, -pred_len:, :]
            outputs = outputs.detach().cpu().numpy()
            batch_y = batch_y.detach().cpu().numpy()

            # Undo the scaling on the full-width arrays first, then select the
            # evaluated column(s).
            if test_data.scale:
                outputs = to_original_scale(outputs)
                batch_y = to_original_scale(batch_y)
            pred = outputs[:, :, f_dim:]
            true = batch_y[:, :, f_dim:]
            preds.append(pred)
            trues.append(true)

            if i % 20 == 0:
                history = batch_x.detach().cpu().numpy()
                if test_data.scale:
                    history = to_original_scale(history)
                gt = np.concatenate((history[0, :, -1], true[0, :, -1]), axis=0)
                forecast = np.concatenate((history[0, :, -1], pred[0, :, -1]), axis=0)
                visual(gt, forecast, os.path.join(folder_path, str(i) + '.jpg'))

    # Concatenating along the batch axis also works when the last batch is
    # smaller than the others.
    preds = np.concatenate(preds, axis=0)
    trues = np.concatenate(trues, axis=0)
    print('test shape:', preds.shape, trues.shape)

    mae, mse, rmse, mape, mspe, acc = metric(preds, trues)
    print('mse:{}, mae:{}'.format(mse, mae))
    print('rmse:{}, mape:{}'.format(rmse, mape))
    return
预测代码:
def predict(model, data, settings):
    """Run one forecast and write it to ``predict_result/LSTNet/result.xlsx``.

    Loads ``./weights/LSTNet/checkpoint.pth`` into ``model``, builds one input
    window from ``data``, runs the model and, if ``settings['scale']`` is
    ``True``, maps the output back to the original scale with the scaler
    pickled during training.

    Relies on the global ``device`` defined elsewhere in the file.

    Args:
        model: Network whose architecture matches the checkpoint.
        data: pandas DataFrame holding the input window.
        settings: Dict read for ``seq_len``, ``target``, ``task`` and
            ``scale``.

    Returns:
        pandas DataFrame with one row per forecast step: a single column for
        tasks ``'MS'``/``'S'``, one column per series for ``'M'``.

    Raises:
        ValueError: If ``settings['task']`` is not ``'M'``, ``'MS'`` or ``'S'``.
    """
    print('Loading Model...')
    map_location = 'cuda' if torch.cuda.is_available() else 'cpu'
    path = os.path.join('weights/', 'LSTNet')
    model.load_state_dict(
        torch.load(os.path.join('./weights/' + 'LSTNet', 'checkpoint.pth'), map_location=map_location))

    # NOTE(review): this takes the FIRST seq_len rows of data; confirm callers
    # pass a frame that starts at the window to forecast from.
    raw_data = data[:settings['seq_len']].copy()
    # Move the target column to the last position.
    target_col = raw_data.pop(settings['target'])
    raw_data[settings['target']] = target_col

    task = settings['task']
    if task in ('M', 'MS'):
        # Drops the first column - presumably a timestamp; TODO confirm.
        pred_data = raw_data[raw_data.columns[1:]]
    elif task == 'S':
        pred_data = raw_data[[settings['target']]]
    else:
        raise ValueError("unsupported task: {!r}".format(task))

    scaler = None
    if settings["scale"] is True:
        path_to_scaler = os.path.join(path, 'LSTNet' + '_scaler.pkl')
        # NOTE: unpickling can execute arbitrary code; only load scaler files
        # written by this project's training run.
        with open(path_to_scaler, 'rb') as scaler_file:
            scaler = pickle.load(scaler_file)
        features = scaler.transform(pred_data)
    else:
        # torch.from_numpy needs an ndarray, not a DataFrame.
        features = pred_data.to_numpy()

    batch = torch.from_numpy(features).float()
    if batch.ndim == 1:
        batch = batch.unsqueeze(1)
    batch = batch.unsqueeze(0)  # [1, seq_len, n_features]

    model.eval()
    with torch.no_grad():
        predictions = model(batch.to(device))
    predictions = predictions.detach().cpu().numpy()
    # Collapse the batch axis: one row per forecast step, one column per series.
    predictions = predictions.reshape(-1, predictions.shape[-1])

    if scaler is not None:
        # Inverse-transform at full width (same column layout that was fed to
        # scaler.transform above) before picking the target, as test() does.
        predictions = scaler.inverse_transform(predictions)
    if task in ('MS', 'S'):
        predictions = predictions[:, -1]

    predictions = pd.DataFrame(data=predictions)
    result_path = os.path.join('predict_result/', 'LSTNet')
    os.makedirs(result_path, exist_ok=True)
    predictions.to_excel(os.path.join(result_path, 'result.xlsx'))
    return predictions