时序预测小白,看到很多Transformer在时间序列预测上应用的代码,一头雾水,很烦,所以想自己记录一下,以求真正把Transformer怎么用在时间序列预测上讲清楚,捋明白。
我直接用代码来讲,用一个公开数据集(ETTh1)来做例子。
我把这个数据集提前归一化处理了,命名为ETTh1_standardization.csv,需要数据集的可以私信我。
首先导包并设定随机种子
import argparse
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import time
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from typing import List
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset
import matplotlib.pyplot as plt
# Seed every RNG we rely on so runs are reproducible.
# The original only seeded torch's CPU generator; numpy (data/metrics code
# below) and CUDA generators were left unseeded.
fix_seed = 2024
torch.manual_seed(fix_seed)
np.random.seed(fix_seed)
torch.cuda.manual_seed_all(fix_seed)  # no-op on CPU-only machines
这些包的作用后面都会有。
然后传入参数
def _str_to_bool(value):
    """Parse a CLI boolean.

    BUG FIX: the original used ``type=bool`` for --use_gpu, but argparse then
    calls ``bool('False')`` which is True — any non-empty string counted as
    True. This helper parses the common spellings explicitly.
    """
    if isinstance(value, bool):
        return value
    lowered = value.lower()
    if lowered in ('true', '1', 'yes', 'y'):
        return True
    if lowered in ('false', '0', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError('boolean value expected, got {!r}'.format(value))


# Command-line hyper-parameters for the experiment; every option has a default
# so the script runs with no arguments.
parser = argparse.ArgumentParser(description='Transformer')
parser.add_argument('--model', type=str, required=False, default='Transformer', help='model of experiment')
parser.add_argument('--data', type=str, required=False, default='ETTh1', help='dataset')
parser.add_argument('--root_path', type=str, default='./data/', help='root path of the data file')
parser.add_argument('--data_path', type=str, default='ETTh1_standardization.csv', help='data file')
parser.add_argument('--features', type=str, default='MS', help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
parser.add_argument('--freq', type=str, default='h', help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly]')
parser.add_argument('--seq_len', type=int, default=96, help='input sequence length of Transformer encoder')
parser.add_argument('--label_len', type=int, default=48, help='start token length of Transformer decoder')
parser.add_argument('--pred_len', type=int, default=24, help='prediction sequence length')
parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
parser.add_argument('--c_out', type=int, default=1, help='output size')
parser.add_argument('--d_model', type=int, default=512, help='dimension of model')
parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn')
parser.add_argument('--padding', type=int, default=0, help='padding type')
parser.add_argument('--dropout', type=float, default=0.05, help='dropout')
parser.add_argument('--activation', type=str, default='gelu', help='activation')
parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder')
parser.add_argument('--num_workers', type=int, default=0, help='data loader num workers')
parser.add_argument('--train_epochs', type=int, default=1, help='number of train epochs')
parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
parser.add_argument('--patience', type=int, default=3, help='early stopping patience')
parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
parser.add_argument('--loss', type=str, default='mse', help='loss function')
parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
parser.add_argument('--use_gpu', type=_str_to_bool, default=True, help='use gpu')
parser.add_argument('--gpu', type=int, default=0, help='gpu')
args = parser.parse_args()
# Only actually use the GPU when one is available AND the user asked for it.
args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)
print('Args in experiment:')
print(args)
可以看出我们设置的一些默认参数
然后下面就开始写类和函数了,共有24个类(含TimeFeature及其8个子类)和9个函数。
首先是TimeFeature类和它的8个子类
class TimeFeature:
    """Base class: maps a pd.DatetimeIndex to one calendar feature scaled to [-0.5, 0.5]."""

    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        pass

    def __repr__(self):
        return self.__class__.__name__ + "()"


class SecondOfMinute(TimeFeature):
    """Second of minute, scaled to [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.second / 59.0 - 0.5


class MinuteOfHour(TimeFeature):
    """Minute of hour, scaled to [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.minute / 59.0 - 0.5


class HourOfDay(TimeFeature):
    """Hour of day, scaled to [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.hour / 23.0 - 0.5


class DayOfWeek(TimeFeature):
    """Day of week (Monday=0), scaled to [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.dayofweek / 6.0 - 0.5


class DayOfMonth(TimeFeature):
    """Day of month, scaled to [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.day - 1) / 30.0 - 0.5


class DayOfYear(TimeFeature):
    """Day of year, scaled to [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.dayofyear - 1) / 365.0 - 0.5


class MonthOfYear(TimeFeature):
    """Month of year, scaled to [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.month - 1) / 11.0 - 0.5


class WeekOfYear(TimeFeature):
    """ISO week of year, scaled to [-0.5, 0.5]."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # BUG FIX: DatetimeIndex.week was deprecated in pandas 1.x and removed
        # in pandas 2.0; isocalendar().week is the supported replacement.
        week = index.isocalendar().week.to_numpy().astype(np.float64)
        return (week - 1) / 52.0 - 0.5
然后是所有的函数,写在一起了。
def adjust_learning_rate(optimizer, epoch, args):
    """Decay the optimizer's learning rate per the schedule named by args.lradj.

    'type1' halves the initial rate every epoch; 'type2' uses a fixed
    epoch -> lr table. Any other value is a no-op.

    BUG FIX: the original left ``lr_adjust`` undefined for unrecognized
    ``args.lradj`` values, raising NameError on the membership test below.
    """
    if args.lradj == 'type1':
        # (epoch - 1) // 1 in the original was a no-op; simplified.
        lr_adjust = {epoch: args.learning_rate * (0.5 ** (epoch - 1))}
    elif args.lradj == 'type2':
        lr_adjust = {
            2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
            10: 5e-7, 15: 1e-7, 20: 5e-8
        }
    else:
        lr_adjust = {}
    if epoch in lr_adjust:
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))
def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """Map a pandas frequency string (e.g. 'h', '15min', 'W') to the list of
    TimeFeature instances appropriate for that granularity.

    Raises:
        RuntimeError: if the frequency is not supported.

    BUG FIX: the original raised ``RuntimeError(supported_freq_msg)`` where
    ``supported_freq_msg`` was never defined, turning the intended error into
    a NameError.
    """
    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Minute: [MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Second: [SecondOfMinute, MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
    }
    offset = to_offset(freq_str)
    for offset_type, feature_classes in features_by_offsets.items():
        if isinstance(offset, offset_type):
            return [cls() for cls in feature_classes]
    supported = ", ".join(offset_type.__name__ for offset_type in features_by_offsets)
    raise RuntimeError(
        "Unsupported frequency {}. Supported offset types: {}".format(freq_str, supported)
    )
def time_features(dates, timeenc, freq):
    """Encode the 'date' column of *dates* (a DataFrame) into an array of
    shape (len, n_features) using the features for frequency *freq*.

    Only timeenc == 1 is supported; any other value returns None (this was the
    original's implicit fall-through behavior, made explicit here).
    """
    if timeenc != 1:
        return None
    index = pd.to_datetime(dates.date.values)
    columns = [feature(index) for feature in time_features_from_frequency_str(freq)]
    return np.vstack(columns).transpose(1, 0)
def MAE(pred, true):
    """Mean absolute error."""
    return np.abs(pred - true).mean()


def MSE(pred, true):
    """Mean squared error."""
    return ((pred - true) ** 2).mean()


def RMSE(pred, true):
    """Root mean squared error."""
    return np.sqrt(MSE(pred, true))


def MAPE(pred, true):
    """Mean absolute percentage error (undefined where true == 0)."""
    return np.abs((pred - true) / true).mean()


def MSPE(pred, true):
    """Mean squared percentage error (undefined where true == 0)."""
    return np.square((pred - true) / true).mean()


def metric(pred, true):
    """Bundle the five error metrics as (mae, mse, rmse, mape, mspe)."""
    return (MAE(pred, true), MSE(pred, true), RMSE(pred, true),
            MAPE(pred, true), MSPE(pred, true))
然后是早停类
class EarlyStopping:
    """Stop training when the validation loss has not improved for
    `patience` consecutive calls.

    Call the instance with (val_loss, model) once per epoch; check the
    `early_stop` attribute afterwards.
    """

    def __init__(self, patience, verbose, delta=0):
        self.patience = patience    # allowed consecutive calls without improvement
        self.verbose = verbose      # print a message when a new minimum is recorded
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # BUG FIX: np.Inf was removed in NumPy 2.0; np.inf is the canonical name.
        self.val_loss_min = np.inf
        self.delta = delta          # minimum decrease counted as an improvement

    def __call__(self, val_loss, model):
        score = -val_loss  # higher score = lower loss = better
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        # NOTE(review): despite the name, this only records the new minimum;
        # it never writes `model` to disk.
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        self.val_loss_min = val_loss
然后是数据集类
class MyDataset(Dataset):
    """Sliding-window dataset over a pre-normalized CSV with a 'date' column.

    The file is split 60/20/20 into train/val/test. Each item is
    (seq_x, seq_y, seq_x_mark, seq_y_mark): seq_x is the encoder input of
    length seq_len; seq_y covers label_len + pred_len steps for the decoder;
    the *_mark arrays hold the encoded time features.
    """

    def __init__(self, root_path, flag, size, features, data_path, target, timeenc, freq):
        # size = [seq_len, label_len, pred_len]
        self.seq_len = size[0]
        self.label_len = size[1]
        self.pred_len = size[2]
        # BUG FIX: type_map always had a 'val' entry but the assert rejected
        # the 'val' flag; accept all three splits.
        assert flag in ['train', 'val', 'test']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]
        self.features = features
        self.target = target
        self.timeenc = timeenc
        self.freq = freq
        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))
        # Window starts are pulled back by seq_len so every sample's encoder
        # input lies fully inside its split.
        border1s = [0, int(len(df_raw) * 0.6) - self.seq_len,
                    int(len(df_raw) * 0.8) - self.seq_len]
        border2s = [int(len(df_raw) * 0.6), int(len(df_raw) * 0.8),
                    int(len(df_raw) * 1)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]
        if self.features == 'M' or self.features == 'MS':
            # All columns except the leading 'date' column.
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        data = df_data.values
        # .copy() avoids pandas' SettingWithCopyWarning on the assignment below.
        df_stamp = df_raw[['date']][border1:border2].copy()
        df_stamp['date'] = pd.to_datetime(df_stamp.date)
        data_stamp = time_features(df_stamp, timeenc=self.timeenc, freq=self.freq)
        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        s_begin = index
        s_end = s_begin + self.seq_len
        # Decoder window: overlaps the encoder's last label_len steps,
        # then extends pred_len steps into the future.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len
        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        # Number of valid window start positions.
        return len(self.data_x) - self.seq_len - self.pred_len + 1
Token嵌入类
class TokenEmbedding(nn.Module):
    """Project each time step's c_in values to d_model channels with a
    width-3 circular Conv1d, Kaiming-initialized."""

    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, kernel_size=3, padding=1, padding_mode='circular')
        for module in self.modules():
            if isinstance(module, nn.Conv1d):
                nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        # (batch, seq, c_in) -> channels-first for the conv -> back to (batch, seq, d_model)
        channels_first = x.permute(0, 2, 1)
        return self.tokenConv(channels_first).transpose(1, 2)
位置嵌入类
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal positional encoding (Vaswani et al., 2017),
    precomputed for up to max_len positions and stored as a buffer."""

    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        pe = torch.zeros(max_len, d_model).float()
        # BUG FIX: the original wrote `pe.require_grad = False` — a typo that
        # just created an unused attribute; `requires_grad` is the real flag.
        pe.requires_grad = False
        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
        pe[:, 0::2] = torch.sin(position * div_term)  # even dims: sine
        pe[:, 1::2] = torch.cos(position * div_term)  # odd dims: cosine
        pe = pe.unsqueeze(0)  # (1, max_len, d_model) broadcasts over the batch
        self.register_buffer('pe', pe)

    def forward(self, x):
        # Only x's sequence length is used; its values are ignored.
        return self.pe[:, :x.size(1)]
时间特征嵌入类
class TimeFeatureEmbedding(nn.Module):
    """Linear projection of the time-feature vector up to d_model.

    The input width depends on the frequency string (e.g. 'h' -> 4 features).
    """

    def __init__(self, d_model, freq):
        super(TimeFeatureEmbedding, self).__init__()
        # Number of time features produced for each supported frequency.
        freq_map = {'h': 4, 't': 5, 's': 6, 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
        self.embed = nn.Linear(freq_map[freq], d_model)

    def forward(self, x):
        return self.embed(x)
总的数据嵌入类
class DataEmbedding(nn.Module):
    """Sum of value (token), positional, and time-feature embeddings,
    followed by dropout."""

    def __init__(self, c_in, d_model, freq, dropout):
        super(DataEmbedding, self).__init__()
        self.value_embedding = TokenEmbedding(c_in, d_model)
        self.position_embedding = PositionalEmbedding(d_model)
        self.time_feature_embedding = TimeFeatureEmbedding(d_model, freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        embedded = self.value_embedding(x)
        embedded = embedded + self.position_embedding(x)
        embedded = embedded + self.time_feature_embedding(x_mark)
        return self.dropout(embedded)
上三角掩码类
class TriangularCausalMask():
    """Boolean (B, 1, L, L) mask, True strictly above the diagonal —
    i.e. at the future positions that attention must not see."""

    def __init__(self, B, L, device):
        with torch.no_grad():
            ones = torch.ones([B, 1, L, L], dtype=torch.bool)
            self._mask = torch.triu(ones, diagonal=1).to(device)

    @property
    def mask(self):
        return self._mask
自注意力机制
class FullAttention(nn.Module):
    """Scaled dot-product attention over (batch, length, heads, head_dim)
    tensors.

    mask_flag applies a causal mask (built on the fly when none is supplied);
    output_attention additionally returns the attention weights.
    """

    def __init__(self, mask_flag, attention_dropout, output_attention):
        super(FullAttention, self).__init__()
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask):
        # BUG FIX: the original never unpacked the query shape, so B, L and E
        # were undefined names (NameError at the first call).
        B, L, H, E = queries.shape
        scale = 1. / math.sqrt(E)
        scores = torch.einsum("blhe,bshe->bhls", queries, keys)
        if self.mask_flag:
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)
        A = self.dropout(torch.softmax(scale * scores, dim=-1))
        V = torch.einsum("bhls,bshd->blhd", A, values)
        if self.output_attention:
            return (V.contiguous(), A)
        else:
            return (V.contiguous(), None)
class AttentionLayer(nn.Module):
    """Multi-head wrapper: projects q/k/v into per-head sub-spaces, runs the
    wrapped attention, and projects the concatenated heads back to d_model."""

    def __init__(self, attention, d_model, n_heads):
        super(AttentionLayer, self).__init__()
        head_dim = d_model // n_heads
        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, head_dim * n_heads)
        self.key_projection = nn.Linear(d_model, head_dim * n_heads)
        self.value_projection = nn.Linear(d_model, head_dim * n_heads)
        self.out_projection = nn.Linear(head_dim * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        batch, q_len, _ = queries.shape
        _, kv_len, _ = keys.shape
        heads = self.n_heads
        # Split the model dimension across heads: (B, len, H, head_dim).
        queries = self.query_projection(queries).view(batch, q_len, heads, -1)
        keys = self.key_projection(keys).view(batch, kv_len, heads, -1)
        values = self.value_projection(values).view(batch, kv_len, heads, -1)
        out, attn = self.inner_attention(queries, keys, values, attn_mask)
        # Merge heads back into a single d_model-wide representation.
        out = out.view(batch, q_len, -1)
        return self.out_projection(out), attn
编码器类
class EncoderLayer(nn.Module):
    """Post-norm Transformer encoder layer: self-attention followed by a
    position-wise feed-forward block built from 1x1 Conv1d layers."""

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        d_ff = d_ff or 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, attn_mask=None):
        attn_out, attn = self.attention(x, x, x, attn_mask=attn_mask)
        x = self.norm1(x + self.dropout(attn_out))
        residual = x
        # The 1x1 convs act as a per-position FFN; they want (B, d_model, L).
        x = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        x = self.dropout(self.conv2(x).transpose(-1, 1))
        return self.norm2(residual + x), attn
class Encoder(nn.Module):
    """Stack of encoder layers with an optional final normalization;
    collects the per-layer attention maps."""

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        # NOTE(review): conv_layers is accepted (Informer-style distilling
        # signature) but never used here.
        self.attn_layers = nn.ModuleList(attn_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        attns = []
        for layer in self.attn_layers:
            x, attn = layer(x, attn_mask=attn_mask)
            attns.append(attn)
        if self.norm is not None:
            x = self.norm(x)
        return x, attns
解码器类
class DecoderLayer(nn.Module):
    """Post-norm Transformer decoder layer: masked self-attention,
    cross-attention over the encoder output, then a Conv1d feed-forward block."""

    def __init__(self, self_attention, cross_attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super(DecoderLayer, self).__init__()
        d_ff = d_ff or 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        x = self.norm1(x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0]))
        x = self.norm2(x + self.dropout(self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]))
        residual = x
        # Per-position FFN via 1x1 convs; they want (B, d_model, L).
        x = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        x = self.dropout(self.conv2(x).transpose(-1, 1))
        return self.norm3(residual + x)
class Decoder(nn.Module):
    """Stack of decoder layers with an optional final normalization."""

    def __init__(self, layers, norm_layer=None):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        for decoder_layer in self.layers:
            x = decoder_layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
        return x if self.norm is None else self.norm(x)
最重要的Transformer类
class Transformer(nn.Module):
    """Vanilla encoder-decoder Transformer for time-series forecasting;
    returns only the last pred_len steps of the decoder output."""

    def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, pred_len, d_model, n_heads, e_layers, d_layers, d_ff,
                 dropout, freq, activation, output_attention, device):
        super(Transformer, self).__init__()
        # NOTE(review): seq_len, label_len and device are accepted for
        # interface compatibility but not used by the module itself.
        self.pred_len = pred_len
        self.enc_embedding = DataEmbedding(enc_in, d_model, freq, dropout)
        self.dec_embedding = DataEmbedding(dec_in, d_model, freq, dropout)

        def make_encoder_layer():
            # Encoder self-attention is unmasked.
            attn = AttentionLayer(FullAttention(False, dropout, output_attention), d_model, n_heads)
            return EncoderLayer(attn, d_model, d_ff, dropout=dropout, activation=activation)

        def make_decoder_layer():
            # Decoder self-attention is causal; cross-attention is unmasked.
            self_attn = AttentionLayer(FullAttention(True, dropout, True), d_model, n_heads)
            cross_attn = AttentionLayer(FullAttention(False, dropout, False), d_model, n_heads)
            return DecoderLayer(self_attn, cross_attn, d_model, d_ff, dropout=dropout, activation=activation)

        self.encoder = Encoder(
            [make_encoder_layer() for _ in range(e_layers)],
            norm_layer=nn.LayerNorm(d_model)
        )
        self.decoder = Decoder(
            [make_decoder_layer() for _ in range(d_layers)],
            norm_layer=nn.LayerNorm(d_model)
        )
        self.projection = nn.Linear(d_model, c_out, bias=True)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        enc_out, attns = self.encoder(self.enc_embedding(x_enc, x_mark_enc), attn_mask=None)
        dec_out = self.decoder(self.dec_embedding(x_dec, x_mark_dec), enc_out, x_mask=None)
        predictions = self.projection(dec_out)
        # Keep only the forecast horizon; the label_len prefix is discarded.
        return predictions[:, -self.pred_len:, :]
实验类
class Exp_Transformer:
    """Experiment driver: owns the device and model, and implements the
    training loop, test loop, and per-batch forward pass."""

    def __init__(self, args):
        self.args = args
        self.device = self._acquire_device()
        self.model = self._build_model().to(self.device)

    def _acquire_device(self):
        # BUG FIX: the original unconditionally requested CUDA, crashing on
        # CPU-only machines even though args.use_gpu was already computed from
        # torch.cuda.is_available().
        if self.args.use_gpu:
            device = torch.device('cuda:{}'.format(self.args.gpu))
            print('Use GPU: cuda:{}'.format(self.args.gpu))
        else:
            device = torch.device('cpu')
            print('Use CPU')
        return device

    def _build_model(self):
        """Assemble the Transformer from the CLI hyper-parameters."""
        model = Transformer(
            self.args.enc_in,
            self.args.dec_in,
            self.args.c_out,
            self.args.seq_len,
            self.args.label_len,
            self.args.pred_len,
            self.args.d_model,
            self.args.n_heads,
            self.args.e_layers,
            self.args.d_layers,
            self.args.d_ff,
            self.args.dropout,
            self.args.freq,
            self.args.activation,
            self.args.output_attention,
            self.device
        ).float()
        return model

    def _get_data(self, flag):
        """Build the dataset and DataLoader for the given split."""
        shuffle_flag = flag != 'test'   # keep test order for plotting
        drop_last = True
        batch_size = self.args.batch_size
        data_set = MyDataset(
            root_path=self.args.root_path,
            flag=flag,
            size=[self.args.seq_len, self.args.label_len, self.args.pred_len],
            features=self.args.features,
            data_path=self.args.data_path,
            target=self.args.target,
            timeenc=1,
            # BUG FIX: freq was hard-coded to 'h', silently ignoring --freq and
            # desynchronizing the data's time features from the model's
            # TimeFeatureEmbedding input width.
            freq=self.args.freq
        )
        print(flag, len(data_set))
        # BUG FIX: the original read the module-level global `args` for
        # num_workers instead of self.args.
        data_loader = DataLoader(data_set, batch_size=batch_size, shuffle=shuffle_flag,
                                 num_workers=self.args.num_workers, drop_last=drop_last)
        return data_set, data_loader

    def train(self):
        """Run the training loop and return the trained model."""
        train_data, train_loader = self._get_data(flag='train')
        time_now = time.time()
        train_steps = len(train_loader)
        early_stopping = EarlyStopping(self.args.patience, True)
        model_optim = torch.optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        criterion = nn.MSELoss()
        hist = np.zeros(self.args.train_epochs)
        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []
            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()
                pred, true = self._process_one_batch(train_data, batch_x, batch_y, batch_x_mark, batch_y_mark)
                loss = criterion(pred, true)
                train_loss.append(loss.item())
                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()
                loss.backward()
                model_optim.step()
            # Per-epoch plot of the iteration losses.
            plt.figure(figsize=(15, 6))
            plt.plot(list(range(1, len(train_loss) + 1)), train_loss)
            plt.xlabel('Iteration')
            plt.ylabel('Train Loss')
            plt.show()
            print("Epoch: {} cost time: {}s".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            hist[epoch] = train_loss
            # BUG FIX: early_stopping was never invoked, so early_stop could
            # never become True. Ideally this would use a validation loss;
            # the epoch's average training loss is used because no val loader
            # is built here.
            early_stopping(train_loss, self.model)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            adjust_learning_rate(model_optim, epoch + 1, self.args)
        plt.figure(figsize=(15, 6))
        plt.plot(list(range(1, len(hist) + 1)), hist, label="Training loss")
        plt.xlabel('Epoch')
        plt.ylabel('Average Train Loss')
        plt.show()
        return self.model

    def test(self):
        """Evaluate on the test split, plot a stitched prediction curve, and
        print the five error metrics."""
        test_data, test_loader = self._get_data(flag='test')
        self.model.eval()
        preds = []
        trues = []
        # no_grad avoids building autograd graphs (and the associated memory
        # cost) during evaluation.
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                pred, true = self._process_one_batch(test_data, batch_x, batch_y, batch_x_mark, batch_y_mark)
                preds.append(pred.detach().cpu().numpy())
                trues.append(true.detach().cpu().numpy())
        preds = np.array(preds)
        trues = np.array(trues)
        print('test shape:', preds.shape, trues.shape)
        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
        trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
        print('test shape:', preds.shape, trues.shape)
        # Stitch overlapping windows into one curve: first step of every
        # window, then the remaining steps of the last window.
        # NOTE(review): .item() assumes a single output variable per step
        # (features='MS'/'S' with c_out=1) — confirm before using with 'M'.
        prediction = []
        ground_truth = []
        for i in range(preds.shape[0] - 1):
            prediction.append(preds[i][0].item())
            ground_truth.append(trues[i][0].item())
        for j in range(preds.shape[1] - 1):
            prediction.append(preds[-1][j + 1].item())
            ground_truth.append(trues[-1][j + 1].item())
        plt.figure(figsize=(15, 6))
        plt.plot(prediction, label='Prediction', color='g')
        plt.plot(ground_truth, label='Ground Truth', color='r')
        plt.legend()
        plt.xlabel('Index')
        plt.ylabel('Value')
        plt.show()
        mae, mse, rmse, mape, mspe = metric(preds, trues)
        print('mse: {}'.format(mse))
        print('mae: {}'.format(mae))
        print('rmse: {}'.format(rmse))
        print('mape: {}'.format(mape))
        print('mspe: {}'.format(mspe))
        return

    def _process_one_batch(self, dataset_object, batch_x, batch_y, batch_x_mark, batch_y_mark):
        """Run one forward pass; returns (model output, target) both covering
        the last pred_len steps."""
        batch_x = batch_x.float().to(self.device)
        batch_y = batch_y.float()
        batch_x_mark = batch_x_mark.float().to(self.device)
        batch_y_mark = batch_y_mark.float().to(self.device)
        # Decoder input: the label_len known steps followed by zeros for the
        # pred_len steps to be forecast.
        dec_inp = torch.zeros([batch_y.shape[0], self.args.pred_len, batch_y.shape[-1]]).float()
        dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
        if self.args.output_attention:
            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
        else:
            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
        # For MS the target is only the last column (OT).
        f_dim = -1 if self.args.features == 'MS' else 0
        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
        return outputs, batch_y
最后一段主函数
# Entry point: train, then evaluate. Guarded so that importing this file for
# its classes does not launch a full experiment as a side effect.
if __name__ == "__main__":
    exp = Exp_Transformer(args)
    print('>>>>>>>start training>>>>>>>>>>>')
    exp.train()
    print('>>>>>>>start testing<<<<<<<<<<<<')
    exp.test()
    torch.cuda.empty_cache()