import torch
import torch.nn as nn
import torch.nn.functional as F
# Defines a convolutional layer, typically used to reduce the dimensionality of sequence data and extract features.
class ConvLayer(nn.Module):
    """Down-sampling block: circular Conv1d -> BatchNorm1d -> ELU -> MaxPool1d.

    Args:
        c_in: Number of feature channels. The convolution keeps the channel
            count unchanged (``in_channels == out_channels == c_in``).
    """

    def __init__(self, c_in):
        super().__init__()
        # kernel_size=3 with padding=2 means the convolution output is two
        # steps longer than its input (L + 2*2 - 3 + 1 = L + 2).
        self.downConv = nn.Conv1d(
            in_channels=c_in,
            out_channels=c_in,
            kernel_size=3,
            padding=2,
            padding_mode='circular',
        )
        # Batch normalisation over the channel dimension.
        self.norm = nn.BatchNorm1d(c_in)
        # ELU (exponential linear unit) non-linearity.
        self.activation = nn.ELU()
        # Stride-2 max pooling shortens the sequence axis.
        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        """Apply convolution, normalisation, activation and pooling.

        Args:
            x: Tensor laid out as ``(batch, length, c_in)``; it is permuted to
                ``(batch, c_in, length)`` before being handed to ``Conv1d``.

        Returns:
            Tensor of shape ``(batch, floor((length + 1) / 2) + 1, c_in)``.
        """
        # Conv1d / BatchNorm1d / MaxPool1d operate channel-first.
        channels_first = x.permute(0, 2, 1)
        channels_first = self.downConv(channels_first)
        channels_first = self.norm(channels_first)
        channels_first = self.activation(channels_first)
        channels_first = self.maxPool(channels_first)
        # Back to the (batch, length, channels) layout of the input.
        return channels_first.transpose(1, 2)
# Defines an encoder layer, typically used to process the input sequence and extract relationships within it via an attention mechanism.
class EncoderLayer(nn.Module):
    """Encoder layer: self-attention followed by a kernel-size-1 conv feed-forward.

    Both sub-layers are wrapped in a residual connection and followed by
    ``LayerNorm`` (norm is applied after the residual sum).

    Args:
        attention: Callable invoked as
            ``attention(x, x, x, attn_mask=..., tau=..., delta=...)`` that
            must return a 2-tuple ``(new_x, attn)``.
        d_model: Feature size of the layer input and output.
        d_ff: Hidden width of the feed-forward part. ``None`` (or ``0``)
            falls back to ``4 * d_model``.
        dropout: Probability for the single ``nn.Dropout`` module that is
            reused at every dropout site in this layer.
        activation: ``"relu"`` selects ``F.relu``; any other value selects
            ``F.gelu``.
    """

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super().__init__()
        d_ff = d_ff or 4 * d_model
        self.attention = attention
        # Two 1-D convolutions with kernel_size=1 form the feed-forward part:
        # d_model -> d_ff -> d_model.
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, attn_mask=None, tau=None, delta=None):
        """Run the attention and feed-forward sub-layers.

        Args:
            x: Input tensor; its last dimension must be ``d_model`` and
                dimension 1 is swapped with it around the convolutions
                (assumes a ``(batch, length, d_model)`` layout).
            attn_mask: Forwarded unchanged to ``attention``.
            tau: Forwarded unchanged to ``attention``.
            delta: Forwarded unchanged to ``attention``.

        Returns:
            ``(output, attn)`` where ``output`` has the same shape as ``x``
            and ``attn`` is the second value returned by ``attention``.
        """
        new_x, attn = self.attention(
            x, x, x,
            attn_mask=attn_mask,
            tau=tau, delta=delta,
        )
        # Residual connection around attention, then normalise.
        x = self.norm1(x + self.dropout(new_x))

        # Conv1d expects channels in dim 1, so swap the last axis with dim 1,
        # apply the convolutions, then swap back.
        hidden = self.conv1(x.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        y = self.dropout(self.conv2(hidden).transpose(-1, 1))

        # Residual connection around the feed-forward part.
        return self.norm2(x + y), attn
# Defines the structure of the whole encoder, which contains multiple encoder layers and optional convolutional layers.
class Encoder(nn.Module):
    """Stack of attention layers, optionally interleaved with conv layers.

    Args:
        attn_layers: Iterable of modules called as
            ``layer(x, attn_mask=..., tau=..., delta=...)`` and returning
            ``(x, attn)``.
        conv_layers: Optional iterable of modules called as ``layer(x)``.
            When given, each one runs after the attention layer it is
            paired with.
        norm_layer: Optional module applied to the final output.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super().__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
        self.norm = norm_layer

    def forward(self, x, attn_mask=None, tau=None, delta=None):
        """Run the input through every layer and collect attention outputs.

        Args:
            x: Input tensor, ``[B, L, D]``.
            attn_mask: Forwarded to the attention layers (see note below).
            tau: Forwarded to every attention layer.
            delta: Forwarded to attention layers. With ``conv_layers`` only
                the first attention layer receives it; the rest get ``None``.

        Returns:
            ``(x, attns)`` where ``attns`` holds the second return value of
            each attention-layer call, in call order.
        """
        attns = []
        if self.conv_layers is not None:
            # zip() pairs attention and conv layers and stops at the shorter
            # list; the last attention layer is then applied once more below.
            # If there are not more attention layers than conv layers, that
            # last layer has already run inside this loop and runs again.
            for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)):
                # Only the first layer sees the caller's delta.
                layer_delta = delta if i == 0 else None
                x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=layer_delta)
                x = conv_layer(x)
                attns.append(attn)
            # NOTE(review): attn_mask is not forwarded to this final layer —
            # confirm this is intended.
            x, attn = self.attn_layers[-1](x, tau=tau, delta=None)
            attns.append(attn)
        else:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
                attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns
# Defines a single decoder layer, used to process the encoder output and predict the next sequence.
class DecoderLayer(nn.Module):
    """Decoder layer: self-attention, cross-attention, then conv feed-forward.

    Each of the three sub-layers is wrapped in a residual connection and
    followed by its own ``LayerNorm``.

    Args:
        self_attention: Callable invoked as
            ``self_attention(x, x, x, attn_mask=..., tau=..., delta=None)``;
            only element ``[0]`` of its return value is used.
        cross_attention: Callable invoked as
            ``cross_attention(x, cross, cross, attn_mask=..., tau=..., delta=...)``;
            only element ``[0]`` of its return value is used.
        d_model: Feature size of the layer input and output.
        d_ff: Hidden width of the feed-forward part. ``None`` (or ``0``)
            falls back to ``4 * d_model``.
        dropout: Probability for the single ``nn.Dropout`` module that is
            reused at every dropout site in this layer.
        activation: ``"relu"`` selects ``F.relu``; any other value selects
            ``F.gelu``.
    """

    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu"):
        super().__init__()
        d_ff = d_ff or 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        # Two 1-D convolutions with kernel_size=1 form the feed-forward part:
        # d_model -> d_ff -> d_model.
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
        """Run self-attention, cross-attention and the feed-forward part.

        Args:
            x: Decoder input; its last dimension must be ``d_model``
                (assumes a ``(batch, length, d_model)`` layout).
            cross: Tensor used as key and value of the cross-attention.
            x_mask: Mask forwarded to the self-attention.
            cross_mask: Mask forwarded to the cross-attention.
            tau: Forwarded to both attention calls.
            delta: Forwarded to the cross-attention only; the self-attention
                always receives ``delta=None``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        self_out = self.self_attention(
            x, x, x,
            attn_mask=x_mask,
            tau=tau, delta=None,
        )[0]
        x = self.norm1(x + self.dropout(self_out))

        cross_out = self.cross_attention(
            x, cross, cross,
            attn_mask=cross_mask,
            tau=tau, delta=delta,
        )[0]
        x = self.norm2(x + self.dropout(cross_out))

        # Conv1d expects channels in dim 1, so swap the last axis with dim 1,
        # apply the convolutions, then swap back.
        hidden = self.conv1(x.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        y = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm3(x + y)
# Defines the overall structure of the decoder.
class Decoder(nn.Module):
    """Stack of decoder layers with optional final norm and projection.

    Args:
        layers: Iterable of modules called as
            ``layer(x, cross, x_mask=..., cross_mask=..., tau=..., delta=...)``
            and returning the updated ``x``.
        norm_layer: Optional module applied after the last layer.
        projection: Optional module applied last, after ``norm_layer``.
    """

    def __init__(self, layers, norm_layer=None, projection=None):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
        """Pass ``x`` through every decoder layer in order.

        Args:
            x: Decoder input.
            cross: Tensor handed to every layer as its ``cross`` argument.
            x_mask: Forwarded unchanged to every layer.
            cross_mask: Forwarded unchanged to every layer.
            tau: Forwarded unchanged to every layer.
            delta: Forwarded unchanged to every layer.

        Returns:
            Output of the last layer after the optional norm and projection.
        """
        for layer in self.layers:
            x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta)

        # Normalisation, when present, runs before the projection.
        if self.norm is not None:
            x = self.norm(x)

        if self.projection is not None:
            x = self.projection(x)
        return x