Implementing the Encoder Layer and the Encoder in the Transformer

The Encoder Layer

The encoder extracts features from the input by stacking multiple encoder layers; the layers process the data one after another to complete the encoding.
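The EncoderLayer implementation below reuses several helpers built in earlier posts of this series: a clones function that duplicates a module, a NormalizationLayer (layer normalization), and a SublayerConnectionWithNormalization that wraps a sublayer in a residual "add & norm" connection. Those posts are not repeated here; the following is only a minimal sketch of what these helpers are assumed to look like, in the style of The Annotated Transformer.

import copy
import torch
import torch.nn as nn


# Assumed helper: clone a module into num_copies independent copies.
def clones(module, num_copies):
    return nn.ModuleList([copy.deepcopy(module) for _ in range(num_copies)])


# Assumed helper: layer normalization with learnable scale and shift.
class NormalizationLayer(nn.Module):
    def __init__(self, embedding_dim, eps=1e-6):
        super(NormalizationLayer, self).__init__()
        self.scale = nn.Parameter(torch.ones(embedding_dim))
        self.shift = nn.Parameter(torch.zeros(embedding_dim))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.scale * (x - mean) / (std + self.eps) + self.shift


# Assumed helper: residual connection around a pre-normalized sublayer, with dropout.
class SublayerConnectionWithNormalization(nn.Module):
    def __init__(self, embedding_dim, dropout):
        super(SublayerConnectionWithNormalization, self).__init__()
        self.norm = NormalizationLayer(embedding_dim)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, sublayer):
        # x + dropout(sublayer(norm(x)))
        return x + self.dropout(sublayer(self.norm(x)))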

import copy
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.set_printoptions(sci_mode=False)


# Build the encoder layer
class EncoderLayer(nn.Module):
    def __init__(self, embedding_dim, self_attention, feed_forward, dropout):
        """初始化编码器层.

        参数:
        - embedding_dim (int): 嵌入维度大小,也用作层大小.
        - self_attention (nn.Module): 多头自注意力机制的实例.
        - feed_forward (nn.Module): 前馈全连接层的实例.
        - dropout (float): 用于正则化的dropout率.
        """
        super(EncoderLayer, self).__init__()

        self.self_attention = self_attention
        self.feed_forward = feed_forward

        # Use the clones helper to create two sublayer-connection structures
        self.sublayers = clones(SublayerConnectionWithNormalization(embedding_dim, dropout), 2)
        self.embedding_dim = embedding_dim

    def forward(self, input_tensor, mask):
        """编码器层的前向传播

        参数:
        - input_tensor (torch.Tensor): 来自上一层的输入张量.
        - mask (torch.Tensor): 注意力机制的掩码张量.

        返回:
        - torch.Tensor: 层处理后的输出张量.
        """
        # First sublayer connection: multi-head self-attention
        input_tensor = self.sublayers[0](input_tensor, lambda x: self.self_attention(x, x, x, mask))

        # Second sublayer connection: position-wise feed-forward layer
        return self.sublayers[1](input_tensor, self.feed_forward)


if __name__ == "__main__":
    # Set hyperparameters
    test_embedding_dim = 512
    test_vocab_size = 10000
    test_max_len = 100
    test_heads = 8
    test_dropout = 0.2
    d_ffl = 64
    size = d_model = test_embedding_dim

    # Text embedding layer
    text_embeddings = TextEmbeddings(test_vocab_size, test_embedding_dim)
    test_input_tensor = torch.LongTensor([[1, 2, 3, 4], [4, 3, 2, 1]])
    text_embeddings_output = text_embeddings(test_input_tensor)

    # Add positional encoding
    positional_encoding = PositionalEncoding(test_embedding_dim, dropout=0.1,
                                             max_sequence_length=test_max_len)
    positional_encoded_output = positional_encoding(text_embeddings_output)

    # Multi-head attention
    test_mask = torch.zeros(8, 4, 4)  # use an 8x4x4 zero tensor as the mask
    self_mha = MultiHeadedAttention(test_heads, d_model)

    # Position-wise feed-forward layer
    ffl = FeedForwardLayer(d_model, d_ffl, test_dropout)

    # Build the encoder layer
    el = EncoderLayer(size, self_mha, ffl, test_dropout)
    el_result = el(positional_encoded_output, test_mask)
    print("Encoder Layer Output:\n", el_result)
    print("Shape of Encoder Layer Output:", el_result.shape)

The Encoder

The encoder is built by stacking N identical encoder layers, cloned from a single layer instance, and applying one final normalization layer to the output of the stack.

import copy
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from copy import deepcopy

torch.set_printoptions(sci_mode=False)


class TransformerEncoder(nn.Module):
    def __init__(self, encoder_layer, num_layers):
        """初始化Transformer编码器

        参数:
        - encoder_layer (nn.Module): 单个编码器层的实例
        - num_layers (int): 编码器层的数量
        """
        super(TransformerEncoder, self).__init__()
        # Clone multiple encoder layers
        self.encoder_layers = clones(encoder_layer, num_layers)
        # Initialize a normalization layer
        self.norm_layer = NormalizationLayer(encoder_layer.embedding_dim)

    def forward(self, input_tensor, mask):
        """编码器的前向传播

        参数:
        - input_tensor (torch.Tensor): 上一层的输出
        - mask (torch.Tensor): 掩码张量

        返回:
        - torch.Tensor: 编码器的输出
        """
        # Pass through each encoder layer in turn
        for layer in self.encoder_layers:
            input_tensor = layer(input_tensor, mask)
        # Apply the final normalization layer
        return self.norm_layer(input_tensor)


if __name__ == "__main__":
    # Set hyperparameters
    test_embedding_dim = 512
    test_vocab_size = 10000
    test_max_len = 100
    test_heads = 8
    test_dropout = 0.2
    d_ffl = 64
    size = d_model = test_embedding_dim

    # Text embedding layer
    text_embeddings = TextEmbeddings(test_vocab_size, test_embedding_dim)
    test_input_tensor = torch.LongTensor([[1, 2, 3, 4], [4, 3, 2, 1]])
    text_embeddings_output = text_embeddings(test_input_tensor)

    # Add positional encoding
    positional_encoding = PositionalEncoding(test_embedding_dim, dropout=0.1,
                                             max_sequence_length=test_max_len)
    positional_encoded_output = positional_encoding(text_embeddings_output)

    # Multi-head attention
    test_mask = torch.zeros(8, 4, 4)  # use an 8x4x4 zero tensor as the mask
    self_mha = MultiHeadedAttention(test_heads, d_model)

    # Position-wise feed-forward layer
    ffl = FeedForwardLayer(d_model, d_ffl, test_dropout)

    # Build the encoder layer
    # Deep-copy each sublayer object so every cloned encoder layer gets its own parameters
    el = EncoderLayer(size, deepcopy(self_mha), deepcopy(ffl), test_dropout)

    # Build the encoder
    test_num_layers = 4   # number of encoder layers
    encoder = TransformerEncoder(el, test_num_layers)
    en_result = encoder(positional_encoded_output, test_mask)
    print(f"Encoder Output:\n{en_result}")
    print(f"Shape of Encoder Output: {en_result.shape}")

Encoder Output:
tensor([[[-0.6755, -1.1236, -0.1131,  ..., -0.0815,  0.3898,  0.7918],
         [ 0.7663, -0.6230, -1.2164,  ...,  0.0106, -0.7013,  0.2994],
         [-0.2404,  1.0819,  1.5586,  ..., -0.1301,  1.7509,  0.1889],
         [-0.3832, -0.0331, -0.4277,  ...,  1.1210, -0.9636, -2.4338]],

        [[-0.3616, -1.8817, -0.4713,  ...,  1.2407, -0.8797, -2.4673],
         [-0.2006,  0.0076,  1.6668,  ..., -0.1171,  1.7598,  0.2362],
         [ 0.7781,  0.0376, -1.2090,  ..., -1.1441, -0.7755,  0.2996],
         [-0.6776, -1.2891, -0.2176,  ..., -0.0245,  0.4007,  0.7322]]],
       grad_fn=<AddBackward0>)
Shape of Encoder Output: torch.Size([2, 4, 512])
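The shape torch.Size([2, 4, 512]) mirrors the input: a batch of 2 sequences of length 4, with each position represented by a 512-dimensional vector. Passing through the 4 stacked encoder layers and the final normalization layer does not change the tensor's shape.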
