Natural Language Processing (23): Building the Transformer Decoder

NLP Notes: Master Table of Contents


Introduction to the Transformer

Decoder Layer

The decoder layer is the basic building block of the decoder. It contains three sublayers: a masked multi-head self-attention over the target sequence, a multi-head attention over the encoder output (the memory), and a position-wise feed-forward network, each wrapped in a residual connection with layer normalization (SublayerConnection).

import copy

import torch
import torch.nn as nn
import torch.nn.functional as F

# clones, LayerNorm, SublayerConnection, MultiHeadedAttention and
# PositionwiseFeedForward are the helper components built in the earlier
# posts of this series; out_pe (positional-encoding output) and out_en
# (encoder output) used below also come from there.


class DecoderLayer(nn.Module):
    def __init__(self, size, self_attn, src_attn, feed_forward, dropout):
        """
        size: the embedding dimension d_model
        self_attn: multi-head self-attention object, where Q = K = V
        src_attn: multi-head attention object over the encoder output, where Q != K = V
        feed_forward: position-wise feed-forward network object
        dropout: dropout rate
        """
        super(DecoderLayer, self).__init__()

        self.size = size
        self.self_attn = self_attn
        self.src_attn = src_attn
        self.feed_forward = feed_forward
        # The decoder layer needs three sublayer connections
        self.sublayer = clones(SublayerConnection(size, dropout), 3)

    def forward(self, x, memory, source_mask, target_mask):
        # memory is the output of the encoder stack
        m = memory
        # 1) masked multi-head self-attention over the target sequence
        x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, target_mask))
        # 2) attention over the encoder output: Q from the decoder, K and V from memory
        x = self.sublayer[1](x, lambda x: self.src_attn(x, m, m, source_mask))
        # 3) position-wise feed-forward network
        x = self.sublayer[2](x, self.feed_forward)
        return x

# The example below uses dummy data purely to make the computation run.
# In a real model self_attn and src_attn are different modules, and
# source_mask and target_mask are also different; they are shared here
# only for convenience.
head = 8
size = 512
d_model = 512
d_ff = 2048
dropout = 0.2
self_attn = src_attn = MultiHeadedAttention(head, d_model, dropout)
ff = PositionwiseFeedForward(d_model, d_ff, dropout)

# out_pe: positional-encoding output, shape (2, 4, 512)
# out_en: encoder output, shape (2, 4, 512)
x = out_pe
memory = out_en
mask = torch.zeros(8, 4, 4)
source_mask = target_mask = mask

dl = DecoderLayer(size, self_attn, src_attn, ff, dropout)
out_dl = dl(x, memory, source_mask, target_mask)

print(out_dl)
print(out_dl.shape)
tensor([[[-16.8412,  32.9487,  14.4696,  ..., -22.2736,  -0.1756,  -0.5874],
         [ 14.0010,   3.7020,   0.8705,  ...,  16.0435,  -0.7342, -27.5716],
         [ -6.6832, -50.7786,   0.3991,  ...,  35.4034, -34.8524, -10.2925],
         [  0.4025, -27.5343,   0.0897,  ...,  -4.3228, -52.6830, -26.3140]],

        [[-36.1756, -15.5990,  -6.8882,  ..., -27.8795, -22.8598,   9.3413],
         [-13.2061, -39.8130,  31.6627,  ..., -17.9774,   3.5719,   0.4909],
         [ -0.2878,  -2.6208,  -0.1447,  ...,  -6.8129,  -6.6932, -56.2726],
         [ -0.5078,  15.9361,   0.1945,  ...,   5.3691,   1.5384,  -0.4271]]],
       grad_fn=<AddBackward0>)
torch.Size([2, 4, 512])
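
In practice the target mask is not all zeros: it is a causal (subsequent-position) mask that prevents each position from attending to later positions, while the source mask usually only hides padding. Below is a minimal sketch of building such a causal mask with plain PyTorch; the helper name make_target_mask is illustrative, and the mask helper used elsewhere in this series may be constructed differently (e.g. with torch.triu).

import torch

def make_target_mask(seq_len):
    # Lower-triangular matrix of ones: position i may attend to positions <= i.
    # Shape (1, seq_len, seq_len) so it broadcasts across batch and heads.
    return torch.tril(torch.ones(1, seq_len, seq_len)).type(torch.uint8)

print(make_target_mask(4))
# tensor([[[1, 0, 0, 0],
#          [1, 1, 0, 0],
#          [1, 1, 1, 0],
#          [1, 1, 1, 1]]], dtype=torch.uint8)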

Decoder

The decoder stacks N identical decoder layers and applies a final layer normalization to the result.

class Decoder(nn.Module):
    def __init__(self, layer, N):
        """
        layer: a DecoderLayer instance
        N: the number of decoder layers to stack
        """
        super(Decoder, self).__init__()

        # N independent copies of the decoder layer, plus a final layer norm
        self.layers = clones(layer, N)
        self.norm = LayerNorm(layer.size)

    def forward(self, x, memory, source_mask, target_mask):
        # Run the input through each decoder layer in turn, then normalize
        for layer in self.layers:
            x = layer(x, memory, source_mask, target_mask)
        return self.norm(x)

size = 512
d_model = 512
head = 8
d_ff = 64
dropout = 0.2
c = copy.deepcopy
attn = MultiHeadedAttention(head, d_model)
ff = PositionwiseFeedForward(d_model, d_ff, dropout)
# deepcopy so the two attention sublayers and the feed-forward network
# of each layer do not share parameters
layer = DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout)
N = 8

x = out_pe
memory = out_en
mask = torch.zeros(8, 4, 4)
source_mask = target_mask = mask

de = Decoder(layer, N)
out_de = de(x, memory, source_mask, target_mask)

print(out_de)
print(out_de.shape)
tensor([[[-0.3795,  1.4082,  0.6244,  ..., -0.9057, -0.2942, -0.0643],
         [ 0.9174,  0.6259,  0.1907,  ...,  0.7216, -0.0158, -0.9123],
         [-0.0695, -1.6939,  0.0654,  ...,  1.3314, -1.5960, -0.5077],
         [ 0.2412, -0.7457,  0.1086,  ..., -0.2554, -2.3369, -1.0643]],

        [[-1.5241, -0.2561, -0.2190,  ..., -1.2681, -0.9377,  0.4868],
         [-0.5661, -1.0945,  1.4063,  ..., -0.7858,  0.1505,  0.1036],
         [-0.1120,  0.0913, -0.0218,  ..., -0.4020, -0.3154, -2.0716],
         [-0.1565,  0.9397, -0.0604,  ...,  0.1730,  0.0154,  0.1646]]],
       grad_fn=<AddBackward0>)
torch.Size([2, 4, 512])
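
One detail worth checking in the demo above is c = copy.deepcopy: each attention and feed-forward object passed into DecoderLayer, and each of the N layers produced by clones, must hold its own parameters rather than share a single module. Here is a small sanity check, as a sketch, assuming the Decoder instance de built above and that clones creates independent copies as in the earlier posts:

# The 8 stacked layers are distinct modules with distinct sublayer weights.
print(len(de.layers))                           # 8
first, second = de.layers[0], de.layers[1]
print(first is second)                          # False
print(first.self_attn is second.self_attn)      # False

# The total parameter count therefore grows linearly with N.
total_params = sum(p.numel() for p in de.parameters())
print(total_params)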

Output Part

The output part of the model is a linear projection from d_model to the vocabulary size followed by a log-softmax, which turns the decoder output into log-probabilities over the target vocabulary.

class Generator(nn.Module):
    def __init__(self, d_model, vocab_size):
        """
        d_model: the embedding dimension
        vocab_size: the size of the target vocabulary
        """
        super(Generator, self).__init__()
        self.linear = nn.Linear(d_model, vocab_size)

    def forward(self, x):
        # Project to vocabulary size and return log-probabilities
        return F.log_softmax(self.linear(x), dim=-1)


d_model = 512
vocab_size = 1000

x = out_de
gen = Generator(d_model, vocab_size)
out_gen = gen(x)

print(out_gen)
print(out_gen.shape)
tensor([[[-7.2107, -6.8135, -7.3745,  ..., -6.8875, -7.7995, -6.8735],
         [-7.2454, -6.7102, -6.5739,  ..., -7.4684, -7.8100, -6.5677],
         [-6.7945, -6.4811, -7.0429,  ..., -6.1130, -7.7186, -8.6953],
         [-6.4648, -8.1545, -6.5278,  ..., -6.5628, -8.1096, -6.6423]],

        [[-6.6874, -6.4551, -7.2109,  ..., -7.2206, -7.1038, -6.5141],
         [-7.0045, -7.2391, -6.9019,  ..., -7.3764, -7.8051, -6.4741],
         [-7.1714, -6.9198, -7.1506,  ..., -5.9916, -6.7347, -7.1622],
         [-7.9641, -7.2477, -7.8616,  ..., -6.5662, -8.9810, -7.2106]]],
       grad_fn=<LogSoftmaxBackward>)
torch.Size([2, 4, 1000])
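
Because the Generator returns log-probabilities over the vocabulary, the most likely token id at each position can be read off directly. Below is a minimal sketch of greedy selection on the out_gen tensor above; real decoding would additionally involve step-by-step generation, beam search, or sampling.

# out_gen has shape (batch, seq_len, vocab_size) = (2, 4, 1000)
pred_ids = out_gen.argmax(dim=-1)   # index of the largest log-probability per position
print(pred_ids.shape)               # torch.Size([2, 4])
print(pred_ids)                     # token indices in the range [0, vocab_size)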