Day04 - Recurrent Neural Networks for Sequence-to-Sequence Learning: Code (Li Mu)

This code shows how to implement a sequence-to-sequence (Seq2Seq) model in PyTorch, covering the encoder, the decoder, and the training and prediction procedures. The encoder is a GRU-based recurrent neural network; the decoder also uses a GRU and, at every time step, concatenates the encoder's final hidden state to its input as context information about the source sequence. Training uses a masked softmax cross-entropy loss so that padded, invalid positions are ignored. At prediction time the model translates an input source sequence, and translation quality is evaluated with the BLEU score.


import collections
import math
import torch
from torch import nn
from d2l import torch as d2l
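
For orientation, the encoder and decoder defined below are combined later via d2l.EncoderDecoder. A minimal sketch of that wrapper (assuming the standard d2l interface; not part of the original code) looks roughly like this:

# Minimal sketch of how d2l.EncoderDecoder chains the two parts
# (assumption: standard d2l interface; the real class lives in the d2l package)
class EncoderDecoderSketch(nn.Module):
    def __init__(self,encoder,decoder,**kwargs):
        super().__init__(**kwargs)
        self.encoder=encoder
        self.decoder=decoder

    def forward(self,enc_X,dec_X,*args):
        enc_outputs=self.encoder(enc_X,*args)                  # encode the source batch
        dec_state=self.decoder.init_state(enc_outputs,*args)   # initialize the decoder state
        return self.decoder(dec_X,dec_state)                   # decode with teacher-forced inputs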

#@save
class Seq2SeqEncoder(d2l.Encoder):
    """用于序列到序列学习的循环神经网络编码器"""
    def __init__(self,vocab_size,embed_size,num_hiddens,num_layers,
                 dropout=0,**kwargs):
        super(Seq2SeqEncoder, self).__init__(**kwargs)
        # Embedding layer
        self.embedding=nn.Embedding(vocab_size,embed_size)
        # Recurrent (GRU) layer
        self.rnn=nn.GRU(embed_size,num_hiddens,num_layers,
                        dropout=dropout)

    def forward(self,X,*args):
        # X shape after embedding: (batch_size,num_steps,embed_size)
        X=self.embedding(X)
        # In RNNs, the first axis corresponds to time steps
        X=X.permute(1,0,2)
        output,state=self.rnn(X)
        # output: (num_steps,batch_size,num_hiddens); state: (num_layers,batch_size,num_hiddens)
        return output,state

# Sanity check: instantiate the encoder and run a forward pass
encoder=Seq2SeqEncoder(vocab_size=10,embed_size=8,num_hiddens=16,
                        num_layers=2)
encoder.eval()
X=torch.zeros((4,7),dtype=torch.long)
output,state=encoder(X)
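
With this toy configuration (a batch of 4 sequences, 7 time steps, 2 GRU layers with 16 hidden units), output should have shape (7, 4, 16), i.e. (num_steps, batch_size, num_hiddens), and state should have shape (2, 4, 16), i.e. (num_layers, batch_size, num_hiddens).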

class Seq2SeqDecoder(d2l.Decoder):
    """用于序列到序列学习的循环神经网络编码器"""
    def __init__(self,vocab_size,embed_size,num_hiddens,num_layers,
                 dropout=0,**kwargs):
        super(Seq2SeqDecoder, self).__init__(**kwargs)
        # Embedding layer
        self.embedding=nn.Embedding(vocab_size,embed_size)
        # Recurrent layer; its input is the embedding concatenated with the context vector
        self.rnn=nn.GRU(embed_size+num_hiddens,num_hiddens,num_layers,
                        dropout=dropout)
        # Fully connected output layer
        self.dense=nn.Linear(num_hiddens,vocab_size)

    def init_state(self,enc_outputs,*args):
        # The encoder returns (output, state); its final hidden state becomes the decoder's initial state
        return enc_outputs[1]

    def forward(self,X,state):
        # X shape after embedding and permute: (num_steps,batch_size,embed_size)
        X=self.embedding(X).permute(1,0,2)
        # Broadcast the encoder's final-layer hidden state as context for every time step
        context=state[-1].repeat(X.shape[0],1,1)
        X_and_context=torch.cat((X,context),2)
        output,state=self.rnn(X_and_context,state)
        # Output shape: (batch_size,num_steps,vocab_size)
        output=self.dense(output).permute((1,0,2))
        return output,state

# Instantiate the decoder and run a forward pass
decoder=Seq2SeqDecoder(vocab_size=10,embed_size=8,num_hiddens=16,
                       num_layers=2)
decoder.eval()
state=decoder.init_state(encoder(X))
output,state=decoder(X,state)
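
For the same toy batch, the decoder's sanity check should give output of shape (4, 7, 10), i.e. (batch_size, num_steps, vocab_size), while state keeps the shape (2, 4, 16).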

# Loss function: mask out irrelevant (padded) entries with zeros
#@save
def sequence_mask(X,valid_len,value=0):
    """在序列中屏蔽不相关的项"""
    maxlen=X.size(1)
    mask=torch.arange((maxlen),dtype=torch.float32,
                      device=X.device)[None,:]<valid_len[:,None]
    X[~mask]=value
    return X

X=torch.tensor([[1,2,3],[4,5,6]])
sequence_mask(X,torch.tensor([1,2]))

X=torch.ones(2,3,4)
sequence_mask(X,torch.tensor([1,2]),value=-1)
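
The first call should return tensor([[1, 0, 0], [4, 5, 0]]): only the first valid_len entries of each row are kept. In the second call the (2, 3) mask is applied along the time-step axis of the (2, 3, 4) tensor, so every feature of a masked time step is set to -1 (steps 1-2 of the first example, step 2 of the second).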

#@save
class MaskedSoftmaxCELoss(nn.CrossEntropyLoss):
    """带遮蔽的softmax交叉损失函数"""
    def forward(self, pred,label,valid_len):
        # pred shape: (batch_size,num_steps,vocab_size)
        # label shape: (batch_size,num_steps)
        # valid_len shape: (batch_size,)
        weights=torch.ones_like(label)
        weights=sequence_mask(weights,valid_len)
        self.reduction='none'
        unweighted_loss=super(MaskedSoftmaxCELoss,self).forward(
            pred.permute(0,2,1),label)
        weighted_loss=(unweighted_loss*weights).mean(dim=1)
        return weighted_loss

# Sanity check: three identical sequences with valid lengths 4, 2 and 0
loss=MaskedSoftmaxCELoss()
loss(torch.ones(3,4,10),torch.ones((3,4),dtype=torch.long),
     torch.tensor([4,2,0]))
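
Since the predictions are uniform over a vocabulary of 10 tokens, every valid time step contributes log(10) ≈ 2.3026 to the loss; averaging over the 4 time steps with valid lengths 4, 2 and 0 should give roughly tensor([2.3026, 1.1513, 0.0000]).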

# Training
#@save
def train_seq2seq(net,data_iter,lr,num_epochs,tgt_vocab,device):
    """训练序列到训练序列"""
    def xavier_init_weights(m):
        if type(m)==nn.Linear:
            nn.init.xavier_uniform_(m.weight)
        if type(m)==nn.GRU:
            for param in m._flat_weights_names:
                if "weight" in param:
                    nn.init.xavier_uniform_(m._parameters[param])

    net.apply(xavier_init_weights)
    net.to(device)
    optimizer=torch.optim.Adam(net.parameters(),lr=lr)
    loss=MaskedSoftmaxCELoss()
    net.train()
    animator=d2l.Animator(xlabel='epoch',ylabel='loss',
                          xlim=[10,num_epochs])
    for epoch in range(num_epochs):
        timer=d2l.Timer()
        metric=d2l.Accumulator(2)  # sum of training loss, number of tokens
        for batch in data_iter:
            optimizer.zero_grad()
            X,X_valid_len,Y,Y_valid_len=[x.to(device) for x in batch]
            bos=torch.tensor([tgt_vocab['<bos>']]*Y.shape[0],
                             device=device).reshape(-1,1)
            # Teacher forcing: shift the target one step right and prepend <bos>
            dec_input=torch.cat([bos,Y[:,:-1]],1)
            Y_hat,_=net(X,dec_input,X_valid_len)
            l=loss(Y_hat,Y,Y_valid_len)
            l.sum().backward()
            d2l.grad_clipping(net,1)
            num_tokens=Y_valid_len.sum()
            optimizer.step()
            with torch.no_grad():
                metric.add(l.sum(),num_tokens)
        if(epoch+1)%10==0:
            animator.add(epoch+1,(metric[0]/metric[1],))
    print(f'loss {metric[0]/metric[1]:.3f}, {metric[1]/timer.stop():.1f} '
          f'tokens/sec on {str(device)}')

# Create and train an RNN encoder-decoder model
embed_size,num_hiddens,num_layers,dropout=32,32,2,0.1
batch_size,num_steps=64,10
lr,num_epochs,device=0.005,300,d2l.try_gpu()

train_iter,src_vocab,tgt_vocab=d2l.load_data_nmt(batch_size,num_steps)
encoder=Seq2SeqEncoder(len(src_vocab),embed_size,num_hiddens,num_layers,dropout)
decoder=Seq2SeqDecoder(len(tgt_vocab),embed_size,num_hiddens,num_layers,dropout)
net=d2l.EncoderDecoder(encoder,decoder)
train_seq2seq(net,train_iter,lr,num_epochs,tgt_vocab,device)

# Prediction
#@save
def predict_seq2seq(net,src_sentence,src_vocab,tgt_vocab,num_steps,
                    device,save_attention_weights=False):
    """Predict with a sequence-to-sequence model"""
    # Set the net to evaluation mode for prediction
    net.eval()
    src_tokens=src_vocab[src_sentence.lower().split(' ')]+[src_vocab['<eos>']]
    enc_valid_len=torch.tensor([len(src_tokens)],device=device)
    src_tokens=d2l.truncate_pad(src_tokens,num_steps,src_vocab['<pad>'])

    # Add the batch axis
    enc_X=torch.unsqueeze(
        torch.tensor(src_tokens,dtype=torch.long,device=device),dim=0)
    enc_outputs=net.encoder(enc_X,enc_valid_len)
    dec_state=net.decoder.init_state(enc_outputs,enc_valid_len)

    # Add the batch axis
    dec_X = torch.unsqueeze(torch.tensor(
        [tgt_vocab['<bos>']], dtype=torch.long, device=device), dim=0)
    outputs_seq,attention_weight_seq=[],[]
    for _ in range(num_steps):
        Y,dec_state=net.decoder(dec_X,dec_state)
        # Use the token with the highest predicted probability as the
        # decoder input at the next time step
        dec_X=Y.argmax(dim=2)
        pred=dec_X.squeeze(dim=0).type(torch.int32).item()
        if save_attention_weights:
            attention_weight_seq.append(net.decoder.attention_weights)
        # Stop once the end-of-sequence token is predicted
        if pred==tgt_vocab['<eos>']:
            break
        outputs_seq.append(pred)
    return ' '.join(tgt_vocab.to_tokens(outputs_seq)),attention_weight_seq

# BLEU evaluation
def bleu(pred_seq,label_seq,k):#@save
    """计算bleu"""
    pred_tokens,label_tokens=pred_seq.split(' '),label_seq.split(' ')
    len_pred,len_label=len(pred_tokens),len(label_tokens)
    score=math.exp(min(0,1-len_label/len_pred))  # brevity penalty
    for n in range(1,k+1):
        num_matches,label_subs=0,collections.defaultdict(int)
        # Count the n-grams in the reference (label) sequence
        for i in range(len_label-n+1):
            label_subs[' '.join(label_tokens[i:i+n])]+=1
        # Count predicted n-grams that also occur in the reference (with clipping)
        for i in range(len_pred-n+1):
            if label_subs[' '.join(pred_tokens[i:i+n])]>0:
                num_matches+=1
                label_subs[' '.join(pred_tokens[i:i+n])]-=1
        score*=math.pow(num_matches/(len_pred-n+1),math.pow(0.5,n))
    return score
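
As a quick illustration (made-up token strings, not from the dataset): for the prediction 'a b b c' against the reference 'a b c d' and k=2, the 1-gram precision is 3/4 and the 2-gram precision is 2/3, so the score is exp(0) * (3/4)^0.5 * (2/3)^0.25 ≈ 0.78.

print(bleu('a b b c','a b c d',k=2))  # roughly 0.78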

# Translate a few test sentences and compute their BLEU scores
engs=['go.',"i lost.",'he\'s calm.','i\'m home.']
fras=['va !','j\'ai perdu.', 'il est calme .', 'je suis chez moi .']
for eng,fra in zip(engs,fras):
    translation,attention_weight_seq=predict_seq2seq(
        net,eng,src_vocab,tgt_vocab,num_steps,device)
    print(f'{eng} => {translation}, bleu {bleu(translation,fra,k=2):.3f}')