seq2seq with attention: code implementation
For the theory behind the attention-based seq2seq model, please refer to: seq2seq + attention explained in detail.
A seq2seq model with Luong attention is implemented as follows:
# -*- coding: utf-8 -*-
# author = 'xy'
"""
model2: encoder + attn + decoder
we use Bi-gru as our encoder, gru as decoder, Luong attention(concat method) as our attention
It refers to paper "Effective Approaches to Attention-based Neural Machine Translation"
"""
import numpy as np
import torch
from torch import nn
from torch.nn import functional as f
import test_helper  # project-specific helper module (not a standard library)

class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, embedding, num_layers=1, dropout=0.2):
        super(Encoder, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.embedding = embedding
        self.num_layers = num_layers
        self.drop_out = dropout

        self.rnn = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            bidirectional=True
        )
    def forward(self, src, src_len):
        """
        :param src: tensor, cuda, (seq_len, batch_size)
        :param src_len: tensor, (batch_size)
        :return: outputs (seq_len, batch_size, hidden_size*2), h_t (num_layers, batch_size, hidden_size*2)
        """
        src = self.embedding(src)
        # sequences are assumed to be sorted by length in descending order
        src = nn.utils.rnn.pack_padded_sequence(src, src_len)
        outputs, h_t = self.rnn(src, None)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs)
        # concatenate the forward and backward final hidden states of each layer
        h_t = torch.cat((h_t[0::2], h_t[1::2]), dim=2)

        return outputs, h_t
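For reference, a minimal usage sketch of the Encoder follows. The vocabulary size, embedding size, and hidden size below are hypothetical and run on CPU for simplicity; src must be sorted by descending length because pack_padded_sequence is used with its default enforce_sorted behaviour.
# --- usage sketch (hypothetical hyperparameters, not part of the original file) ---
vocab_size, embed_size, hidden_size = 1000, 64, 128
embedding = nn.Embedding(vocab_size, embed_size, padding_idx=0)
encoder = Encoder(input_size=embed_size, hidden_size=hidden_size,
                  embedding=embedding, dropout=0.0)  # dropout=0.0: only one GRU layer

seq_len, batch_size = 7, 3
src = torch.randint(1, vocab_size, (seq_len, batch_size))  # (seq_len, batch_size)
src_len = torch.tensor([7, 5, 3])                          # descending lengths

outputs, h_t = encoder(src, src_len)
print(outputs.shape)  # (seq_len, batch_size, hidden_size*2) -> torch.Size([7, 3, 256])
print(h_t.shape)      # (num_layers, batch_size, hidden_size*2) -> torch.Size([1, 3, 256])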