最近在研究 NLP 中的孪生(Siamese)模型,读到了这个方向最原始的那篇论文,于是想用 PyTorch 简单实现一下,代码如下。
模型:
import torch
import torch.nn as nn
class SIAMESE(torch.nn.Module):
    """Siamese BiLSTM classifier for a pair of token-id sequences.

    Both sequences are passed through the same embedding table and the same
    bidirectional LSTM. The final cell states of the two sequences are
    concatenated and mapped by a linear layer to two class probabilities.

    Args:
        word_size: Number of rows in the embedding table (vocabulary size).
        embedding_size: Width of each token embedding.
        lstm_hidden_size: Hidden size of the LSTM, per direction.
        padding_idx: Token id whose embedding is pinned to zeros and
            receives no gradient.
    """

    def __init__(
        self,
        word_size: int = 100,
        embedding_size: int = 64,
        lstm_hidden_size: int = 128,
        padding_idx: int = 0,
    ):
        super().__init__()
        # Retained only so code that reads these attributes keeps working;
        # forward() no longer depends on either of them.
        self.batch_size = 6
        self.seq_len = 8
        self.embedding_size = embedding_size
        self.lstm_hidden_size = lstm_hidden_size
        self.word_size = word_size
        # The padding index is a vocabulary id, not a sequence length. The
        # previous value (seq_len == 8) zeroed out the real token with id 8.
        self.word_embedding = nn.Embedding(
            self.word_size, self.embedding_size, padding_idx=padding_idx
        )
        # Attribute name (including its spelling) is kept so that existing
        # state_dict checkpoints still load.
        self.encoder_leyer = nn.LSTM(
            input_size=self.embedding_size,
            hidden_size=self.lstm_hidden_size,
            bidirectional=True,
            batch_first=True,
        )
        # 2 sequences x 2 directions x hidden size -> 2 classes.
        self.aggregate = nn.Linear(4 * self.lstm_hidden_size, 2)
        # Explicit dim: the implicit-dim form of nn.Softmax is deprecated.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, seq1, seq2):
        """Score a batch of sequence pairs.

        Args:
            seq1: Integer tensor of token ids, shape (batch, len1).
            seq2: Integer tensor of token ids, shape (batch, len2).

        Returns:
            Tensor of shape (batch, 2) holding class probabilities; each row
            sums to 1 because softmax is applied over the last dimension.
        """
        sq1_embedding = self.word_embedding(seq1)  # (batch, len1, embedding)
        sq2_embedding = self.word_embedding(seq2)  # (batch, len2, embedding)

        # nn.LSTM returns (output, (h_n, c_n)); the final cell state c_n of
        # shape (2, batch, hidden) is used as the sequence representation.
        # NOTE(review): the original indexed [1][1], i.e. c_n rather than the
        # more commonly used h_n -- kept as is; confirm this is intended.
        _, (_, cell1) = self.encoder_leyer(sq1_embedding)
        _, (_, cell2) = self.encoder_leyer(sq2_embedding)

        # (2, batch, hidden) -> (batch, 2, hidden), then stack both
        # sequences along dim 1 -> (batch, 4, hidden).
        encoder = torch.cat(
            [torch.transpose(cell1, 0, 1), torch.transpose(cell2, 0, 1)], dim=1
        )
        # Flatten per sample using the actual batch size, so any batch size
        # works and a batch of one keeps its batch dimension.
        encoder_agg = encoder.reshape(encoder.size(0), -1)

        agg = self.aggregate(encoder_agg)
        return self.softmax(agg)
模拟一个简单的训练:
# Toy training run on random data: six sequence pairs of length 8 with
# token ids drawn from [1, 100), and one binary label per pair.
seq1 = torch.randint(1, 100, (6, 8))
seq2 = torch.randint(1, 100, (6, 8))
label = torch.LongTensor([1, 1, 0, 0, 1, 1])

siamese = SIAMESE()
# SIAMESE.forward already returns softmax probabilities. CrossEntropyLoss
# would apply log-softmax a second time, so use NLLLoss on log-probabilities.
loss_func = nn.NLLLoss()
optimizer = torch.optim.SGD(siamese.parameters(), lr=0.1)

for i in range(10):
    # Clear gradients left over from the previous step; without this they
    # accumulate across iterations.
    optimizer.zero_grad()
    out_put = siamese(seq1, seq2)
    # Clamp before the log so a probability that underflows to 0 cannot
    # produce -inf.
    loss = loss_func(torch.log(out_put.clamp_min(1e-12)), label)
    print("loss --> {}".format(loss.item()))
    loss.backward()
    optimizer.step()