Model architecture
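The diagram corresponds to the classic NNLM of Bengio et al. (2003). As a reference for reading the code below, a sketch of the scoring function it implements, using the same symbols as the code (C is the embedding table, H and d the hidden layer, U the hidden-to-output weights, W the direct input-to-output connection, b the output bias):

```latex
% NNLM score for the next word given an (n-1)-word context
% (n_step = n-1 in the code). x is the concatenation of the
% context word embeddings looked up in C.
y = b + W x + U \tanh(d + H x)
```

A softmax over y gives the next-word distribution; in the code below this normalization is folded into `nn.CrossEntropyLoss`.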
Model implementation
```python
# code by Tae Hwan Jung @graykode, modified by 前行follow
import torch
import torch.nn as nn
import torch.optim as optim

# Pack inputs and targets into a mini-batch: the last word of each
# sentence is predicted from the preceding n-1 words.
def make_batch():
    input_batch = []
    target_batch = []
    for sen in sentences:
        word = sen.split()  # space tokenizer
        input = [word_dict[n] for n in word[:-1]]  # words 1..n-1 as input
        target = word_dict[word[-1]]  # word n as target; this setup is usually called a 'causal language model'
        input_batch.append(input)
        target_batch.append(target)
    return input_batch, target_batch

# Model
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__()
        self.C = nn.Embedding(n_class, m)  # embedding layer: word index -> m-dimensional word vector
        self.H = nn.Linear(n_step * m, n_hidden, bias=False)  # concatenated embeddings -> n_hidden dims
        self.d = nn.Parameter(torch.ones(n_hidden))  # hidden-layer bias
        self.U = nn.Linear(n_hidden, n_class, bias=False)  # hidden layer -> output layer
        self.W = nn.Linear(n_step * m, n_class, bias=False)  # direct connection: input layer -> output layer
        self.b = nn.Parameter(torch.ones(n_class))  # output-layer bias

    def forward(self, X):
        X = self.C(X)  # X : [batch_size, n_step, m]
        X = X.view(-1, n_step * m)  # [batch_size, n_step * m]
        tanh = torch.tanh(self.d + self.H(X))  # [batch_size, n_hidden]
        # one linear (direct) path + one nonlinear path + output bias
        output = self.b + self.W(X) + self.U(tanh)  # [batch_size, n_class]
        return output

# Main
if __name__ == '__main__':
    n_step = 2  # number of steps, n-1 in the paper
    n_hidden = 2  # hidden size, h in the paper
    m = 2  # embedding size, m in the paper

    sentences = ["i like dog", "i love coffee", "i hate milk"]

    word_list = " ".join(sentences).split()
    word_list = list(set(word_list))
    word_dict = {w: i for i, w in enumerate(word_list)}
    number_dict = {i: w for i, w in enumerate(word_list)}
    n_class = len(word_dict)  # vocabulary size

    model = NNLM()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    input_batch, target_batch = make_batch()
    input_batch = torch.LongTensor(input_batch)
    target_batch = torch.LongTensor(target_batch)

    # Training: 5000 epochs
    for epoch in range(5000):
        optimizer.zero_grad()
        output = model(input_batch)

        # output : [batch_size, n_class], target_batch : [batch_size]
        loss = criterion(output, target_batch)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        loss.backward()
        optimizer.step()

    # Predict
    predict = model(input_batch).data.max(1, keepdim=True)[1]

    # Test
    print([sen.split()[:2] for sen in sentences], '->',
          [number_dict[n.item()] for n in predict.squeeze()])
```
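Once training finishes, the same forward pass can score any two-word prefix built from the training vocabulary. A minimal sketch, assuming it is appended inside the `__main__` block after the training loop (`prefix`, `ctx`, `logits`, `probs`, and `best` are hypothetical names introduced here for illustration):

```python
# Score a single 2-word context; both words must appear in word_dict.
prefix = ["i", "like"]  # hypothetical example prefix taken from the training data
ctx = torch.LongTensor([[word_dict[w] for w in prefix]])  # shape [1, n_step]
with torch.no_grad():
    logits = model(ctx)                    # [1, n_class]
    probs = torch.softmax(logits, dim=-1)  # normalize scores into probabilities
    best = probs.argmax(dim=-1).item()
print(prefix, '->', number_dict[best])     # expected to favor 'dog' after training
```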
Reference:
1. Source code implementation