Code:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
torch.manual_seed(1)
# Basics review: the inputs and outputs of nn.LSTM
"""
rnn = nn.LSTM(10, 20, 2) # input_size, hidden_size, num_layers
input = torch.randn(5, 3, 10) # seq_len, batch, input_size # randn samples from a standard normal distribution
h0 = torch.randn(2, 3, 20) # num_layers*num_directions, batch, hidden_size # initial hidden state for each element in the batch
c0 = torch.randn(2, 3, 20) # num_layers*num_directions, batch, hidden_size # initial cell state for each element in the batch
output, (hn, cn) = rnn(input, (h0, c0))
print(output.shape)
print(output)
print("\n")
print(hn)
print("\n")
print(cn)
"""
# SEQUENCE MODELS AND LONG-SHORT TERM MEMORY NETWORKS
# warm up
"""
# step the LSTM one timestep at a time
lstm = nn.LSTM(3, 3)
inputs = [torch.randn(1, 3) for _ in range(5)]
hidden0 = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # keep the initial (h_0, c_0) so it can be reused below
hidden = hidden0
for i in inputs:
out, hidden = lstm(i.view(1, 1, -1), hidden)
print(out.shape)
print(out)
print(hidden)
print('------'*5)
# Feed the whole sequence into the lstm in one call; starting from the same initial
# hidden state, the result matches the step-by-step loop (verified in the sketch after this block)
print('*****'*15)
inputs2 = torch.stack(inputs)  # shape: (seq_len=5, batch=1, input_size=3)
output, last_hidden = lstm(inputs2, hidden0)
print(last_hidden)
"""
# Data preparation
def prepare_sequence(seq, to_ix):
idxs = [to_ix[w] for w in seq]
return torch.tensor(idxs, dtype=torch.long)
training_data = [
("The cat ate the cheese".split(), ["DET", "NN", "V", "DET", "NN"]),
("she read that book".split(), ["NN", "V", "DET", "NN"]),
("The dog loves art".split(), ['DET', "NN", "V", "NN"]),
("The elephant answers the phone".split(), ["DET", "NN", "V", "DET", "NN"])
]
word_to_ix = {}
for sen, tags in training_data:
for word in sen:
if word not in word_to_ix:
word_to_ix[word] = len(word_to_ix)
print(word_to_ix)
tag_to_ix = {'DET': 0, "NN": 1, "V": 2}
ix_to_tag = {v:k for k, v in tag_to_ix.items()}
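# A quick illustration of prepare_sequence (a sketch; the index values follow the
# word_to_ix / tag_to_ix mappings built above):
"""
print(prepare_sequence("The cat ate the cheese".split(), word_to_ix))  # tensor([0, 1, 2, 3, 4])
print(prepare_sequence(["DET", "NN", "V", "DET", "NN"], tag_to_ix))    # tensor([0, 1, 2, 0, 1])
"""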
EMBEDDING_DIM = 6
HIDDEN_DIM = 6
class LSTMTagger(nn.Module):
def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
super(LSTMTagger, self).__init__()
self.hidden_dim = hidden_dim
self.word_embedding = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size) # maps LSTM hidden states to tag space; e.g. nn.Linear(20, 30) maps an input of shape (128, 20) to an output of shape (128, 30) (see the shape check after the class)
self.hidden = self.init_hidden()
def init_hidden(self):
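        # (h_0, c_0), each of shape (num_layers*num_directions, batch, hidden_size)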
return (torch.zeros(1, 1, self.hidden_dim),
torch.zeros(1, 1, self.hidden_dim))
def forward(self, sentence):
embeds = self.word_embedding(sentence)
        # embeds has shape (len(sentence), embedding_dim); nn.LSTM expects (seq_len, batch, input_size), hence the view below
lstm_out, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)
tag_outputs = self.hidden2tag(lstm_out.view(len(sentence), -1))
tag_scores = F.log_softmax(tag_outputs, dim=1)
return tag_scores
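# Shape check for the hidden2tag comment above (a minimal sketch, not part of the
# original post): nn.Linear(20, 30) maps a batch of shape (128, 20) to (128, 30).
"""
fc = nn.Linear(20, 30)
x = torch.randn(128, 20)
print(fc(x).shape)  # torch.Size([128, 30])
"""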
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
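# Side note (a sketch, not in the original post): log_softmax followed by NLLLoss
# is the same criterion as CrossEntropyLoss applied directly to the raw scores.
"""
logits = torch.randn(5, 3)
target = torch.tensor([0, 1, 2, 0, 1])
print(torch.allclose(F.nll_loss(F.log_softmax(logits, dim=1), target),
                     F.cross_entropy(logits, target)))  # expected: True
"""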
# Performance before training
print("Before training...")
test_sentence = "The cheese loves the elephant".lower().split()
inputs = prepare_sequence(test_sentence, word_to_ix)
tag_scores = model(inputs)
print(tag_scores)
_, predict_tags = torch.max(tag_scores, 1)
print("predict_tags:", predict_tags)
predict_tags = predict_tags.numpy().tolist()
print("predict_tags:", [ix_to_tag[i] for i in predict_tags])
# Train the model, then check the performance after training
n_epoch = 300
for epoch in range(n_epoch):
epoch_loss = 0.0
for sentence, tags in training_data:
model.zero_grad()
model.hidden = model.init_hidden()
sentence_in = prepare_sequence(sentence, word_to_ix)
targets = prepare_sequence(tags, tag_to_ix)
tag_scores = model(sentence_in)
loss = loss_function(tag_scores, targets)
epoch_loss += loss.item()
loss.backward()
optimizer.step()
if epoch%20 == 19:
print(epoch+1, epoch_loss/len(training_data))
print("训练后效果...")
test_sentence = "The cheese loves the elephant".lower().split()
inputs = prepare_sequence(test_sentence, word_to_ix)
tag_scores = model(inputs)
print(tag_scores)
_, predict_tags = torch.max(tag_scores, 1)
print("predict_tags:", predict_tags)
predict_tags = predict_tags.numpy().tolist()
print("predict_tags:", [ix_to_tag[i] for i in predict_tags])
Output:
{'The': 0, 'cat': 1, 'ate': 2, 'the': 3, 'cheese': 4, 'she': 5, 'read': 6, 'that': 7, 'book': 8, 'dog': 9, 'loves': 10, 'art': 11, 'elephant': 12, 'answers': 13, 'phone': 14}
Before training...
tensor([[-0.9160, -1.4268, -1.0222],
[-1.0699, -1.4256, -0.8756],
[-1.1754, -1.6048, -0.7126],
[-0.9610, -1.4437, -0.9638],
[-0.9996, -1.4030, -0.9516]], grad_fn=<LogSoftmaxBackward>)
predict_tags: tensor([0, 2, 2, 0, 2])
predict_tags: ['DET', 'V', 'V', 'DET', 'V']
20 0.9818110167980194
40 0.8441966474056244
60 0.6295402944087982
80 0.3856070041656494
100 0.19818489998579025
120 0.10504559241235256
140 0.06542468629777431
160 0.04578841384500265
180 0.03454794408753514
200 0.027419292367994785
220 0.02255500666797161
240 0.01905142469331622
260 0.01642173482105136
280 0.014383405912667513
300 0.012762402184307575
After training...
tensor([[-1.0138e-02, -4.7908e+00, -6.3307e+00],
[-6.3856e+00, -5.2431e-03, -5.6426e+00],
[-6.1697e+00, -2.7313e+00, -6.9590e-02],
[-1.2358e-02, -4.9998e+00, -5.1953e+00],
[-6.6795e+00, -4.6397e-03, -5.6921e+00]], grad_fn=<LogSoftmaxBackward>)
predict_tags: tensor([0, 1, 2, 0, 1])
predict_tags: ['DET', 'NN', 'V', 'DET', 'NN']