Code:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
torch.manual_seed(1)
# Basics review: the inputs and outputs of nn.LSTM
"""
rnn = nn.LSTM(10, 20, 2) # input_size, hidden_size, num_layers
input = torch.randn(5, 3, 10) # seq_len, batch, input_size # randn samples from a standard normal distribution
h0 = torch.randn(2, 3, 20) # num_layers*num_directions, batch, hidden_size # initial hidden state for each element in the batch
c0 = torch.randn(2, 3, 20) # num_layers*num_directions, batch, hidden_size # initial cell state for each element in the batch
output, (hn, cn) = rnn(input, (h0, c0))
print(output.shape)
print(output)
print("\n")
print(hn)
print("\n")
print(cn)
"""
# SEQUENCE MODELS AND LONG-SHORT TERM MEMORY NETWORKS
# warm up
"""
# step the LSTM one timestep at a time
lstm = nn.LSTM(3, 3)
inputs = [torch.randn(1, 3) for _ in range(5)]
hidden0 = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # keep the initial (h_0, c_0) so it can be reused below
hidden = hidden0
for i in inputs:
out, hidden = lstm(i.view(1, 1, -1), hidden)
print(out.shape)
print(out)
print(hidden)
print('------'*5)
# Feed the whole sequence into the lstm in one call; starting from the same initial
# hidden state, the result matches the step-by-step loop (verified in the sketch after this block)
print('*****'*15)
inputs2 = torch.stack(inputs)  # shape: (seq_len=5, batch=1, input_size=3)
output, last_hidden = lstm(inputs2, hidden0)
print(last_hidden)
"""
# Data preparation
def prepare_sequence(seq, to_ix):
idxs = [to_ix[w] for w in seq]
return torch.tensor(idxs, dtype=torch.long)
training_data = [
("The cat ate the cheese".split(), ["DET", "NN", "V", "DET", "NN"]),
("she read that book".split(), ["NN", "V", "DET", "NN"]),
("The dog loves art".split(), ['DET', "NN", "V", "NN"]),
("The elephant answers the phone".split(), ["DET", "NN", "V", "DET", "NN"])
]
word_to_ix = {}
for sen, tags in training_data:
for word in sen:
if word not in word_to_ix:
word_to_ix[word] = len(word_to_ix)
print(word_to_ix)
tag_to_ix = {'DET': 0, "NN": 1, "V": 2}
ix_to_tag = {v:k for k, v in tag_to_ix.items()}
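# A quick illustration of prepare_sequence (a sketch; the index values follow the
# word_to_ix / tag_to_ix mappings built above):
"""
print(prepare_sequence("The cat ate the cheese".split(), word_to_ix))  # tensor([0, 1, 2, 3, 4])
print(prepare_sequence(["DET", "NN", "V", "DET", "NN"], tag_to_ix))    # tensor([0, 1, 2, 0, 1])
"""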
EMBEDDING_DIM = 6
HIDDEN_DIM = 6
class LSTMTagger(nn.Module):
def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
super(LSTMTagger, self).__init__()
self.hidden_dim = hidden_dim
self.word_embedding = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size) # maps LSTM hidden states to tag space; e.g. nn.Linear(20, 30) maps an input of shape (128, 20) to an output of shape (128, 30) (see the shape check after the class)
self.hidden = self.init_hidden()
def init_hidden(self):
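        # (h_0, c_0), each of shape (num_layers*num_directions, batch, hidden_size)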
return (torch.zeros(1, 1, self.hidden_dim),
torch.zeros(1, 1, self.hidden_dim))
def forward(self, sentence):
embeds = self.word_embedding(sentence)
        # embeds has shape (len(sentence), embedding_dim); nn.LSTM expects (seq_len, batch, input_size), hence the view below
lstm_out, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)
tag_outputs = self.hidden2tag(lstm_out.view(len(sentence), -1))
tag_scores = F.log_softmax(tag_outputs, dim=1)
return tag_scores
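# Shape check for the hidden2tag comment above (a minimal sketch, not part of the
# original post): nn.Linear(20, 30) maps a batch of shape (128, 20) to (128, 30).
"""
fc = nn.Linear(20, 30)
x = torch.randn(128, 20)
print(fc(x).shape)  # torch.Size([128, 30])
"""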
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
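# Side note (a sketch, not in the original post): log_softmax followed by NLLLoss
# is the same criterion as CrossEntropyLoss applied directly to the raw scores.
"""
logits = torch.randn(5, 3)
target = torch.tensor([0, 1, 2, 0, 1])
print(torch.allclose(F.nll_loss(F.log_softmax(logits, dim=1), target),
                     F.cross_entropy(logits, target)))  # expected: True
"""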
# Performance before training
print("Before training...")
test_sentence = "The cheese loves the elephant".lower().split()
inputs = prepare_sequence(test_sentence, word_to_ix)
tag_scores = model(inputs)
print(tag_scores)
_, predict_tags = torch.max(tag_scores, 1)
print("predict_tags:", predict_tags)
predict_tags = predict_tags.numpy().tolist()
print("predict_tags:", [ix_to_tag[i] for i in predict_tags])
# Train the model, then check the performance after training
n_epoch = 300
for epoch in range(n_epoch):
epoch_loss = 0.0
for sentence, tags in training_data:
model.zero_grad()
model.hidden = model.init_hidden()
sentence_in = prepare_sequence(sentence, word_to_ix)
targets = prepare_sequence(tags, tag_to_ix)
tag_scores = model(sentence_in)
loss = loss_function(tag_scores, targets)
epoch_loss += loss.item()
loss.backward()
optimizer.step()
if epoch%20 == 19:
print(epoch+1, epoch_loss/len(training_data))
print("训练后效果...")
test_sentence = "The cheese loves the elephant".lower().split()
inputs = prepare_sequence(test_sentence, word_to_ix)
tag_scores = model(inputs)
print(tag_scores)
_, predict_tags = torch.max(tag_scores, 1)
print("predict_tags:", predict_tags)
predict_tags = predict_tags.numpy().tolist()
print("predict_tags:", [ix_to_tag[i] for i in predict_tags])
Output:
{'The': 0, 'cat': 1, 'ate': 2, 'the': 3, 'cheese': 4, 'she': 5, 'read': 6, 'that': 7, 'book': 8, 'dog': 9, 'loves': 10, 'art': 11, 'elephant': 12, 'answers': 13, 'phone': 14}
Before training...
tensor([[-0.9160, -1.4268, -1.0222],
[-1.0699, -1.4256, -0.8756],
[-1.1754, -1.6048, -0.7126],
[-0.9610, -1.4437, -0.9638],
[-0.9996, -1.4030, -0.9516]], grad_fn=<LogSoftmaxBackward>)
predict_tags: tensor([0, 2, 2, 0, 2])
predict_tags: ['DET', 'V', 'V', 'DET', 'V']
20 0.9818110167980194
40 0.8441966474056244
60 0.6295402944087982
80 0.3856070041656494
100 0.19818489998579025
120 0.10504559241235256
140 0.06542468629777431
160 0.04578841384500265
180 0.03454794408753514
200 0.027419292367994785
220 0.02255500666797161
240 0.01905142469331622
260 0.01642173482105136
280 0.014383405912667513
300 0.012762402184307575
After training...
tensor([[-1.0138e-02, -4.7908e+00, -6.3307e+00],
[-6.3856e+00, -5.2431e-03, -5.6426e+00],
[-6.1697e+00, -2.7313e+00, -6.9590e-02],
[-1.2358e-02, -4.9998e+00, -5.1953e+00],
[-6.6795e+00, -4.6397e-03, -5.6921e+00]], grad_fn=<LogSoftmaxBackward>)
predict_tags: tensor([0, 1, 2, 0, 1])
predict_tags: ['DET', 'NN', 'V', 'DET', 'NN']