PyTorch LSTM - from the official tutorial

Code:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

# Basics refresher: the inputs and outputs of nn.LSTM
"""
rnn = nn.LSTM(10, 20, 2)  # input_size, hidden_size, num_layers

input = torch.randn(5, 3, 10)  # seq_len, batch, input_size   # randn samples from a standard normal distribution

h0 = torch.randn(2, 3, 20)  # num_layers*num_directions, batch, hidden_size  # initial hidden state for each element in the batch

c0 = torch.randn(2, 3, 20)  # num_layers*num_directions, batch, hidden_size  # initial cell state for each element in the batch

output, (hn, cn) = rnn(input, (h0, c0))

print(output.shape)
print(output)
print("\n")
print(hn)
print("\n")
print(cn)
"""

# SEQUENCE MODELS AND LONG SHORT-TERM MEMORY NETWORKS
# warm up
"""
# Step the LSTM one timestep at a time
lstm = nn.LSTM(3, 3)
inputs = [torch.randn(1, 3) for _ in range(5)]
hidden0 = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # initial (h0, c0), saved for the full-sequence run below
hidden = hidden0

for i in inputs:
    out, hidden = lstm(i.view(1, 1, -1), hidden)
    print(out.shape)
    print(out)
    print(hidden)
    print('------'*5)

# Feed the whole sequence in at once; starting from the same initial state, the
# final hidden state matches the one produced by the step-by-step loop above
print('*****'*15)
inputs2 = torch.stack(inputs)  # (seq_len, batch, input_size) = (5, 1, 3)
output, last_hidden = lstm(inputs2, hidden0)
print(last_hidden)
"""


# Prepare the training data
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)


training_data = [
    ("The cat ate the cheese".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("she read that book".split(), ["NN", "V", "DET", "NN"]),
    ("The dog loves art".split(), ['DET', "NN", "V", "NN"]),
    ("The elephant answers the phone".split(), ["DET", "NN", "V", "DET", "NN"])
]

word_to_ix = {}
for sen, tags in training_data:
    for word in sen:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)

print(word_to_ix)
tag_to_ix = {'DET': 0, "NN": 1, "V": 2}
ix_to_tag = {v:k for k, v in tag_to_ix.items()}
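# For reference, prepare_sequence simply maps each word to its index via word_to_ix, e.g.
# prepare_sequence("The cat ate the cheese".split(), word_to_ix) -> tensor([0, 1, 2, 3, 4])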

EMBEDDING_DIM = 6
HIDDEN_DIM = 6


class LSTMTagger(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embedding = nn.Embedding(vocab_size, embedding_dim)

        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)  # e.g. a layer defined as nn.Linear(20, 30) maps an input of shape (128, 20) to an output of shape (128, 30); see the shape check after the class definition

        self.hidden = self.init_hidden()

    def init_hidden(self):
        return (torch.zeros(1, 1, self.hidden_dim),
                torch.zeros(1, 1, self.hidden_dim))

    def forward(self, sentence):
        embeds = self.word_embedding(sentence)

        # the LSTM expects input of shape (seq_len, batch, input_size)
        lstm_out, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)

        tag_outputs = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_outputs, dim=1)

        return tag_scores
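
# Quick shape check for the hidden2tag comment above (a standalone sketch, not part
# of the tagger): nn.Linear acts on the last dimension.
"""
fc = nn.Linear(20, 30)
x = torch.randn(128, 20)
print(fc(x).shape)  # torch.Size([128, 30])
"""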



model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
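# Side note (an equivalence check, not part of the tutorial): log_softmax followed by
# NLLLoss computes the same value as CrossEntropyLoss applied to the raw scores.
"""
logits = torch.randn(5, 3)
targets = torch.tensor([0, 1, 2, 0, 1])
a = nn.NLLLoss()(F.log_softmax(logits, dim=1), targets)
b = nn.CrossEntropyLoss()(logits, targets)
print(torch.allclose(a, b))  # True
"""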


# Predictions before training
print("Before training...")
test_sentence = "The cheese loves the elephant".lower().split()
inputs = prepare_sequence(test_sentence, word_to_ix)

tag_scores = model(inputs)
print(tag_scores)
_, predict_tags = torch.max(tag_scores, 1)
print("predict_tags:", predict_tags)
predict_tags = predict_tags.numpy().tolist()
print("predict_tags:", [ix_to_tag[i] for i in predict_tags])


# Train the model, then check the predictions again
n_epoch = 300
for epoch in range(n_epoch):
    epoch_loss = 0.0

    for sentence, tags in training_data:
        model.zero_grad()
        model.hidden = model.init_hidden()
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)

        tag_scores = model(sentence_in)

        loss = loss_function(tag_scores, targets)
        epoch_loss += loss.item()
        loss.backward()

        optimizer.step()

    if epoch % 20 == 19:  # report the average loss every 20 epochs
        print(epoch+1, epoch_loss/len(training_data))

print("训练后效果...")
test_sentence = "The cheese loves the elephant".lower().split()
inputs = prepare_sequence(test_sentence, word_to_ix)

tag_scores = model(inputs)
print(tag_scores)
_, predict_tags = torch.max(tag_scores, 1)
print("predict_tags:", predict_tags)
predict_tags = predict_tags.numpy().tolist()
print("predict_tags:", [ix_to_tag[i] for i in predict_tags])


Output:

{'The': 0, 'cat': 1, 'ate': 2, 'the': 3, 'cheese': 4, 'she': 5, 'read': 6, 'that': 7, 'book': 8, 'dog': 9, 'loves': 10, 'art': 11, 'elephant': 12, 'answers': 13, 'phone': 14}
Before training...
tensor([[-0.9160, -1.4268, -1.0222],
        [-1.0699, -1.4256, -0.8756],
        [-1.1754, -1.6048, -0.7126],
        [-0.9610, -1.4437, -0.9638],
        [-0.9996, -1.4030, -0.9516]], grad_fn=<LogSoftmaxBackward>)
predict_tags: tensor([0, 2, 2, 0, 2])
predict_tags: ['DET', 'V', 'V', 'DET', 'V']
20 0.9818110167980194
40 0.8441966474056244
60 0.6295402944087982
80 0.3856070041656494
100 0.19818489998579025
120 0.10504559241235256
140 0.06542468629777431
160 0.04578841384500265
180 0.03454794408753514
200 0.027419292367994785
220 0.02255500666797161
240 0.01905142469331622
260 0.01642173482105136
280 0.014383405912667513
300 0.012762402184307575
After training...
tensor([[-1.0138e-02, -4.7908e+00, -6.3307e+00],
        [-6.3856e+00, -5.2431e-03, -5.6426e+00],
        [-6.1697e+00, -2.7313e+00, -6.9590e-02],
        [-1.2358e-02, -4.9998e+00, -5.1953e+00],
        [-6.6795e+00, -4.6397e-03, -5.6921e+00]], grad_fn=<LogSoftmaxBackward>)
predict_tags: tensor([0, 1, 2, 0, 1])
predict_tags: ['DET', 'NN', 'V', 'DET', 'NN']

 
