# -*- coding: utf-8 -*-
import torchtext
from torchtext.vocab import Vectors
import torch
import numpy as np
import random
import os
USE_CUDA = torch.cuda.is_available()
random.seed(1000)
np.random.seed(1000)
torch.manual_seed(1000)
if USE_CUDA:
    torch.cuda.manual_seed_all(1000)
device = torch.device('cuda' if USE_CUDA else 'cpu')
BATCH_SIZE = 32
EMBEDDING_SIZE = 650
MAX_VOCAB_SIZE = 50000
DATA_PATH = r'./data/demo10_pytorch_skip-Gram'
TRAIN_DATA = 'text8.train.txt'
TEST_DATA = 'text8.test.txt'
VALI_DATA = 'text8.dev.txt'
SAVE_MODEL = DATA_PATH + os.sep + 'lossm.pth'
TEXT = torchtext.data.Field(lower=True)
train, val, test = torchtext.datasets.LanguageModelingDataset.splits(path=DATA_PATH,
                                                                     text_field=TEXT,
                                                                     train=TRAIN_DATA,
                                                                     validation=VALI_DATA,
                                                                     test=TEST_DATA)
TEXT.build_vocab(train, max_size=MAX_VOCAB_SIZE)
VOCAB_SIZE = len(TEXT.vocab)
# print(VOCAB_SIZE, TEXT.vocab.itos[:100])
# TEXT.vocab.itos is a list (index -> word); TEXT.vocab.stoi is a dict (word -> index)
train_iter, val_iter, test_iter = torchtext.data.BPTTIterator.splits(datasets=(train, val, test),
                                                                     batch_size=BATCH_SIZE,
                                                                     device=device,
                                                                     bptt_len=50,  # backprop-through-time window length (user-defined)
                                                                     repeat=False,  # stop after one pass over the data
                                                                     shuffle=True)
# it = iter(train_iter)
# batch = next(it)
# print(batch)
# [torchtext.data.batch.Batch of size 32]
# [.text]:[torch.cuda.LongTensor of size 50x32 (GPU 0)]
# [.target]:[torch.cuda.LongTensor of size 50x32 (GPU 0)]
# print( ' '.join([TEXT.vocab.itos[i] for i in batch.text[:,0].data.cpu()]))
# anarchism originated as a term of abuse first used against early working class radicals including the diggers of the english revolution and the sans <unk> of the french revolution whilst the term is still used in a pejorative way to describe any act that used violent means to destroy the
# print( ' '.join([TEXT.vocab.itos[i] for i in batch.target[:,0].data.cpu()]))
# originated as a term of abuse first used against early working class radicals including the diggers of the english revolution and the sans <unk> of the french revolution whilst the term is still used in a pejorative way to describe any act that used violent means to destroy the organization
# text and target differ by exactly one word: the target is the text shifted one position to the left
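# A tiny illustration (not from the original post) of that one-word shift: for a toy
# id sequence, BPTT-style slicing pairs each position with the next token as its target.
# toy_ids = torch.arange(10)     # pretend token ids 0..9
# text_chunk = toy_ids[0:5]      # tensor([0, 1, 2, 3, 4])
# target_chunk = toy_ids[1:6]    # tensor([1, 2, 3, 4, 5]) -- shifted by one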
class RNNModel(torch.nn.Module):
    def __init__(self, rnn_type, vocab_size, embed_size, hidden_size, nlayers, dropout=0.5):
        ''' The model consists of the following layers:
        - a word-embedding layer
        - a recurrent layer (RNN, LSTM, or GRU)
        - a linear layer mapping the hidden state to the output vocabulary
        - a dropout layer for regularization
        '''
        super(RNNModel, self).__init__()
        self.drop = torch.nn.Dropout(dropout)
        # nn.Embedding -- rnn -- nn.Linear
        self.encoder = torch.nn.Embedding(vocab_size, embed_size)  # 50000 -> 650
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(torch.nn, rnn_type)(embed_size, hidden_size, nlayers, dropout=dropout)  # 650 -> hidden_size
        else:
            try:
                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
            except KeyError:
                raise ValueError("""An invalid option for `--model` was supplied,
                                 options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
            self.rnn = torch.nn.RNN(embed_size, hidden_size, nlayers, nonlinearity=nonlinearity, dropout=dropout)
        self.decoder = torch.nn.Linear(hidden_size, vocab_size)  # hidden_size -> vocab_size
        self.init_weights()
        self.rnn_type = rnn_type
        self.hidden_size = hidden_size
        self.nlayers = nlayers
    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
    def forward(self, input, hidden):  # take the input, run it through the layers defined above, and return the output
        ''' Forward pass:
        - word embedding
        - feed the embeddings through the recurrent layer
        - a linear layer maps the hidden states to the output vocabulary
        '''
        # input: [seq_length, batch_size]
        # nn.Embedding -- rnn -- nn.Linear
        # self.encoder -- self.rnn -- self.decoder
        emb = self.drop(self.encoder(input))  # [seq_length, batch_size, embed_size]
        output, hidden = self.rnn(emb, hidden)
        # output: [seq_length, batch_size, hidden_size]
        # hidden (for LSTM): a tuple (h, c), each of shape [nlayers, batch_size, hidden_size]
        # output = self.drop(output)
        decoded = self.decoder(self.drop(output.view(-1, output.size(2))))  # nn.Linear expects a 2-D input: [seq_length * batch_size, hidden_size]
        return decoded.view(output.size(0), output.size(1), -1), hidden
    def init_hidden(self, bsz, requires_grad=True):
        weight = next(self.parameters())  # parameters() is an iterator; use one tensor to match its dtype/device
        if self.rnn_type == 'LSTM':
            return (weight.new_zeros((self.nlayers, bsz, self.hidden_size), requires_grad=requires_grad),
                    weight.new_zeros((self.nlayers, bsz, self.hidden_size), requires_grad=requires_grad))
        else:
            return weight.new_zeros((self.nlayers, bsz, self.hidden_size), requires_grad=requires_grad)
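# Quick shape check (illustrative sketch, not in the original post): feed a dummy batch
# through a small untrained model to confirm the shapes documented in forward() above.
# _m = RNNModel('LSTM', vocab_size=100, embed_size=16, hidden_size=8, nlayers=1, dropout=0.0)
# _h = _m.init_hidden(4)
# _x = torch.randint(100, (50, 4), dtype=torch.long)  # [seq_length, batch_size]
# _out, _h = _m(_x, _h)
# print(_out.shape)  # torch.Size([50, 4, 100]) -> [seq_length, batch_size, vocab_size]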
model = RNNModel(rnn_type='LSTM', vocab_size=VOCAB_SIZE, embed_size=EMBEDDING_SIZE, hidden_size=100, nlayers=1)
if USE_CUDA:
    model = model.to(device)
# print(model)
# RNNModel(
# (drop): Dropout(p=0.5, inplace=False)
# (encoder): Embedding(50002, 650)
# (rnn): LSTM(650, 100, dropout=0.5)
# (decoder): Linear(in_features=100, out_features=50002, bias=True)
# )
# print(next(model.parameters()))
# We need the following helper to detach a hidden state from the history of its computation graph.
def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)  # an LSTM hidden state is a (h, c) tuple; see model.init_hidden(BATCH_SIZE)
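# Why the detach matters (illustrative sketch, not from the original post): without
# repackage_hidden, the graph would keep growing across batches and backward() would
# try to backpropagate through the entire corpus instead of just the current bptt_len window.
# hidden = model.init_hidden(BATCH_SIZE)
# for batch in train_iter:
#     hidden = repackage_hidden(hidden)  # cut the graph at the batch boundary
#     output, hidden = model(batch.text, hidden)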
# First, the evaluation code.
# Evaluation follows the same logic as training, except that we only need the forward pass, not the backward pass.
def evaluate(model, data):  # run the model over the validation/test data
    model.eval()
    total_loss = 0.
    it = iter(data)
    total_count = 0
    with torch.no_grad():  # make sure no gradients are tracked below
        hidden = model.init_hidden(BATCH_SIZE, requires_grad=False)
        for i, batch in enumerate(it):
            data, target = batch.text, batch.target
            if USE_CUDA:
                data, target = data.cuda(), target.cuda()
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
            loss = loss_fn(output.view(-1, VOCAB_SIZE), target.view(-1))
            total_count += np.multiply(*data.size())  # np.multiply(*data.size()) = seq_length * batch_size, i.e. the token count
            total_loss += loss.item() * np.multiply(*data.size())
    loss = total_loss / total_count
    model.train()
    return loss
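# Note (standard definition, not from the original post): evaluate() returns the mean
# cross-entropy per token, so np.exp(loss) below is the model's perplexity;
# e.g. a loss of 5.0 corresponds to a perplexity of about np.exp(5.0) ≈ 148.4.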
NUM_EPOCHS = 2
GRAD_CLIP = 5.
val_losses = []
loss_fn = torch.nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5)  # each scheduler.step() multiplies the learning rate by 0.5 (halves it)
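# NOTE: the training loop itself is missing from this section of the post; the sketch
# below is a minimal reconstruction consistent with the constants defined above
# (NUM_EPOCHS, GRAD_CLIP, val_losses, scheduler) and with the SAVE_MODEL checkpoint
# that is loaded further down. Treat it as an assumption, not the author's exact code.
for epoch in range(NUM_EPOCHS):
    model.train()
    hidden = model.init_hidden(BATCH_SIZE)
    for i, batch in enumerate(train_iter):
        data, target = batch.text, batch.target
        hidden = repackage_hidden(hidden)  # truncate backprop at the batch boundary
        output, hidden = model(data, hidden)
        loss = loss_fn(output.view(-1, VOCAB_SIZE), target.view(-1))
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)  # guard against exploding gradients
        optimizer.step()
        if i % 1000 == 0:
            val_loss = evaluate(model, val_iter)
            if len(val_losses) == 0 or val_loss < min(val_losses):
                torch.save(model.state_dict(), SAVE_MODEL)  # checkpoint the best model so far
            else:
                scheduler.step()  # halve the learning rate when validation stops improving
            val_losses.append(val_loss)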
best_model = RNNModel('LSTM', vocab_size=VOCAB_SIZE, embed_size=EMBEDDING_SIZE, hidden_size=100, nlayers=1)
if USE_CUDA:
    best_model = best_model.cuda()
best_model.load_state_dict(torch.load(SAVE_MODEL))
val_loss = evaluate(best_model, val_iter)
print("validation perplexity:", np.exp(val_loss))
test_loss = evaluate(best_model, test_iter)
print("test perplexity:", np.exp(test_loss))
# Generate text by sampling one word at a time from the trained model.
hidden = best_model.init_hidden(1)
input = torch.randint(VOCAB_SIZE, (1, 1), dtype=torch.long).to(device)  # start from a random word
words = []
for i in range(100):
    output, hidden = best_model(input, hidden)
    word_weights = output.squeeze().exp().cpu()  # exp(logits) gives unnormalized probabilities
    word_idx = torch.multinomial(word_weights, 1)[0]  # sample the next word id
    input.fill_(word_idx)  # feed the sampled word back in as the next input
    word = TEXT.vocab.itos[word_idx]
    words.append(word)
print(' '.join(words))