Resources and tutorial from Boyu Education (伯禹教育).
Let's first write a simple, concise implementation.
import sys
import re
import os
import time
import collections
import torch as tc
import numpy as np
import torchvision as tv
import torchvision.transforms as transforms
from torch import nn, optim
from torch.nn import functional as F
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
def load_data_jay_lyrics():
    """Read the lyrics file; return the corpus as character indices, the
    char-to-index map, the index-to-char map, and the vocabulary size."""
    with open('./data/歌词.txt', encoding='utf8', mode='r') as f:
        corpus_chars = f.read()
    corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
    corpus_chars = corpus_chars[:10000]  # keep only the first 10000 characters
    idx_to_char = list(set(corpus_chars))
    char_to_idx = {char: i for i, char in enumerate(idx_to_char)}
    vocab_size = len(char_to_idx)
    corpus_indices = [char_to_idx[char] for char in corpus_chars]
    return corpus_indices, char_to_idx, idx_to_char, vocab_size
corpus_indices, char_to_idx, idx_to_char, vocab_size = load_data_jay_lyrics()
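As a quick sanity check (the printed values depend on your copy of 歌词.txt, so they are not reproduced here), the two mappings round-trip:
print(vocab_size)  # number of distinct characters in the corpus
sample = corpus_indices[:5]
print(sample, ''.join(idx_to_char[i] for i in sample))  # indices and the characters they map back to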
device = tc.device('cuda' if tc.cuda.is_available() else 'cpu')
class RNNModel(nn.Module):
    def __init__(self, rnn_layer: nn.RNN, vocab_size):
        super().__init__()
        self.rnn = rnn_layer
        # A bidirectional RNN concatenates forward and backward hidden states
        self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1)
        self.vocab_size = vocab_size
        self.dense = nn.Linear(self.hidden_size, vocab_size)

    @classmethod
    def onehot(cls, x, vocab_size, dtype=tc.float32):
        # x: (batch_size,) index vector -> (batch_size, vocab_size) one-hot matrix
        res = tc.zeros(x.shape[0], vocab_size, dtype=dtype, device=x.device)
        # use in-place scatter_; plain scatter would discard its result and leave res all zeros
        res.scatter_(1, x.long().view(-1, 1), 1)
        return res

    @classmethod
    def to_onehot(cls, X, vocab_size):
        # X: (batch_size, num_steps) -> list of num_steps (batch_size, vocab_size) tensors
        return [cls.onehot(X[:, i], vocab_size) for i in range(X.shape[1])]

    def forward(self, inputs, state):
        # inputs: (batch_size, num_steps)
        X = self.to_onehot(inputs, self.vocab_size)
        X = tc.stack(X)  # stack into a 3-D tensor: (num_steps, batch_size, vocab_size)
        Ht, state = self.rnn(X, state)
        Ht = Ht.view(-1, Ht.shape[-1])  # (num_steps * batch_size, hidden_size)
        output = self.dense(Ht)  # (num_steps * batch_size, vocab_size)
        return output, state
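A quick shape check for the one-hot helpers (the 2x5 index batch below is an illustrative example, not part of the original): each time step becomes one (batch_size, vocab_size) tensor.
X = tc.arange(10, device=device).view(2, 5)
inputs = RNNModel.to_onehot(X, vocab_size)
print(len(inputs), inputs[0].shape)  # 5 tensors, each of shape (2, vocab_size)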
def predict_rnn(prefix, num_chars, model, device, idx_to_char, char_to_idx):
    """Feed the prefix one character at a time, then greedily generate num_chars more."""
    state = None
    output = [char_to_idx[prefix[0]]]
    for t in range(num_chars + len(prefix) - 1):
        # the most recent index is the next input: shape (batch_size=1, num_steps=1)
        X = tc.tensor([output[-1]], device=device).view(1, 1)
        Y, state = model(X, state)
        if t < len(prefix) - 1:
            # still inside the prefix: append the ground-truth next character
            output.append(char_to_idx[prefix[t + 1]])
        else:
            # past the prefix: append the most likely next character (greedy decoding)
            output.append(Y.argmax(dim=1).item())
    return ''.join([idx_to_char[i] for i in output])
rnn_layer = nn.RNN(input_size=vocab_size, hidden_size=256)
model = RNNModel(rnn_layer, vocab_size).to(device)
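Before generating text, we can verify the shapes that forward promises (the 2x5 batch is again an illustrative assumption): the logits come out as (num_steps * batch_size, vocab_size) and the state as (num_layers, batch_size, hidden_size).
X = tc.zeros(2, 5, dtype=tc.long, device=device)
output, state = model(X, None)  # passing None lets nn.RNN initialize a zero state
print(output.shape, state.shape)  # (10, vocab_size) and (1, 2, 256)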
res = predict_rnn('分开', 10, model, device, idx_to_char, char_to_idx)
print(res)
Out:
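Since the model has not been trained yet, the continuation after '分开' is an essentially random string of characters. To round out the sketch, here is a minimal training loop: the data_iter_consecutive helper, the hyperparameters, and the Adam/gradient-clipping choices below are illustrative assumptions, not part of the original code.
# Minimal training sketch (illustrative hyperparameters and helper; see note above)
num_epochs, num_steps, batch_size, lr = 100, 35, 32, 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr)  # optimizer choice is an assumption
loss_fn = nn.CrossEntropyLoss()

def data_iter_consecutive(corpus_indices, batch_size, num_steps, device):
    # Consecutive sampling: reshape the corpus into batch_size rows,
    # then slide a num_steps-wide window along the time axis.
    corpus = tc.tensor(corpus_indices, dtype=tc.long, device=device)
    data_len = len(corpus) // batch_size * batch_size
    indices = corpus[:data_len].view(batch_size, -1)
    num_batches = (indices.shape[1] - 1) // num_steps
    for i in range(num_batches):
        X = indices[:, i * num_steps:(i + 1) * num_steps]
        Y = indices[:, i * num_steps + 1:(i + 1) * num_steps + 1]  # next-character targets
        yield X, Y

for epoch in range(num_epochs):
    state, total_loss, n = None, 0.0, 0
    for X, Y in data_iter_consecutive(corpus_indices, batch_size, num_steps, device):
        if state is not None:
            state = state.detach()  # truncate BPTT between consecutive batches
        output, state = model(X, state)  # output: (num_steps * batch_size, vocab_size)
        # output rows are step-major, so transpose Y before flattening the labels
        y = Y.t().contiguous().view(-1)
        loss = loss_fn(output, y)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # clip exploding gradients
        optimizer.step()
        total_loss += loss.item() * y.numel()
        n += y.numel()
    if (epoch + 1) % 20 == 0:
        print(f'epoch {epoch + 1}, perplexity {np.exp(total_loss / n):.2f}')
        print(predict_rnn('分开', 10, model, device, idx_to_char, char_to_idx))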