Official PyTorch documentation link: RNN — PyTorch 1.13 documentation
RNNs are used to process sequential data:
RNN cell:
In cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size):
- input_size: the dimension of x(t)
- hidden_size: the dimension of h(t-1)
Suppose we have a sequence with the following properties:
- batchSize = 1
- seqLen = 3
- inputSize = 4
- hiddenSize = 2
The whole sequence is packed into a single tensor: dataset.shape = (seqLen, batchSize, inputSize)
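A minimal sketch (using the sizes listed above) of running such a sequence through an RNNCell one time step at a time:

import torch

batch_size, seq_len, input_size, hidden_size = 1, 3, 4, 2
cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)
dataset = torch.randn(seq_len, batch_size, input_size)  # (seqLen, batchSize, inputSize)
hidden = torch.zeros(batch_size, hidden_size)           # h(0)
for idx, x_t in enumerate(dataset):
    hidden = cell(x_t, hidden)   # x_t: (batchSize, inputSize)
    print(idx, hidden.shape)     # hidden: (batchSize, hiddenSize) = (1, 2)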
Generating a sequence from a sequence:
num_layers: the number of stacked RNN layers. In the figure below, num_layers = 3.
The batch_first parameter: with batch_first=True, the input and output tensors are laid out as (batchSize, seqLen, inputSize) instead of (seqLen, batchSize, inputSize); see the shape check below.
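A quick shape check (a sketch with the sizes from above; the values are random) of torch.nn.RNN with stacked layers and with batch_first:

import torch

seq_len, batch_size, input_size, hidden_size, num_layers = 3, 1, 4, 2, 3
rnn = torch.nn.RNN(input_size, hidden_size, num_layers)  # expects (seqLen, batchSize, inputSize)
inputs = torch.randn(seq_len, batch_size, input_size)
h0 = torch.zeros(num_layers, batch_size, hidden_size)
out, hn = rnn(inputs, h0)
print(out.shape)  # (seqLen, batchSize, hiddenSize) = (3, 1, 2)
print(hn.shape)   # (numLayers, batchSize, hiddenSize) = (3, 1, 2)
# with batch_first=True the input/output layout becomes (batchSize, seqLen, ...)
rnn_bf = torch.nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
out_bf, _ = rnn_bf(inputs.transpose(0, 1), h0)
print(out_bf.shape)  # (batchSize, seqLen, hiddenSize) = (1, 3, 2)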
Next, an example: sequence to sequence.
First convert the sequence into numeric vectors, then feed them into the RNNCell:
The RNN's output should be the predicted vector.
Straight to the code; the reasoning is in the comments:
import torch
# parameters
input_size = 4
hidden_size = 4
batch_size = 1
seq_len = 5
# build a dictionary to convert the sequence into numeric vectors
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3] # input
y_data = [3, 1, 2, 3, 2] # output
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]  # convert indices into one-hot vectors
# reshape inputs to (seq_len, batch_size, input_size)
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
# reshape labels to (seq_len, 1)
labels = torch.LongTensor(y_data).view(-1, 1)
class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size):
        super(Model, self).__init__()
        # store the parameters
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(input_size=self.input_size,
                                        hidden_size=self.hidden_size)

    def forward(self, input, hidden):
        hidden = self.rnncell(input, hidden)
        return hidden

    def init_hidden(self):
        # provide the initial hidden state h(0)
        return torch.randn(self.batch_size, self.hidden_size)
net = Model(input_size, hidden_size, batch_size)
# optimizer and loss
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
# training loop
for epoch in range(15):
    loss = 0
    optimizer.zero_grad()
    hidden = net.init_hidden()
    print('Predicted string: ', end='')
    # inputs: (seq_len, batch_size, input_size); labels: (seq_len, 1)
    # each input: (batch_size, input_size); each label: (1)
    for input, label in zip(inputs, labels):
        hidden = net(input, hidden)  # RNN Cell
        loss += criterion(hidden, label)
        _, idx = hidden.max(dim=1)  # take the prediction
        print(idx2char[idx.item()], end='')
    loss.backward()
    optimizer.step()
    print(', Epoch [%d/15] loss=%.4f' % (epoch+1, loss.item()))
Output:
An improvement: use torch.nn.RNN instead of RNNCell:
import torch
input_size = 4
hidden_size = 4
num_layers = 1
batch_size = 1
seq_len = 5
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]
# inputs shape: (seq_len, batch_size, input_size)
inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size)
# labels shape: (seq_len * batch_size,)
labels = torch.LongTensor(y_data)
class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size, num_layers):
        super(Model, self).__init__()
        # input_size=4, hidden_size=4,
        # num_layers=1, batch_size=1, seq_len=5
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(input_size=self.input_size,
                                hidden_size=self.hidden_size,
                                num_layers=num_layers)

    def forward(self, input):
        hidden = torch.zeros(self.num_layers,
                             self.batch_size,
                             self.hidden_size)
        out, _ = self.rnn(input, hidden)
        # reshape out to (seq_len * batch_size, hidden_size)
        return out.view(-1, self.hidden_size)
net = Model(input_size, hidden_size, batch_size, num_layers)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
# training loop
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()
    print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
    print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
Output:
On one-hot encoding: one-hot理解_Dorothy_Xue的博客-CSDN博客
One-hot encodings are high-dimensional, sparse, and hard-coded.
What we want instead is an encoding that is low-dimensional, dense, and learned from the data.
A very popular and effective technique for this: embedding.
For example, the index 2 might be mapped to the vector (0.3896, 1.6526, -0.9640, 1.2836, 1.2430).
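A minimal sketch of such a lookup with torch.nn.Embedding (the weights are randomly initialized, so the numbers above are only illustrative):

import torch

emb = torch.nn.Embedding(num_embeddings=4, embedding_dim=5)
idx = torch.LongTensor([2])
print(emb(idx))        # a dense, trainable 5-dimensional vector for index 2
print(emb(idx).shape)  # torch.Size([1, 5])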
Adding an Embedding layer and a Linear layer:
Code:
import torch
# parameters
num_class = 4
input_size = 4
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5
idx2char = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]] # (batch, seq_len)
y_data = [3, 1, 2, 3, 2] # (batch * seq_len)
inputs = torch.LongTensor(x_data) # (batch_size, seq_len)
labels = torch.LongTensor(y_data)  # (batch_size * seq_len)
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.emb = torch.nn.Embedding(
            input_size, embedding_size)  # lookup into the embedding matrix
        # RNN input:  (batch_size, seq_len, embedding_size)
        # RNN output: (batch_size, seq_len, hidden_size)
        self.rnn = torch.nn.RNN(input_size=embedding_size,
                                hidden_size=hidden_size,
                                num_layers=num_layers,
                                batch_first=True)
        # fc input:  (batch_size, seq_len, hidden_size)
        # fc output: (batch_size, seq_len, num_class)
        self.fc = torch.nn.Linear(hidden_size, num_class)

    def forward(self, x):
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)
        # input:  LongTensor of shape (batch_size, seq_len)
        # output: (batch_size, seq_len, embedding_size)
        # note: batch first!
        x = self.emb(x)
        x, _ = self.rnn(x, hidden)
        x = self.fc(x)
        return x.view(-1, num_class)  # flatten so CrossEntropyLoss can be used
net = Model()
# optimizer and loss
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()
    print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
    print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
Output:
LSTM (Long Short-Term Memory) networks: torch.nn — PyTorch 1.13 documentation
GRU (Gated Recurrent Unit): torch.nn — PyTorch 1.13 documentation
Example: classifying names by country.
Since this is not a sequence-to-sequence task, the model can be simpler:
The model looks like this:
Data preparation
Bidirectional RNN/LSTM/GRU: the sequence is processed both left-to-right and right-to-left, and the results of the two directions are concatenated, as the shape check below shows.
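A quick shape check (a sketch with made-up sizes) of what a bidirectional GRU returns:

import torch

seq_len, batch_size, hidden_size, n_layers = 7, 2, 5, 2
gru = torch.nn.GRU(hidden_size, hidden_size, n_layers, bidirectional=True)
x = torch.randn(seq_len, batch_size, hidden_size)
h0 = torch.zeros(n_layers * 2, batch_size, hidden_size)
output, hidden = gru(x, h0)
print(output.shape)  # (seq_len, batch_size, hidden_size * 2) -- forward/backward concatenated
print(hidden.shape)  # (n_layers * 2, batch_size, hidden_size)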
The code:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pack_sequence, pad_packed_sequence
import gzip
import csv
import time
# Parameters
HIDDEN_SIZE = 100
BATCH_SIZE = 256
N_LAYER = 2
N_EPOCHS = 100
N_CHARS = 128  # size of the character set (ASCII codes)
USE_GPU = False
class NameDataset(Dataset):
    def __init__(self, is_train_set=True):
        # read the data from the .gz file with gzip and csv
        filename = 'data/names_train.csv.gz' if is_train_set else 'data/names_test.csv.gz'
        with gzip.open(filename, 'rt') as f:
            reader = csv.reader(f)
            rows = list(reader)
        # save the names and countries in lists
        self.names = [row[0] for row in rows]
        self.len = len(self.names)
        self.countries = [row[1] for row in rows]
        # save the countries and their indices in a list and a dictionary
        self.country_list = list(sorted(set(self.countries)))
        self.country_dict = self.getCountryDict()
        self.country_num = len(self.country_list)

    def __getitem__(self, index):
        # return a (name, country index) pair
        return self.names[index], self.country_dict[self.countries[index]]

    def __len__(self):
        # size of the dataset
        return self.len

    def getCountryDict(self):
        country_dict = dict()
        for idx, country_name in enumerate(self.country_list, 0):
            country_dict[country_name] = idx
        return country_dict

    def idx2country(self, index):
        # return the country for a given index
        return self.country_list[index]

    def getCountriesNum(self):
        # return the number of countries
        return self.country_num
class RNNClassifier(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size  # GRU parameter
        self.n_layers = n_layers        # GRU parameter
        self.n_directions = 2 if bidirectional else 1
        # embedding input:  (seq_len, batch_size)
        # embedding output: (seq_len, batch_size, hidden_size)
        self.embedding = torch.nn.Embedding(input_size, hidden_size)
        # inputs of the GRU layer:
        #   input:  (seq_len, batch_size, hidden_size)
        #   hidden: (n_layers * n_directions, batch_size, hidden_size)
        # outputs of the GRU layer:
        #   output: (seq_len, batch_size, hidden_size * n_directions)
        #   hidden: (n_layers * n_directions, batch_size, hidden_size)
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers,
                                bidirectional=bidirectional)
        self.fc = torch.nn.Linear(hidden_size * self.n_directions, output_size)

    def _init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers * self.n_directions,
                             batch_size, self.hidden_size)
        return create_tensor(hidden)

    def forward(self, input, seq_lengths):
        # input shape: B x S -> S x B
        input = input.t()
        batch_size = input.size(1)
        # initialize hidden as (n_layers * n_directions, batch_size, hidden_size)
        hidden = self._init_hidden(batch_size)
        # embedding result: (seq_len, batch_size, hidden_size)
        embedding = self.embedding(input)
        # output: (seq_len, batch_size, hidden_size * n_directions)
        # hidden: (n_layers * n_directions, batch_size, hidden_size)
        output, hidden = self.gru(embedding, hidden)
        if self.n_directions == 2:
            # with a bidirectional GRU, concatenate the forward and backward hidden states
            hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim=1)
        else:
            hidden_cat = hidden[-1]
        fc_output = self.fc(hidden_cat)  # linear classifier
        return fc_output
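    # Note: pack_padded_sequence is imported above but not used in this version,
    # so the GRU also runs over the zero padding. A hypothetical packed variant
    # of the GRU call (a sketch, not part of the original lesson) would skip
    # the padded positions:
    #
    #   gru_input = pack_padded_sequence(embedding, seq_lengths.cpu(),
    #                                    enforce_sorted=False)
    #   output, hidden = self.gru(gru_input, hidden)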
def make_tensors(names, countries):
    sequences_and_lengths = [name2list(name) for name in names]
    name_sequences = [sl[0] for sl in sequences_and_lengths]
    seq_lengths = torch.LongTensor([sl[1] for sl in sequences_and_lengths])
    countries = countries.long()
    # make a zero-padded tensor of names, BatchSize x SeqLen
    seq_tensor = torch.zeros(len(name_sequences), seq_lengths.max()).long()
    for idx, (seq, seq_len) in enumerate(zip(name_sequences, seq_lengths), 0):
        seq_tensor[idx, :seq_len] = torch.LongTensor(seq)
    return create_tensor(seq_tensor), \
           create_tensor(seq_lengths), \
           create_tensor(countries)

def name2list(name):
    # e.g. 'Abe' -> ([65, 98, 101], 3): ASCII codes plus the length
    arr = [ord(c) for c in name]
    return arr, len(arr)

def create_tensor(tensor):
    # move the tensor to the GPU if requested
    if USE_GPU:
        device = torch.device("cuda:0")
        tensor = tensor.to(device)
    return tensor
def trainModel():
    total_loss = 0
    for i, (names, countries) in enumerate(trainloader, 1):
        inputs, seq_lengths, target = make_tensors(names, countries)
        output = classifier(inputs, seq_lengths)  # forward: compute the output
        loss = criterion(output, target)          # forward: compute the loss
        optimizer.zero_grad()                     # zero the gradients
        loss.backward()                           # backward
        optimizer.step()                          # update
        total_loss += loss.item()
        if i % 10 == 0:
            print(f'[{i * len(inputs)}/{len(trainset)}] ', end='')
            print(f'loss={total_loss / (i * len(inputs))}')
    return total_loss
def testModel():
    correct = 0
    total = len(testset)
    print("evaluating trained model ...")
    # tell PyTorch not to compute gradients, saving time and memory
    with torch.no_grad():
        for i, (names, countries) in enumerate(testloader, 1):
            # compute the model output
            inputs, seq_lengths, target = make_tensors(names, countries)
            output = classifier(inputs, seq_lengths)
            # count the correct predictions
            pred = output.max(dim=1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
        percent = '%.2f' % (100 * correct / total)
        print(f'Test set: Accuracy {correct}/{total} {percent}%')
    return correct / total
def plot_acc(acc_list):
    import matplotlib.pyplot as plt
    import numpy as np
    epoch = np.arange(1, len(acc_list) + 1, 1)
    acc_list = np.array(acc_list)
    plt.plot(epoch, acc_list)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.grid()
    plt.show()
if __name__ == '__main__':
    # prepare the Dataset and DataLoader
    trainset = NameDataset(is_train_set=True)
    trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
    testset = NameDataset(is_train_set=False)
    testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)
    # size of the output
    N_COUNTRY = trainset.getCountriesNum()
    # instantiate the classifier model
    classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRY, N_LAYER)
    if USE_GPU:
        device = torch.device("cuda:0")
        classifier.to(device)
    # use cross-entropy as the loss function
    # and Adam as the optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
    print("Training for %d epochs..." % N_EPOCHS)
    acc_list = []  # record the test accuracies
    for epoch in range(1, N_EPOCHS + 1):
        # train cycle: each epoch, train the model once and test it once
        print('epoch: %d' % epoch)
        trainModel()
        acc = testModel()
        acc_list.append(acc)
    plot_acc(acc_list)