PyTorch中提供了两种有关 RNN 的封装,分别是 RNNCell()与 RNN() 。RNNCell 是 RNN 网络的核心单元,可以灵活调用。下面分别用这两种方式实现。
RNN实现字符排序
- 目标:训练 RNN 学习一个序列到序列的映射:输入序列 "hello",输出序列 "ohlol"。
- 原理
循环神经网络中,激活函数 tanh 比较常用,因为 $\tanh(x) \in [-1, 1]$
1. RNNCell
import torch
from torch.nn.functional import one_hot
1.1 数据预处理
把字符编码成 one-hot 编码
# --- Problem dimensions ---------------------------------------------------
input_size = 4   # width of one one-hot vector, e.g. [1, 0, 0, 0]
hidden_size = 4  # width of the hidden state
batch_size = 1   # a single training sample

# Vocabulary: a character's position in this list is its integer index.
idx2char = ['e', 'h', 'l', 'o']

x_data = [1, 0, 2, 2, 3]  # input sequence:  'hello'
y_data = [3, 1, 2, 3, 2]  # target sequence: 'ohlol'

# One-hot table: row k is the encoding of character index k (a 4x4 identity).
# Alternative: torch.nn.functional.one_hot(torch.tensor([0, 1, 2, 3]))
# produces the same rows as an integer tensor.
one_hot_lookup = [
    [int(row == col) for col in range(input_size)]
    for row in range(input_size)
]

# Encode each input index; nested list of shape [seqLen, input_size].
x_one_hot = [one_hot_lookup[char_idx] for char_idx in x_data]

# Float tensor laid out as (seqLen, batch_size, input_size).
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)

# Integer class targets as a column, shape (seqLen, 1), so that iterating
# over it yields one length-1 label tensor per time step.
labels = torch.LongTensor(y_data).unsqueeze(-1)
1.2 定义 RNNCell 模型
class Model(torch.nn.Module):
    """Single-step recurrent model built on ``torch.nn.RNNCell``.

    The caller drives the recurrence: each call to ``forward`` advances the
    hidden state by exactly one time step.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Number of features in the hidden state.
        batch_size: Batch size used when building the initial hidden state.
    """

    def __init__(self, input_size, hidden_size, batch_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.RNNcell = torch.nn.RNNCell(
            input_size=input_size,
            hidden_size=hidden_size,
        )

    def forward(self, input, hidden):
        """Run one RNNCell step and return the new hidden state.

        Args:
            input: Input for the current time step.
            hidden: Hidden state from the previous time step.
        """
        return self.RNNcell(input, hidden)

    def init_hidden(self):
        """Return an all-zero initial hidden state (batch_size, hidden_size)."""
        shape = (self.batch_size, self.hidden_size)
        return torch.zeros(shape)
# Build the RNNCell model from the size constants defined earlier (4, 4, 1).
net = Model(
    input_size=input_size,
    hidden_size=hidden_size,
    batch_size=batch_size,
)
# Cross-entropy loss; the training loop feeds it the hidden state as scores.
criterion = torch.nn.CrossEntropyLoss()
# Adam over every trainable parameter of the model.
optimizer = torch.optim.Adam(params=net.parameters(), lr=0.1)
1.3 训练模型
# Train the RNNCell model, feeding the sequence one time step at a time.
# The epoch count is defined once so the loop bound and the progress message
# cannot drift apart.
num_epochs = 15
for epoch in range(num_epochs):
    # Banner showing the zero-based iteration index.
    print('=' * 24, epoch, '=' * 24)

    optimizer.zero_grad()
    hidden = net.init_hidden()  # fresh h0 for every pass over the sequence
    loss = 0
    print('Predicted string: ', end='')

    # inputs: (seqLen, batch_size, input_size) -> per step (batch_size, input_size)
    # labels: (seqLen, 1)                      -> per step (1,)
    for step_input, step_label in zip(inputs, labels):
        # The hidden state produced at this step is the input state of the next.
        hidden = net(step_input, hidden)
        # Accumulate the loss *tensor* (not loss.item()): the per-step losses
        # must stay in the autograd graph so that their sum can be
        # back-propagated through the whole sequence.
        loss += criterion(hidden, step_label)
        # Index of the highest score in each row = predicted character.
        predicted = hidden.argmax(dim=1)
        print(idx2char[predicted.item()], end='')

    loss.backward()
    optimizer.step()
    # NOTE: the 'Epocn' spelling is kept so the output matches the recorded run.
    print(', Epocn [%d / %d] loss=%.4f' % (epoch + 1, num_epochs, loss.item()))
运行结果:
======================== 0 ========================
Predicted string: ollll, Epocn [1 / 15] loss=5.4837
======================== 1 ========================
Predicted string: ollll, Epocn [2 / 15] loss=4.9311
======================== 2 ========================
Predicted string: ollll, Epocn [3 / 15] loss=4.4451
======================== 3 ========================
Predicted string: ohlll, Epocn [4 / 15] loss=3.9837
======================== 4 ========================
Predicted string: ohlol, Epocn [5 / 15] loss=3.6430
======================== 5 ========================
Predicted string: ohlol, Epocn [6 / 15] loss=3.4190
======================== 6 ========================
Predicted string: ohlol, Epocn [7 / 15] loss=3.1067
======================== 7 ========================
Predicted string: ohlol, Epocn [8 / 15] loss=2.8451
======================== 8 ========================
Predicted string: ohlol, Epocn [9 / 15] loss=2.6792
======================== 9 ========================
Predicted string: ohlol, Epocn [10 / 15] loss=2.5638
======================== 10 ========================
Predicted string: ohlol, Epocn [11 / 15] loss=2.4677
======================== 11 ========================
Predicted string: ohlol, Epocn [12 / 15] loss=2.3780
======================== 12 ========================
Predicted string: ohlol, Epocn [13 / 15] loss=2.2892
======================== 13 ========================
Predicted string: ohlol, Epocn [14 / 15] loss=2.2024
======================== 14 ========================
Predicted string: ohlol, Epocn [15 / 15] loss=2.1249
2. RNN
import torch
2.1 数据预处理
# --- Hyper-parameters for the torch.nn.RNN version --------------------------
input_size2 = 4
hidden_size2 = 4
num_layers2 = 1
batch_size2 = 1  # a single training sample
seq_len2 = 5

# Vocabulary: a character's position in this list is its integer index.
idx2char2 = ['e', 'h', 'l', 'o']

x_data2 = [1, 0, 2, 2, 3]  # input sequence:  'hello'
y_data2 = [3, 1, 2, 3, 2]  # target sequence: 'ohlol'

# One-hot table: row k is the encoding of character index k (a 4x4 identity).
one_hot_lookup2 = [
    [int(row == col) for col in range(input_size2)]
    for row in range(input_size2)
]

# Encode each input index; nested list of shape [seqLen, input_size].
x_one_hot2 = [one_hot_lookup2[char_idx] for char_idx in x_data2]

# Float tensor laid out as (seqLen, batch_size, input_size).
inputs2 = torch.Tensor(x_one_hot2).view(seq_len2, batch_size2, input_size2)

# Unlike the RNNCell version, the targets stay a flat vector of length
# seqLen * batch_size, matching the (seqLen * batch_size, hidden_size)
# matrix returned by Model2.forward.
labels2 = torch.LongTensor(y_data2)
2.2 定义 RNN 模型
class Model2(torch.nn.Module):
    """Whole-sequence recurrent model built on ``torch.nn.RNN``.

    Args:
        input_size2: Number of features in each input vector.
        hidden_size2: Number of features in the hidden state.
        batch_size2: Nominal batch size. It is stored for reference; the
            initial hidden state is sized from the actual input instead.
        num_layers2: Number of stacked recurrent layers.
    """

    def __init__(self, input_size2, hidden_size2, batch_size2, num_layers2):
        super().__init__()
        self.num_layers2 = num_layers2
        self.batch_size2 = batch_size2
        self.input_size2 = input_size2
        self.hidden_size2 = hidden_size2
        self.RNN = torch.nn.RNN(
            input_size=self.input_size2,
            hidden_size=self.hidden_size2,
            num_layers=self.num_layers2,
        )

    def forward(self, input2):
        """Run the RNN over a full sequence.

        Args:
            input2: Tensor of shape (seqLen, batch_size, input_size).

        Returns:
            Tensor of shape (seqLen * batch_size, hidden_size): the per-step
            outputs flattened into a matrix, ready for cross-entropy.
        """
        # h0 has shape (num_layers, batch_size, hidden_size). Size it from the
        # input's batch dimension (and create it with the input's dtype and
        # device) so the model also works when the actual batch differs from
        # the nominal batch_size2.
        hidden2 = input2.new_zeros(
            (self.num_layers2, input2.size(1), self.hidden_size2)
        )
        out, _ = self.RNN(input2, hidden2)
        return out.view(-1, self.hidden_size2)
# Build the torch.nn.RNN model from the size constants defined earlier.
net2 = Model2(
    input_size2=input_size2,
    hidden_size2=hidden_size2,
    batch_size2=batch_size2,
    num_layers2=num_layers2,
)
# Cross-entropy loss over the flattened per-step outputs.
criterion2 = torch.nn.CrossEntropyLoss()
# Adam over every trainable parameter of the model.
optimizer2 = torch.optim.Adam(params=net2.parameters(), lr=0.05)
2.3 训练模型
# Train the torch.nn.RNN model; the whole sequence is processed in one call.
# The epoch count is defined once so the loop bound and the progress message
# cannot drift apart.
num_epochs2 = 15
for epoch in range(num_epochs2):
    optimizer2.zero_grad()
    outputs = net2(inputs2)  # (seqLen * batch_size, hidden_size)
    loss2 = criterion2(outputs, labels2)
    loss2.backward()
    optimizer2.step()

    # Index of the highest score in each row = predicted character.
    # tolist() yields plain Python ints without going through the legacy
    # `.data` attribute or requiring numpy.
    predicted2 = outputs.argmax(dim=1).tolist()
    print('Predicted: ', ''.join(idx2char2[k] for k in predicted2), end='')
    # NOTE: the 'Epocn' spelling is kept so the output matches the recorded run.
    print(', Epocn [%d / %d] loss=%.3f' % (epoch + 1, num_epochs2, loss2.item()))
运行结果
Predicted: lllle, Epocn [1 / 15] loss=1.643
Predicted: lllle, Epocn [2 / 15] loss=1.489
Predicted: lllll, Epocn [3 / 15] loss=1.371
Predicted: lllll, Epocn [4 / 15] loss=1.262
Predicted: hllll, Epocn [5 / 15] loss=1.154
Predicted: hhlll, Epocn [6 / 15] loss=1.059
Predicted: hhlll, Epocn [7 / 15] loss=0.983
Predicted: hhlll, Epocn [8 / 15] loss=0.925
Predicted: hhlll, Epocn [9 / 15] loss=0.879
Predicted: hhlll, Epocn [10 / 15] loss=0.839
Predicted: ohlll, Epocn [11 / 15] loss=0.805
Predicted: ohlll, Epocn [12 / 15] loss=0.777
Predicted: ohlll, Epocn [13 / 15] loss=0.752
Predicted: ohlll, Epocn [14 / 15] loss=0.729
Predicted: ohlll, Epocn [15 / 15] loss=0.709