Character Sequence Prediction with an RNN

PyTorch provides two RNN-related wrappers, RNNCell() and RNN(). RNNCell is the core unit of an RNN and can be invoked flexibly, one time step at a time, while RNN wraps the loop over the whole sequence. Both are used below to implement the same task.
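To make the shape contracts concrete, here is a minimal sketch of the two call signatures (the tensors are just illustrative random data):

import torch

cell = torch.nn.RNNCell(input_size=4, hidden_size=4)
x_t = torch.randn(1, 4)      # one time step: (batch_size, input_size)
h_t = torch.zeros(1, 4)      # hidden state:  (batch_size, hidden_size)
h_t = cell(x_t, h_t)         # one step forward; returns the next hidden state, (1, 4)

rnn = torch.nn.RNN(input_size=4, hidden_size=4, num_layers=1)
seq = torch.randn(5, 1, 4)   # whole sequence: (seq_len, batch_size, input_size)
h_0 = torch.zeros(1, 1, 4)   # (num_layers, batch_size, hidden_size)
out, h_n = rnn(seq, h_0)     # out: (5, 1, 4); h_n: (1, 1, 4)

RNNCell processes a single time step per call, so the loop over the sequence must be written by hand; RNN runs that loop internally.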

Task

  • Goal: train the network to map the input sequence 'hello' to the target sequence 'ohlol' (illustrative figure omitted).
  • Principle: unroll the RNN over the sequence, feeding each step's hidden state into the next step (illustrative figure omitted).

In recurrent neural networks, tanh is the most commonly used activation function, because tanh(x) ∈ [-1, 1].
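A two-line check of that range (tanh saturates at ±1 for large inputs):

import torch
print(torch.tanh(torch.tensor([-100.0, 0.0, 100.0])))  # tensor([-1., 0., 1.])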

1. RNNCell

import torch
from torch.nn.functional import one_hot

1.1 Data Preprocessing

Encode each character as a one-hot vector:

input_size = 4  # dimension of each one-hot vector, e.g. [1, 0, 0, 0]
hidden_size = 4
batch_size = 1  # one sample
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # The input sequence is 'hello'
y_data = [3, 1, 2, 3, 2]  # The output sequence is 'ohlol'

# one-hot encoding
# pattern 1: a manual lookup table
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
# pattern 2: torch.nn.functional.one_hot (completed in the sketch after this block)
# words = torch.tensor([0, 1, 2, 3], dtype=torch.long)
# one_hot_encoding = one_hot(words)
# print(one_hot_encoding)

# Convert indices into one-hot vector
x_one_hot = [one_hot_lookup[x] for x in x_data]  # [seqLen, input_size]
# Reshape the inputs to (seqLen, batch_size, input_size) 
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
labels = torch.LongTensor(y_data).view(-1, 1)  # (seqLen, 1)
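Pattern 2 above can be completed into a drop-in replacement for the lookup table; a minimal sketch, reusing x_data, batch_size, and input_size from above (note that one_hot returns a LongTensor, so it must be cast to float):

x_one_hot_alt = one_hot(torch.tensor(x_data), num_classes=input_size).float()
inputs_alt = x_one_hot_alt.view(-1, batch_size, input_size)
assert torch.equal(inputs_alt, inputs)  # identical to the lookup-table version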

1.2 Defining the RNNCell Model

class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size):
        super(Model, self).__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.RNNcell = torch.nn.RNNCell(input_size=self.input_size, hidden_size=self.hidden_size)

    def forward(self, input, hidden):
        hidden = self.RNNcell(input, hidden)
        return hidden 
        
    def init_hidden(self):  # provide the initial hidden state h0
        return torch.zeros(self.batch_size, self.hidden_size)

net = Model(input_size, hidden_size, batch_size)  # 4, 4, 1
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
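As a quick sanity check of the shapes (reusing the net and inputs just built): a single RNNCell step consumes one (batch_size, input_size) slice and returns a (batch_size, hidden_size) hidden state.

hidden = net.init_hidden()       # (1, 4)
hidden = net(inputs[0], hidden)  # feed the first time step
print(hidden.shape)              # torch.Size([1, 4])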

1.3 Training the Model

for epoch in range(15):
    print('=' * 24, epoch, '=' * 24)
    loss = 0
    optimizer.zero_grad()
    hidden = net.init_hidden()
    print('Predicted string: ', end='')
    # shape of inputs: (seqLen, batch_size, input_size)
    # shape of input : (batch_size, input_size)
    # shape of labels: (seqLen, 1)
    # shape of label : (1)
    for input, label in zip(inputs, labels):
        # the loop feeds this step's hidden state into the next step
        hidden = net(input, hidden)
        # Don't call loss.item() here: each iteration only computes the loss of
        # one step, and the per-step losses must be summed so that the whole
        # sequence shares a single computation graph; the sum is the final loss.
        loss += criterion(hidden, label)
        _, idx = hidden.max(dim=1)  # index of the largest score in each row
        print(idx2char[idx.item()], end='')
    loss.backward()
    optimizer.step()
    print(', Epoch [%d / 15] loss=%.4f' % (epoch + 1, loss.item()))

Output:

======================== 0 ========================
Predicted string: ollll, Epoch [1 / 15] loss=5.4837
======================== 1 ========================
Predicted string: ollll, Epoch [2 / 15] loss=4.9311
======================== 2 ========================
Predicted string: ollll, Epoch [3 / 15] loss=4.4451
======================== 3 ========================
Predicted string: ohlll, Epoch [4 / 15] loss=3.9837
======================== 4 ========================
Predicted string: ohlol, Epoch [5 / 15] loss=3.6430
======================== 5 ========================
Predicted string: ohlol, Epoch [6 / 15] loss=3.4190
======================== 6 ========================
Predicted string: ohlol, Epoch [7 / 15] loss=3.1067
======================== 7 ========================
Predicted string: ohlol, Epoch [8 / 15] loss=2.8451
======================== 8 ========================
Predicted string: ohlol, Epoch [9 / 15] loss=2.6792
======================== 9 ========================
Predicted string: ohlol, Epoch [10 / 15] loss=2.5638
======================== 10 ========================
Predicted string: ohlol, Epoch [11 / 15] loss=2.4677
======================== 11 ========================
Predicted string: ohlol, Epoch [12 / 15] loss=2.3780
======================== 12 ========================
Predicted string: ohlol, Epoch [13 / 15] loss=2.2892
======================== 13 ========================
Predicted string: ohlol, Epoch [14 / 15] loss=2.2024
======================== 14 ========================
Predicted string: ohlol, Epoch [15 / 15] loss=2.1249

2. RNN

import torch

2.1 Data Preprocessing

input_size2 = 4
hidden_size2 = 4
num_layers2 = 1
batch_size2 = 1  # one sample
seq_len2 = 5
idx2char2 = ['e', 'h', 'l', 'o']
x_data2 = [1, 0, 2, 2, 3]  # The input sequence is 'hello'
y_data2 = [3, 1, 2, 3, 2]  # The output sequence is 'ohlol'
one_hot_lookup2 = [[1, 0, 0, 0],
                   [0, 1, 0, 0],
                   [0, 0, 1, 0],
                   [0, 0, 0, 1]]
# Convert indices into one-hot vector
x_one_hot2 = [one_hot_lookup2[x2] for x2 in x_data2]  # [seqLen, input_size]
# Reshape the inputs to (seqLen, batch_size, input_size) 
inputs2 = torch.Tensor(x_one_hot2).view(seq_len2, batch_size2, input_size2)
labels2 = torch.LongTensor(y_data2)  # unlike the previous program: shape (seqLen*batch_size,), matching the output of Model2.forward

2.2 Defining the RNN Model

class Model2(torch.nn.Module):
    def __init__(self, input_size2, hidden_size2, batch_size2, num_layers2):
        super(Model2, self).__init__()
        self.num_layers2 = num_layers2
        self.batch_size2 = batch_size2
        self.input_size2 = input_size2
        self.hidden_size2 = hidden_size2
        self.RNN = torch.nn.RNN(input_size=self.input_size2,
                                hidden_size=self.hidden_size2,
                                num_layers=self.num_layers2)

    def forward(self, input2):
        # shape of hidden:(num_layers, batch_size, hidden_size)
        hidden2 = torch.zeros(self.num_layers2, self.batch_size2, self.hidden_size2)
        out, _ = self.RNN(input2, hidden2)
        # Reshape out to (seqLen*batch_size, hidden_size): flattening into a
        # matrix makes the cross-entropy computation straightforward.
        return out.view(-1, self.hidden_size2)

net2 = Model2(input_size2, hidden_size2, batch_size2, num_layers2)
criterion2 = torch.nn.CrossEntropyLoss()
optimizer2 = torch.optim.Adam(net2.parameters(), lr=0.05)
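This pairing works because the flattened output of Model2.forward is a (seqLen*batch_size, hidden_size) score matrix and labels2 is a matching (seqLen*batch_size,) vector of class indices: exactly the (N, C) logits and (N,) targets that CrossEntropyLoss expects. A quick check with the objects defined above:

with torch.no_grad():
    out2 = net2(inputs2)
print(out2.shape, labels2.shape)  # torch.Size([5, 4]) torch.Size([5])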

2.3 Training the Model

for epoch in range(15):
    optimizer2.zero_grad()
    outputs = net2(inputs2)
    loss2 = criterion2(outputs, labels2)
    loss2.backward()
    optimizer2.step()

    _, idx2 = outputs.max(dim=1)
    idx2 = idx2.data.numpy()
    print('Predicted: ', ''.join([idx2char2[x] for x in idx2]), end='')
    print(', Epoch [%d / 15] loss=%.3f' % (epoch + 1, loss2.item()))

Output:

Predicted:  lllle, Epoch [1 / 15] loss=1.643
Predicted:  lllle, Epoch [2 / 15] loss=1.489
Predicted:  lllll, Epoch [3 / 15] loss=1.371
Predicted:  lllll, Epoch [4 / 15] loss=1.262
Predicted:  hllll, Epoch [5 / 15] loss=1.154
Predicted:  hhlll, Epoch [6 / 15] loss=1.059
Predicted:  hhlll, Epoch [7 / 15] loss=0.983
Predicted:  hhlll, Epoch [8 / 15] loss=0.925
Predicted:  hhlll, Epoch [9 / 15] loss=0.879
Predicted:  hhlll, Epoch [10 / 15] loss=0.839
Predicted:  ohlll, Epoch [11 / 15] loss=0.805
Predicted:  ohlll, Epoch [12 / 15] loss=0.777
Predicted:  ohlll, Epoch [13 / 15] loss=0.752
Predicted:  ohlll, Epoch [14 / 15] loss=0.729
Predicted:  ohlll, Epoch [15 / 15] loss=0.709
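After training, the model can be run once more without gradients to decode its final prediction; a minimal usage sketch:

with torch.no_grad():
    pred = net2(inputs2).argmax(dim=1).tolist()
print('Final prediction:', ''.join(idx2char2[i] for i in pred))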