12循环神经网络(基础篇)

RNN的简单介绍和实现

导入包库

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

torch.nn.RNNCell的简单例子

单层RNN:

在这里插入图片描述

# Demo: drive a single nn.RNNCell through a short sequence, one step at a time.
batchSize = 1
seqLen = 3    # three time steps: x1, x2, x3
inputSize = 4
hiddenSize = 2

# One recurrent cell: (input at step t, previous hidden) -> next hidden.
cell = nn.RNNCell(input_size=inputSize, hidden_size=hiddenSize)

# Random sequence laid out as (seqLen, batchSize, inputSize).
dataset = torch.randn(seqLen, batchSize, inputSize)
# Initial hidden state h0, all zeros.  This is the recurrent state, not the
# cell's weights (those are initialised by nn.RNNCell itself).
hidden = torch.zeros(batchSize, hiddenSize)

# Iterating over the first dimension yields one (batchSize, inputSize) slice
# per time step.  The loop variable is named `x_t` rather than `input` so the
# builtin `input` is not shadowed at module level.
for idx, x_t in enumerate(dataset):
    print('=' * 20, idx, '=' * 20)
    print('Input size: ', x_t.shape)

    # The new hidden state is fed back in on the next iteration.
    hidden = cell(x_t, hidden)

    print('outputs size: ', hidden.shape)
    print(hidden)
print('-' * 50)

torch.nn.RNN的简单例子

多层RNN:

在这里插入图片描述
其中,inputs = [$x_1, x_2, \dots, x_N$],outputs = [$h_1, h_2, \dots, h_N$]。$h_0^1$ 是第一隐层的初始隐状态 $h_0$,$h_N^1$ 是第一隐层的最终输出 $h_N$。

# Demo: nn.RNN consumes the whole sequence in a single call (multi-layer capable).
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1

# NOTE: despite the variable name, this is a full nn.RNN layer stack, not an
# nn.RNNCell.
cell = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)

# Input sequence laid out as (seq_len, batch_size, input_size).
inputs = torch.randn(seq_len, batch_size, input_size)
# h0: one initial hidden state per layer.  It is sized from this section's own
# parameters so that the section runs on its own.
hidden = torch.zeros(num_layers, batch_size, hidden_size)

# `out` holds the top layer's hidden state for every time step; `hidden` is
# the final hidden state of each layer.
out, hidden = cell(inputs, hidden)

print('Output size: ', out.shape)
print('Output: ', out)
print('Hidden size: ', hidden.shape)
print('Hidden: ', hidden)

print('-' * 50)

简单的Seq2Seq例子

要求:训练一个模型来学习 "hello" -> "ohlol"
在这里插入图片描述

由于"hello"和"ohlol"是str,无法直接参与计算,所以要转换为对应的index。这里使用的是one-hot vectors。
在这里插入图片描述

使用torch.nn.RNNCell

# The vocabulary has four distinct characters, so each one-hot input vector
# has width 4.
input_size = 4
batch_size = 1
hidden_size = 4

# A character's position in this list is its integer index.
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]   # "hello"
y_data = [3, 1, 2, 3, 2]   # "ohlol"

# Row i is the one-hot vector for index i.
one_hot_lookup = [
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1],
]

x_one_hot = [one_hot_lookup[char_idx] for char_idx in x_data]

# Inputs become (seqLen, batch_size, input_size); labels become (seqLen, 1)
# so that iterating over them yields one length-1 target per time step.
inputs = torch.tensor(x_one_hot, dtype=torch.float32)
inputs = inputs.view(-1, batch_size, input_size)
labels = torch.tensor(y_data, dtype=torch.long).unsqueeze(1)
# 构造模型
class Model(nn.Module):
    """Thin wrapper around a single ``nn.RNNCell``.

    The caller owns the time loop: every ``forward`` call advances the hidden
    state by exactly one step.
    """

    def __init__(self, input_size: int, batch_size: int, hidden_size: int) -> None:
        """Store the sizes and build the underlying recurrent cell."""
        super().__init__()
        self.input_size, self.batch_size, self.hidden_size = (
            input_size,
            batch_size,
            hidden_size,
        )
        self.rnncell = nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

    def forward(self, input, hidden):
        """Return the next hidden state given one step's input and the previous state."""
        return self.rnncell(input, hidden)

    def init_hidden(self):
        """Return an all-zero initial hidden state h0 of shape (batch_size, hidden_size)."""
        shape = (self.batch_size, self.hidden_size)
        return torch.zeros(*shape)

net = Model(input_size, batch_size, hidden_size)


# Loss and optimizer.  The hidden state is used directly as the per-step
# class scores passed to CrossEntropyLoss.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

for epoch in range(15):
    loss = 0
    optimizer.zero_grad()    # clear gradients left over from the previous epoch

    hidden = net.init_hidden()   # h0
    print("Predicted string: ", end='')
    # The loop variable is `x_t` rather than `input` so the builtin `input`
    # is not shadowed at module level.
    for x_t, label in zip(inputs, labels):
        hidden = net(x_t, hidden)
        # Each step contributes one term; the terms are summed so that a single
        # backward pass covers the whole sequence.
        loss += criterion(hidden, label)
        _, idx = hidden.max(dim=1)          # index of the highest-scoring class
        print(idx2char[idx.item()], end='')

    loss.backward()
    optimizer.step()
    print(', Epoch [%d/15] loss=%.4f' % (epoch+1, loss.item()))

使用torch.nn.RNN

# Data and sizes for the torch.nn.RNN variant of the "hello" -> "ohlol" task.
input_size = 4
batch_size = 1
hidden_size = 4
num_layers = 1

# Characters are represented by their position in idx2char.
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]

# Row i is the one-hot vector for index i.
one_hot_lookup = [
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1],
]

# Encode every input index as its one-hot row.
x_one_hot = [one_hot_lookup[char_idx] for char_idx in x_data]

# Inputs are reshaped to (seqLen, batchSize, inputSize).
inputs = torch.tensor(x_one_hot, dtype=torch.float32)
inputs = inputs.view(-1, batch_size, input_size)
# Labels stay flat with shape (seqLen,): one class index per time step.
labels = torch.tensor(y_data, dtype=torch.long)

# 定义网络结构
class Net(nn.Module):
    """Wrapper around ``nn.RNN`` that returns the per-step outputs flattened to 2-D."""

    def __init__(self, input_size: int, batch_size: int, hidden_size: int, num_layers: int) -> None:
        """Store the sizes and build the underlying ``nn.RNN``."""
        super().__init__()
        self.input_size, self.batch_size = input_size, batch_size
        self.hidden_size, self.num_layers = hidden_size, num_layers

        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
        )

    def forward(self, x, hidden):
        """Run the whole sequence ``x`` from initial state ``hidden``.

        Returns the RNN's output tensor reshaped to ``(-1, hidden_size)``, i.e.
        one row per (time step, batch element) pair; the final hidden state is
        discarded.
        """
        step_outputs = self.rnn(x, hidden)[0]
        return step_outputs.view(-1, self.hidden_size)

    def init_hidden(self):
        """Return an all-zero h0 of shape (num_layers, batch_size, hidden_size)."""
        shape = (self.num_layers, self.batch_size, self.hidden_size)
        return torch.zeros(*shape)

model = Net(input_size, batch_size, hidden_size, num_layers)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)


for epoch in range(15):
    optimizer.zero_grad()
    hidden = model.init_hidden()   # h0

    # Forward pass: nn.RNN iterates over the time steps internally, so the
    # whole sequence goes through in one call.
    outputs = model(inputs, hidden)
    l = criterion(outputs, labels)

    # Backward pass and parameter update.
    l.backward()
    optimizer.step()

    # Index of the highest-scoring class for every time step.
    _, predicted = torch.max(outputs, dim=1)
    predicted = predicted.data.numpy()
    print('Predicted string: ', ''.join(idx2char[idx] for idx in predicted)
          + ', Epoch [%d/15] loss=%.4f' % (epoch + 1, l.item()))

Using embedding and linear layer

one-hot vectors 缺点:

  • 维度高
  • 稀疏
  • 硬编码(对应关系是设置好的,而不是学习到的)

解决办法之一:使用 Embedding vectors
在这里插入图片描述
网络结构:
在这里插入图片描述

# Data and hyper-parameters for the embedding + linear-layer variant.
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]

# Hyper-parameters.
num_class = 4
input_size = 4
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5

# nn.Embedding looks rows up by integer index, so the inputs are kept as a
# long tensor, shaped (batchSize, seqLen).  The targets stay flat with
# batchSize * seqLen entries.
inputs = torch.tensor(x_data, dtype=torch.long).view(batch_size, seq_len)
labels = torch.tensor(y_data, dtype=torch.long)

class Model(nn.Module):
    """Embedding -> multi-layer RNN -> linear classifier.

    All sizes are read from the module-level hyper-parameters ``input_size``,
    ``embedding_size``, ``hidden_size``, ``num_layers`` and ``num_class``.
    """

    def __init__(self):
        """Build the embedding table, the batch-first RNN and the output layer."""
        super().__init__()

        # Maps each of the `input_size` indices to a dense vector of width
        # `embedding_size`; the output shape is (*input_shape, embedding_size).
        self.emd = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.RNN(
            input_size=embedding_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )

        # Projects every hidden state to one score per class.
        self.fc = nn.Linear(hidden_size, num_class)

    def forward(self, x):
        """Return class scores reshaped to ``(-1, num_class)`` for index tensor ``x``."""
        # Fresh all-zero h0, sized from the first dimension of the input.
        h0 = torch.zeros(num_layers, x.size(0), hidden_size)
        embedded = self.emd(x)   # (batch, seqLen, embeddingSize)
        rnn_out, _ = self.rnn(embedded, h0)
        scores = self.fc(rnn_out)
        return scores.view(-1, num_class)

net = Model()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

for epoch in range(15):
    # One full-sequence forward/backward pass per epoch.
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Index of the highest-scoring class for every position.
    _, predicted = torch.max(outputs, dim=1)
    predicted = predicted.data.numpy()
    print('Predicted string is', ''.join(idx2char[idx] for idx in predicted)
          + ', Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))
  • 33
    点赞
  • 30
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值