A Brief Introduction to RNNs, with Implementations
Importing libraries
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt
A simple torch.nn.RNNCell example
A single-layer RNN:
batchSize = 1
seqLen = 3  # x1 x2 x3
inputSize = 4
hiddenSize = 2
# build the cell
cell = nn.RNNCell(input_size=inputSize, hidden_size=hiddenSize)
# fabricate input data
dataset = torch.randn(seqLen, batchSize, inputSize)
# initial hidden state h0, all zeros
hidden = torch.zeros(batchSize, hiddenSize)
for idx, input in enumerate(dataset):  # take x1, x2, x3 in turn
    print('=' * 20, idx, '=' * 20)
    print('Input size: ', input.shape)
    hidden = cell(input, hidden)
    print('outputs size: ', hidden.shape)
    print(hidden)
    print('-' * 50)
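For reference, RNNCell computes $h' = \tanh(x W_{ih}^T + b_{ih} + h W_{hh}^T + b_{hh})$. Below is a minimal sketch of my own (not from the original post) that checks this against the cell's learned parameters; weight_ih, weight_hh, bias_ih, and bias_hh are attributes of nn.RNNCell:
x = dataset[0]  # first time step, shape (batchSize, inputSize)
h0 = torch.zeros(batchSize, hiddenSize)
# replicate the cell's update by hand
manual = torch.tanh(x @ cell.weight_ih.T + cell.bias_ih
                    + h0 @ cell.weight_hh.T + cell.bias_hh)
print(torch.allclose(manual, cell(x, h0)))  # expected: True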
A simple torch.nn.RNN example
A multi-layer RNN:
Here $inputs = [x_1, x_2, \dots, x_N]$ and $outputs = [h_1, h_2, \dots, h_N]$. $h_0^1$ is the initial hidden state $h_0$ fed to the first hidden layer, and $h_N^1$ is the first hidden layer's final output $h_N$.
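For reference (my addition, following the formula in the PyTorch documentation for nn.RNN), each layer $l$ updates its hidden state at step $t$ as
$$h_t^{(l)} = \tanh\!\left(x_t^{(l)} W_{ih}^{(l)\,T} + b_{ih}^{(l)} + h_{t-1}^{(l)} W_{hh}^{(l)\,T} + b_{hh}^{(l)}\right),$$
where $x_t^{(1)} = x_t$ is the raw input and, for $l > 1$, $x_t^{(l)} = h_t^{(l-1)}$ is the output of the layer below.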
# multi-layer RNN (num_layers = 1 here; increase it to stack layers)
batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1
cell = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
inputs = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)  # h0
out, hidden = cell(inputs, hidden)
print('Output size: ', out.shape)
print('Output: ', out)
print('Hidden size: ', hidden.shape)
print('Hidden: ', hidden)
print('-' * 50)
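To make the difference between out and hidden concrete, here is a small sketch of my own with num_layers = 2: out stacks the top layer's hidden state at every time step, while hidden holds the last time step's hidden state of every layer.
rnn2 = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=2)
h0 = torch.zeros(2, batch_size, hidden_size)
out2, hN = rnn2(torch.randn(seq_len, batch_size, input_size), h0)
print(out2.shape)  # (seqLen, batchSize, hiddenSize) -> torch.Size([3, 1, 2])
print(hN.shape)    # (numLayers, batchSize, hiddenSize) -> torch.Size([2, 1, 2])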
A simple Seq2Seq example
Task: train a model to learn the mapping "hello" -> "ohlol".
Since "hello" and "ohlol" are strings, they cannot take part in computation directly, so each character is first mapped to an index; the indices are then represented as one-hot vectors.
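As an aside (my addition, assuming PyTorch >= 1.1), the manual lookup table used below can also be replaced by torch.nn.functional.one_hot:
import torch.nn.functional as F
x_idx = torch.LongTensor([1, 0, 2, 2, 3])       # indices for "hello"
x_oh = F.one_hot(x_idx, num_classes=4).float()  # (seqLen, 4) one-hot matrix
print(x_oh)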
Using torch.nn.RNNCell
'''
There are four character classes, so input_size = 4.
'''
input_size = 4
batch_size = 1
hidden_size = 4
# map each character to an index
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]
# one-hot vectors
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]
inputs = torch.tensor(x_one_hot, dtype=torch.float32).view(-1, batch_size, input_size)
labels = torch.LongTensor(y_data).view(-1, 1)  # (seqLen, 1)
# build the model
class Model(nn.Module):
    def __init__(self, input_size, batch_size, hidden_size):
        super(Model, self).__init__()
        self.input_size = input_size
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.rnncell = nn.RNNCell(input_size=self.input_size, hidden_size=self.hidden_size)

    def forward(self, input, hidden):
        hidden = self.rnncell(input, hidden)
        return hidden

    def init_hidden(self):
        return torch.zeros(self.batch_size, self.hidden_size)  # h0

net = Model(input_size, batch_size, hidden_size)
# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
for epoch in range(15):
    loss = 0
    optimizer.zero_grad()  # clear gradients
    hidden = net.init_hidden()  # h0
    print("Predicted string: ", end='')
    for input, label in zip(inputs, labels):
        hidden = net(input, hidden)
        # each step yields the loss of one time step; accumulate over the
        # whole sequence and backpropagate once on the total
        loss += criterion(hidden, label)
        _, idx = hidden.max(dim=1)  # index of the largest class score
        print(idx2char[idx.item()], end='')
    loss.backward()
    optimizer.step()
    print(', Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))
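Once trained, the same loop can be reused for greedy decoding without gradient tracking; a short sketch of my own:
with torch.no_grad():
    hidden = net.init_hidden()
    chars = []
    for input in inputs:  # (seqLen, batchSize, inputSize), one step at a time
        hidden = net(input, hidden)
        chars.append(idx2char[hidden.max(dim=1)[1].item()])
    print(''.join(chars))  # ideally "ohlol" once training has converged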
Using torch.nn.RNN
# use torch.nn.RNN
input_size = 4
batch_size = 1
hidden_size = 4
num_layers = 1
# map each character to an index
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[idx] for idx in x_data]  # convert the input to one-hot
inputs = torch.tensor(x_one_hot, dtype=torch.float32).view(-1, batch_size, input_size)  # reshape to (seqLen, batchSize, inputSize)
labels = torch.LongTensor(y_data)  # (seqLen * batchSize,)
# define the network
class Net(nn.Module):
    def __init__(self, input_size, batch_size, hidden_size, num_layers):
        super(Net, self).__init__()
        self.input_size = input_size
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers)

    def forward(self, x, hidden):  # takes both x and h0
        out, hidden = self.rnn(x, hidden)
        return out.view(-1, self.hidden_size)  # (seqLen * batchSize, hiddenSize)

    def init_hidden(self):
        # h0; it could also be constructed outside and passed in
        return torch.zeros(self.num_layers, self.batch_size, self.hidden_size)

model = Net(input_size, batch_size, hidden_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
for epoch in range(15):
    optimizer.zero_grad()
    hidden = model.init_hidden()  # h0
    # forward
    # torch.nn.RNN loops over the sequence internally, so no per-step loop is needed
    outputs = model(inputs, hidden)
    l = criterion(outputs, labels)
    # backward and update
    l.backward()
    optimizer.step()
    _, predicted = outputs.max(dim=1)  # index of the largest class score
    predicted = predicted.data.numpy()
    print('Predicted string: ', ''.join([idx2char[idx] for idx in predicted]), end='')
    print(', Epoch [%d/15] loss=%.4f' % (epoch + 1, l.item()))
Using embedding and linear layers
Drawbacks of one-hot vectors:
- high-dimensional
- sparse
- hard-coded (the character-to-vector mapping is fixed by hand, not learned)
One remedy: use embedding vectors, as sketched below.
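A quick sketch of my own showing what an embedding layer returns: a dense, learnable vector per index instead of a sparse one-hot row.
emb = nn.Embedding(4, 10)                      # 4 indices, 10-dimensional vectors
vecs = emb(torch.LongTensor([1, 0, 2, 2, 3]))  # indices for "hello"
print(vecs.shape)                              # torch.Size([5, 10]), dense and trainable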
Network structure: embedding layer → RNN → linear layer.
# Embedding and linear layers
# data
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]
# nn.Embedding expects a LongTensor of indices as input
inputs = torch.LongTensor(x_data)
labels = torch.LongTensor(y_data)
# parameters
num_class = 4
input_size = 4
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5
# input should be (batchSize, seqLen); target should be (batchSize * seqLen,)
inputs = inputs.view(batch_size, seq_len)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # embedding table with input_size entries; output shape is
        # (*, embedding_size), where * is the input shape
        self.emd = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.RNN(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_class)

    def forward(self, x):
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)
        x = self.emd(x)  # (batchSize, seqLen, embeddingSize)
        x, hidden = self.rnn(x, hidden)
        x = self.fc(x)
        return x.view(-1, num_class)

net = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    _, predicted = outputs.max(dim=1)  # index of the largest class score
    predicted = predicted.data.numpy()
    print('Predicted string is', ''.join([idx2char[idx] for idx in predicted]), end='')
    print(', Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))