PyTorch Deep Learning Practice 12: RNN Basics

  • RNNs are mainly used for input data with sequential dependencies.
  • RNNCell: the input to each RNNCell step is not only the current sample x_t, but also the hidden state h_{t-1} produced by the previous step.
  • Why "recurrent": the same RNNCell is reused at every step, i.e., one set of weight parameters participates in the computation for every element of the sequence.
  • The linear computation inside an RNNCell: h_t = tanh(W_ih · x_t + b_ih + W_hh · h_{t-1} + b_hh), verified by hand in the sketch after the code below.
  • When using RNNCell directly, you have to write the loop over the sequence yourself:
import torch
"需要:初始化h0,输入序列"
batch_size = 1
input_size = 4
hidden_size = 2
seq_len = 3
 
cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)
 
dataset = torch.randn(seq_len, batch_size, input_size)  # construct the input sequence
hidden = torch.zeros(batch_size, hidden_size)  # all-zero hidden state, i.e., the initial h0
 
for idx, input in enumerate(dataset):
    print('=' * 20, idx, '=' * 20)
    print('Input size:', input.shape)
    hidden = cell(input, hidden)
    print('outputs size:', hidden.shape)
    print('hidden:', hidden)
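A minimal sketch verifying that formula against the cell's own parameters (it reuses cell, dataset, batch_size, and hidden_size from the block above; rnn_cell_step is an illustrative helper, not a PyTorch API):

import torch

# nn.RNNCell (with the default tanh nonlinearity) computes
# h' = tanh(x @ W_ih^T + b_ih + h @ W_hh^T + b_hh)
def rnn_cell_step(x, h, cell):
    return torch.tanh(x @ cell.weight_ih.T + cell.bias_ih
                      + h @ cell.weight_hh.T + cell.bias_hh)

x = dataset[0]                             # (batch_size, input_size)
h0 = torch.zeros(batch_size, hidden_size)  # fresh initial hidden state
print(torch.allclose(rnn_cell_step(x, h0, cell), cell(x, h0)))  # True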
  • When using nn.RNN directly, no manual loop is needed:

import torch
 
batch_size = 1
input_size = 4
hidden_size = 2
seq_len = 3
num_layers = 2
 
cell = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
inputs = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)
 
out, hidden = cell(inputs, hidden)
 
print('output size:', out.shape)
print('out:',out)
print('hidden size:',hidden.shape)
print('hidden:', hidden)
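In the output above, out collects the last layer's hidden state at every time step, shape (seq_len, batch_size, hidden_size), while hidden holds the final hidden state of each layer, shape (num_layers, batch_size, hidden_size). nn.RNN also accepts batch_first=True, which swaps the batch and sequence dimensions of the input and output tensors; a minimal sketch:

import torch

cell = torch.nn.RNN(input_size=4, hidden_size=2, num_layers=2, batch_first=True)
inputs = torch.randn(1, 3, 4)  # (batch_size, seq_len, input_size)
out, hidden = cell(inputs)     # h0 defaults to zeros when omitted
print('output size:', out.shape)     # torch.Size([1, 3, 2]), i.e. (batch, seq_len, hidden)
print('hidden size:', hidden.shape)  # torch.Size([2, 1, 2]), i.e. (num_layers, batch, hidden) regardless of batch_first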

Example 12-1: learning the "hello" → "ohlol" sequence transformation with RNNCell

  1. Build a dictionary (a mapping between characters and indices), then encode the index sequence as one-hot vectors (see the sketch below).
  2. Define the RNN output: a 4-dimensional score vector per time step, so that classifying each step's output character rearranges the text sequence.
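A minimal sketch of step 1 (char2idx is an illustrative name, not part of the original code):

idx2char = ['e', 'h', 'l', 'o']
char2idx = {c: i for i, c in enumerate(idx2char)}  # {'e': 0, 'h': 1, 'l': 2, 'o': 3}
x_data = [char2idx[c] for c in 'hello']            # [1, 0, 2, 2, 3]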

import torch
import torch.nn as nn
import torch.optim as optim

input_size = 4
hidden_size = 4
batch_size = 1

# prepare data
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3] # hello
y_data = [3, 1, 2, 3, 2] # ohlol

one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]] # rows for indices 0,1,2,3, i.e., e,h,l,o

x_one_hot = [one_hot_lookup[x] for x in x_data]

inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)  # -1 is inferred as seqLen
labels = torch.LongTensor(y_data).view(-1, 1)  # shape (seqLen, 1)

# define model
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, batch_size):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.rnncell = nn.RNNCell(input_size=self.input_size,
                                  hidden_size=self.hidden_size)

    def forward(self, input, hidden):
        hidden = self.rnncell(input, hidden)
        return hidden

    def init_hidden(self):
        return torch.zeros(self.batch_size, self.hidden_size)

model = Model(input_size, hidden_size, batch_size)

# loss & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

# training cycle
for epoch in range(15):
    loss = 0
    optimizer.zero_grad()
    hidden = model.init_hidden() # h0
    print('predicted string:', end='')
    for input, label in zip(inputs, labels):
        hidden = model(input, hidden)
        loss += criterion(hidden, label)
        _, idx = hidden.max(dim=1) # hidden has 4 components: the scores for e, h, l, o
        print(idx2char[idx.item()], end='')

    loss.backward()
    optimizer.step()
    print(', epoch [%d/15] loss = %.4f' % (epoch + 1, loss.item()))
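The hand-written one_hot_lookup table can also be replaced with torch.nn.functional.one_hot, which produces the same tensor; a minimal sketch:

import torch
import torch.nn.functional as F

x_data = [1, 0, 2, 2, 3]  # hello
inputs = F.one_hot(torch.tensor(x_data), num_classes=4).float().view(-1, 1, 4)
print(inputs.shape)  # torch.Size([5, 1, 4]), i.e. (seqLen, batch_size, input_size)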

Example 12-2: learning the "hello" → "ohlol" sequence transformation with nn.RNN

import torch
import torch.nn as nn
import torch.optim as optim

input_size = 4
hidden_size = 4
batch_size = 1
num_layers = 1
seq_len = 5

# prepare data
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3] # hello
y_data = [3, 1, 2, 3, 2] # ohlol

one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]] # rows for indices 0,1,2,3, i.e., e,h,l,o

x_one_hot = [one_hot_lookup[x] for x in x_data]

inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size)
labels = torch.LongTensor(y_data)

# define model
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, batch_size, num_layers):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.RNN = nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers)

    def forward(self, input):
        hidden = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)  # initial h0
        out, _ = self.RNN(input, hidden)
        return out.view(-1, self.hidden_size)  # reshape out to (seq_len * batch_size, hidden_size)


model = Model(input_size, hidden_size, batch_size, num_layers)

# loss & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.05)

# training cycle
for epoch in range(15):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    print('outputs:', outputs)
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()  # convert to a NumPy array
    print('idx', idx)
    print('Predicted:', ''.join([idx2char[x] for x in idx]), end='')  # end='' suppresses the newline; ''.join concatenates the characters
    print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
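Note that nn.CrossEntropyLoss expects scores of shape (N, C) and integer targets of shape (N,); that is why forward flattens the RNN output before returning it. A minimal shape check under the constants above:

outputs = model(inputs)             # (seq_len * batch_size, hidden_size)
print(outputs.shape, labels.shape)  # torch.Size([5, 4]) torch.Size([5])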

Embedding:

A one-hot representation is high-dimensional, sparse, and hardcoded; an embedding layer maps the sparse one-hot encoding into a low-dimensional, dense representation.

Network model: Embedding layer + RNN + Linear layer
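nn.Embedding is essentially a learnable lookup table: given LongTensor indices, it returns the corresponding dense rows. A minimal sketch with the sizes used below:

import torch

emb = torch.nn.Embedding(4, 10)          # 4 indices, each mapped to a 10-dim vector
x = torch.LongTensor([[1, 0, 2, 2, 3]])  # (batch, seq_len)
print(emb(x).shape)                      # torch.Size([1, 5, 10]), i.e. (batch, seq_len, embedding_size)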

import torch
import torch.nn as nn
import torch.optim as optim

num_class = 4       # 4 output classes
input_size = 4      # input dimension (number of distinct characters)
hidden_size = 8     # dimension of the hidden state
embedding_size = 10 # embed each index into a 10-dimensional space
num_layers = 2      # 2-layer RNN
batch_size = 1
seq_len = 5         # sequence length 5


# prepare data
idx2char = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]] # hello, shaped (batch, seq_len) as a nested list
y_data = [3, 1, 2, 3, 2] # ohlol

inputs = torch.LongTensor(x_data)
labels = torch.LongTensor(y_data)

# define model
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.emb = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.RNN(input_size=embedding_size,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          batch_first=True)
        self.fc = nn.Linear(hidden_size, num_class)

    def forward(self, x):
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)
        x = self.emb(x)
        x, _ = self.rnn(x, hidden)
        x = self.fc(x)
        return x.view(-1, num_class)

model = Model()

# loss & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.05)

# training cycle
for epoch in range(15):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    print('outputs:', outputs)
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()  # convert to a NumPy array
    print('idx', idx)
    print('Predicted:', ''.join([idx2char[x] for x in idx]), end='')  # end='' suppresses the newline; ''.join concatenates the characters
    print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
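After training, the model can be queried without gradient tracking; a minimal sketch (reusing model, inputs, and idx2char from above):

with torch.no_grad():
    _, idx = model(inputs).max(dim=1)
    print('final prediction:', ''.join(idx2char[i] for i in idx.tolist()))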
