29 - RNN: Principles, API Walkthrough, and a Line-by-Line Implementation


1. Model Framework

1.1 Types of memory cells

A memory cell stores information from past time steps. The common memory cells are listed below, followed by a short PyTorch sketch:

  • RNN
  • GRU
  • LSTM
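
All three memory cells have ready-made PyTorch modules. A minimal sketch (the sizes here are placeholders, not part of the article's code):

from torch import nn

rnn = nn.RNN(input_size=4, hidden_size=3)    # vanilla RNN, tanh nonlinearity by default
gru = nn.GRU(input_size=4, hidden_size=3)    # gated recurrent unit
lstm = nn.LSTM(input_size=4, hidden_size=3)  # long short-term memory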

1.2 Model topologies

  • Unidirectional recurrence
  • Bidirectional recurrence
  • Multi-layer unidirectional or bidirectional recurrence (a short sketch of the corresponding API arguments follows this list)
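
In the PyTorch API these topologies map to the num_layers and bidirectional constructor arguments. A minimal sketch (sizes are placeholders):

from torch import nn

uni_rnn = nn.RNN(input_size=4, hidden_size=3)                      # unidirectional, single layer
bi_rnn = nn.RNN(input_size=4, hidden_size=3, bidirectional=True)   # bidirectional
deep_rnn = nn.RNN(input_size=4, hidden_size=3, num_layers=2)       # stacked (multi-layer)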

1.3 Pros and cons

  • Pros:
    (1) Can handle variable-length sequences
    (2) Model size is independent of sequence length
    (3) Computation grows linearly with sequence length
    (4) Takes historical information into account
    (5) Well suited to streaming output
    (6) Weights are time-invariant (shared across all time steps)
  • Cons:
    (1) Sequential computation is slow
    (2) Cannot capture very long-range history

1.4 Application scenarios

  • AI poetry generation: a generation task, one2many
  • Text sentiment classification: many2one
  • Lexical analysis (sequence labeling): many2many
  • Machine translation: many2many
  • Speech recognition and generation
  • Language modeling

2. Formulas

2.1 The formula

The hidden-state update of a vanilla RNN (the same formula the hand-written code below implements) is

$h_t = \tanh(W_{ih} x_t + b_{ih} + W_{hh} h_{t-1} + b_{hh})$

  • $x_t$: the input at time step t
  • $h_t$: the hidden state at time step t
  • $h_{t-1}$: the hidden state at time step t-1
  • $W_{ih}$, $b_{ih}$ and $W_{hh}$, $b_{hh}$: the input-to-hidden and hidden-to-hidden weights and biases
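
To make the formula concrete, here is a minimal single-step sketch (the sizes are made up for illustration and are separate from the full code in section 3):

import torch

# made-up sizes, for illustration only
input_size, hidden_size = 4, 3
x_t = torch.randn(input_size)                 # input at time t
h_prev = torch.zeros(hidden_size)             # hidden state at time t-1
W_ih = torch.randn(hidden_size, input_size)   # input-to-hidden weight
W_hh = torch.randn(hidden_size, hidden_size)  # hidden-to-hidden weight
b_ih = torch.zeros(hidden_size)
b_hh = torch.zeros(hidden_size)

# h_t = tanh(W_ih x_t + b_ih + W_hh h_{t-1} + b_hh)
h_t = torch.tanh(W_ih @ x_t + b_ih + W_hh @ h_prev + b_hh)
print(h_t.shape)  # torch.Size([3])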

2.2 Input and output shapes

With batch_first=True (as in the code below), PyTorch's nn.RNN uses the following shapes:

  • Input: input of shape (batch_size, sequence_length, input_size), plus the initial hidden state h_0 of shape (num_layers * num_directions, batch_size, hidden_size)
  • Output: output of shape (batch_size, sequence_length, num_directions * hidden_size), containing the hidden state at every time step, plus the final hidden state h_n of shape (num_layers * num_directions, batch_size, hidden_size)
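
A quick sanity check of these shape conventions (a sketch, using sizes similar to the code below):

import torch
from torch import nn

rnn = nn.RNN(input_size=4, hidden_size=3, num_layers=1, batch_first=True)
x = torch.randn(2, 4, 4)        # (batch_size, sequence_length, input_size)
h_0 = torch.zeros(1, 2, 3)      # (num_layers * num_directions, batch_size, hidden_size)
output, h_n = rnn(x, h_0)
print(output.shape)             # torch.Size([2, 4, 3])
print(h_n.shape)                # torch.Size([1, 2, 3])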

3. Code

Two implementations are shown below:

  1. A unidirectional/bidirectional RNN built with the PyTorch API
  2. A hand-written unidirectional/bidirectional RNN that follows the formula above
import torch
from torch import nn

hidden_in = 4
hidden_out = 3
num_layers = 1

# define the RNN layers
rnn_layer = nn.RNN(input_size=hidden_in, hidden_size=hidden_out, num_layers=num_layers,
				   batch_first=True)
batch_size = 2
sequence_length = 4

# random init the input
my_input = torch.randn(batch_size, sequence_length, hidden_in)

# the initial hidden state is zero-initialized, shape (batch_size, hidden_out)
h_prev = torch.zeros(batch_size, hidden_out)
# my_output holds the hidden state of every time step
# h_n is the final hidden state
# nn.RNN expects h_0 of shape (num_layers, batch_size, hidden_size), hence the unsqueeze(0)
my_output, h_n = rnn_layer(my_input, h_prev.unsqueeze(0))


# print(f"my_output={my_output}")

# print(f"my_output.shape={my_output.shape}")

# print(f"h_n={h_n}")

# print(f"h_n.shape={h_n.shape}")


# custom_rnn_function
def custom_rnn_function(input, w_ih, w_hh, b_ih, b_hh, h_prev):
	"""
	Unidirectional RNN computed step by step from the formula

		h_t = tanh(W_ih @ x_t + b_ih + W_hh @ h_{t-1} + b_hh)

	where x_t is the input at time step t.

	:param input: input tensor of shape (batch_size, sequence_length, hidden_in)
	:param w_ih: input-to-hidden weight of shape (hidden_out, hidden_in)
	:param w_hh: hidden-to-hidden weight of shape (hidden_out, hidden_out)
	:param b_ih: input-to-hidden bias of shape (hidden_out,)
	:param b_hh: hidden-to-hidden bias of shape (hidden_out,)
	:param h_prev: previous hidden state of shape (batch_size, hidden_out)
	:return: output of shape (batch_size, sequence_length, hidden_out),
	         h_n of shape (1, batch_size, hidden_out)
	"""
	batch_size, sequence_length, hidden_in = input.shape
	hidden_out, hidden_in = w_ih.shape
	output = torch.zeros(batch_size, sequence_length, hidden_out)
	for t in range(sequence_length):
		# input[:,t,:].shape = [batch_size,hidden_in] -> (batch_size,hidden_in,1)
		x_t = input[:, t, :].unsqueeze(2)

		# w_ih.shape = [hidden_out,hidden_in] -> (batch_size,hidden_out,hidden_in)
		w_ih_batch = w_ih.unsqueeze(0).tile(batch_size, 1, 1)

		# w_hh = [hidden_out,hidden_out] -> (batch_size,hidden_out,hidden_out)
		# h_prev = [batch_size,hidden_out]

		w_hh_batch = w_hh.unsqueeze(0).tile(batch_size, 1, 1)

		# w_ih_times_x.shape=(batch_size,hidden_out,1) -> (batch_size,hidden_out)
		w_ih_times_x = torch.bmm(w_ih_batch, x_t).squeeze(-1)

		# w_hh_times_h.shape =(batch_size,hidden_out,1)->(batch_size,hidden_out)
		# h_prev = [batch_size,hidden_out] -> (batch_size,hidden_out,1)
		# w_hh = [hidden_out,hidden_out] -> (batch_size,hidden_out,hidden_out)
		w_hh_times_h = torch.bmm(w_hh_batch, h_prev.unsqueeze(2)).squeeze(-1)
		h_prev = torch.tanh((w_ih_times_x + b_ih + w_hh_times_h + b_hh))
		output[:, t, :] = h_prev

	return output, h_prev.unsqueeze(0)


# fetch rnn_layer's weight and bias tensors
custom_w_ih = rnn_layer.weight_ih_l0
custom_w_hh = rnn_layer.weight_hh_l0
custom_bias_ih = rnn_layer.bias_ih_l0
custom_bias_hh = rnn_layer.bias_hh_l0

# pass rnn_layer's weights into custom_rnn_function;
# if both functions produce the same output and h_n,
# then our custom implementation is correct
custom_output, custom_hn = custom_rnn_function(my_input, custom_w_ih, custom_w_hh,
											   custom_bias_ih, custom_bias_hh, h_prev)


# print(f"custom_output={custom_output}")
# print(f"custom_hn={custom_hn}")
# print(f"my_output={my_output}")
# print(f"h_n={h_n}")
# print("check whether custom_output is equal to my_output")
# print(torch.isclose(custom_output, my_output))
# print("check whether custom_hn is equal to h_n")
# print(torch.isclose(custom_hn, h_n))
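# the element-wise comparison above is useful for inspection; torch.allclose
# collapses it into a single True/False if a one-line sanity check is preferred:
# print(torch.allclose(custom_output, my_output))
# print(torch.allclose(custom_hn, h_n))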


# custom bidirectional rnn function (runs custom_rnn_function in both directions)
def bicstm_rnn_function(input, w_ih, w_hh, b_ih, b_hh, h_prev,
						w_ih_reverse, w_hh_reverse, b_ih_reverse, b_hh_reverse):
	batch_size, sequence_length, hidden_in = input.shape
	hidden_out, hidden_in = w_ih.shape
	output = torch.zeros(batch_size, sequence_length, hidden_out * 2)


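	# forward direction: run the step-by-step RNN over the input in its original order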
	forward_output = custom_rnn_function(input, w_ih, w_hh, b_ih, b_hh, h_prev)[0]
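	# backward direction (1st reversal): flip the input along the time axis (dim 1)
	# and reuse the same step function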
	backward_output = custom_rnn_function(torch.flip(input, [1]), w_ih_reverse, w_hh_reverse, b_ih_reverse,
													 b_hh_reverse,h_prev)[0]
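	# assemble the output: forward states fill the first hidden_out channels;
	# 2nd reversal: flip the backward states back so that time step t of both
	# directions is aligned before concatenation along the feature dimension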
	output[:, :, :hidden_out] = forward_output
	output[:, :, hidden_out:] = torch.flip(backward_output,[1])


	# old
	# return output, output[:, -1, :].reshape((batch_size, 2, hidden_out)).transpose(0, 1)
	return output, torch.cat([forward_output[:,-1,:].unsqueeze(0),backward_output[:,-1,:].unsqueeze(0)],dim=0)


bi_rnn_layer = nn.RNN(input_size=hidden_in, hidden_size=hidden_out, num_layers=num_layers,
					  batch_first=True, bidirectional=True)

bi_h_prev = torch.zeros(2, batch_size, hidden_out)

bi_my_output, bi_h_n = bi_rnn_layer(my_input, bi_h_prev)
print(f"bi_my_output={bi_my_output}")
print(f"bi_h_n={bi_h_n}")

for k, v in bi_rnn_layer.named_parameters():
	print(k, v, v.shape)

bicstm_weight_ih_l0 = bi_rnn_layer.weight_ih_l0
bicstm_weight_hh_l0 = bi_rnn_layer.weight_hh_l0
bicstm_bias_ih_l0 = bi_rnn_layer.bias_ih_l0
bicstm_bias_hh_l0 = bi_rnn_layer.bias_hh_l0
bicstm_weight_ih_l0_reverse = bi_rnn_layer.weight_ih_l0_reverse
bicstm_weight_hh_l0_reverse = bi_rnn_layer.weight_hh_l0_reverse
bicstm_bias_ih_l0_reverse = bi_rnn_layer.bias_ih_l0_reverse
bicstm_bias_hh_l0_reverse = bi_rnn_layer.bias_hh_l0_reverse

bicstm_output, bicstm_h_n = bicstm_rnn_function(my_input, bicstm_weight_ih_l0, bicstm_weight_hh_l0, bicstm_bias_ih_l0,
												bicstm_bias_hh_l0,
												bi_h_prev[0], bicstm_weight_ih_l0_reverse, bicstm_weight_hh_l0_reverse,
												bicstm_bias_ih_l0_reverse,
												bicstm_bias_hh_l0_reverse)

print("pytorch API rnn")
# bi_my_output, bi_h_n
print(f"bi_my_output={bi_my_output}")
print(f"bi_h_n={bi_h_n}")
print("bicstm_output is equal to bi_my_output")
print(torch.isclose(bicstm_output,bi_my_output))
print(torch.isclose(bicstm_output,bi_my_output).shape)
print("bicstm_h_n is equal to bi_h_n")
print(torch.isclose(bicstm_h_n,bi_h_n))
print(torch.isclose(bicstm_h_n,bi_h_n).shape)

print("custom bidirectional rnn")
print(f"bicstm_output={bicstm_output}")
print(f"bicstm_output.shape={bicstm_output.shape}")
print(f"bicstm_h_n={bicstm_h_n}")
print(f"bicstm_h_n.shape={bicstm_h_n.shape}")
print("*"*50)
print(f"bi_my_output={bi_my_output}")
print(f"bi_my_output.shape={bi_my_output.shape}")
print(f"bi_h_n={bi_h_n}")
print(f"bi_h_n.shape={bi_h_n.shape}")
  • Results:
bi_my_output=tensor([[[-0.3309, -0.6514,  0.8314, -0.7133, -0.8930, -0.7533],
         [-0.3520, -0.7544,  0.8153, -0.4129, -0.8242, -0.8933],
         [ 0.0365, -0.5342,  0.9046, -0.4952, -0.2890, -0.1609],
         [-0.3345, -0.4509,  0.6656, -0.7710, -0.4374, -0.2100]],

        [[-0.2685, -0.5694,  0.7102, -0.6100, -0.8374, -0.8259],
         [ 0.3823, -0.2883,  0.6636, -0.0259,  0.0331, -0.9175],
         [ 0.0728, -0.1796,  0.6884, -0.3441, -0.0081, -0.3160],
         [ 0.2953, -0.1599,  0.7536, -0.1300, -0.3686,  0.1407]]],
       grad_fn=<TransposeBackward1>)
bi_h_n=tensor([[[-0.3345, -0.4509,  0.6656],
         [ 0.2953, -0.1599,  0.7536]],

        [[-0.7133, -0.8930, -0.7533],
         [-0.6100, -0.8374, -0.8259]]], grad_fn=<StackBackward>)
weight_ih_l0 Parameter containing:
tensor([[-0.2153, -0.3879, -0.2379,  0.3674],
        [-0.3065, -0.1250, -0.3957,  0.0352],
        [ 0.4447, -0.1330,  0.3051,  0.5727]], requires_grad=True) torch.Size([3, 4])
weight_hh_l0 Parameter containing:
tensor([[-0.4278,  0.4842,  0.4345],
        [ 0.1279,  0.3849, -0.0556],
        [-0.2065, -0.4020,  0.4729]], requires_grad=True) torch.Size([3, 3])
bias_ih_l0 Parameter containing:
tensor([-0.4691, -0.4634,  0.5497], requires_grad=True) torch.Size([3])
bias_hh_l0 Parameter containing:
tensor([ 0.1844, -0.0852,  0.3367], requires_grad=True) torch.Size([3])
weight_ih_l0_reverse Parameter containing:
tensor([[-0.0869, -0.5565, -0.0156,  0.2845],
        [-0.4279, -0.1971, -0.3512,  0.3958],
        [ 0.4047,  0.1749, -0.4136,  0.5187]], requires_grad=True) torch.Size([3, 4])
weight_hh_l0_reverse Parameter containing:
tensor([[-0.3453,  0.2386,  0.5089],
        [-0.3314,  0.3804,  0.4880],
        [ 0.5452, -0.0493,  0.3664]], requires_grad=True) torch.Size([3, 3])
bias_ih_l0_reverse Parameter containing:
tensor([-0.3213, -0.5536,  0.1178], requires_grad=True) torch.Size([3])
bias_hh_l0_reverse Parameter containing:
tensor([-0.0876, -0.2065, -0.4133], requires_grad=True) torch.Size([3])
pytorch API rnn
bi_my_output=tensor([[[-0.3309, -0.6514,  0.8314, -0.7133, -0.8930, -0.7533],
         [-0.3520, -0.7544,  0.8153, -0.4129, -0.8242, -0.8933],
         [ 0.0365, -0.5342,  0.9046, -0.4952, -0.2890, -0.1609],
         [-0.3345, -0.4509,  0.6656, -0.7710, -0.4374, -0.2100]],

        [[-0.2685, -0.5694,  0.7102, -0.6100, -0.8374, -0.8259],
         [ 0.3823, -0.2883,  0.6636, -0.0259,  0.0331, -0.9175],
         [ 0.0728, -0.1796,  0.6884, -0.3441, -0.0081, -0.3160],
         [ 0.2953, -0.1599,  0.7536, -0.1300, -0.3686,  0.1407]]],
       grad_fn=<TransposeBackward1>)
bi_h_n=tensor([[[-0.3345, -0.4509,  0.6656],
         [ 0.2953, -0.1599,  0.7536]],

        [[-0.7133, -0.8930, -0.7533],
         [-0.6100, -0.8374, -0.8259]]], grad_fn=<StackBackward>)
         
# Key point: every comparison is True, so our own computation matches the official API
bicstm_output is equal to bi_my_output
tensor([[[True, True, True, True, True, True],
         [True, True, True, True, True, True],
         [True, True, True, True, True, True],
         [True, True, True, True, True, True]],

        [[True, True, True, True, True, True],
         [True, True, True, True, True, True],
         [True, True, True, True, True, True],
         [True, True, True, True, True, True]]])
torch.Size([2, 4, 6])
bicstm_h_n is equal to bi_h_n
tensor([[[True, True, True],
         [True, True, True]],

        [[True, True, True],
         [True, True, True]]])
         
torch.Size([2, 2, 3])
custom bidirectional rnn
bicstm_output=tensor([[[-0.3309, -0.6514,  0.8314, -0.7133, -0.8930, -0.7533],
         [-0.3520, -0.7544,  0.8153, -0.4129, -0.8242, -0.8933],
         [ 0.0365, -0.5342,  0.9046, -0.4952, -0.2890, -0.1609],
         [-0.3345, -0.4509,  0.6656, -0.7710, -0.4374, -0.2100]],

        [[-0.2685, -0.5694,  0.7102, -0.6100, -0.8374, -0.8259],
         [ 0.3823, -0.2883,  0.6636, -0.0259,  0.0331, -0.9175],
         [ 0.0728, -0.1796,  0.6884, -0.3441, -0.0081, -0.3160],
         [ 0.2953, -0.1599,  0.7536, -0.1300, -0.3686,  0.1407]]],
       grad_fn=<CopySlices>)
bicstm_output.shape=torch.Size([2, 4, 6])
bicstm_h_n=tensor([[[-0.3345, -0.4509,  0.6656],
         [ 0.2953, -0.1599,  0.7536]],

        [[-0.7133, -0.8930, -0.7533],
         [-0.6100, -0.8374, -0.8259]]], grad_fn=<CatBackward>)
bicstm_h_n.shape=torch.Size([2, 2, 3])
**************************************************
bi_my_output=tensor([[[-0.3309, -0.6514,  0.8314, -0.7133, -0.8930, -0.7533],
         [-0.3520, -0.7544,  0.8153, -0.4129, -0.8242, -0.8933],
         [ 0.0365, -0.5342,  0.9046, -0.4952, -0.2890, -0.1609],
         [-0.3345, -0.4509,  0.6656, -0.7710, -0.4374, -0.2100]],

        [[-0.2685, -0.5694,  0.7102, -0.6100, -0.8374, -0.8259],
         [ 0.3823, -0.2883,  0.6636, -0.0259,  0.0331, -0.9175],
         [ 0.0728, -0.1796,  0.6884, -0.3441, -0.0081, -0.3160],
         [ 0.2953, -0.1599,  0.7536, -0.1300, -0.3686,  0.1407]]],
       grad_fn=<TransposeBackward1>)
bi_my_output.shape=torch.Size([2, 4, 6])
bi_h_n=tensor([[[-0.3345, -0.4509,  0.6656],
         [ 0.2953, -0.1599,  0.7536]],

        [[-0.7133, -0.8930, -0.7533],
         [-0.6100, -0.8374, -0.8259]]], grad_fn=<StackBackward>)
bi_h_n.shape=torch.Size([2, 2, 3])

Process finished with exit code 0

4. Summary

For the unidirectional RNN, the main thing to watch is the shape of each weight matrix and of the input. For the bidirectional RNN there are two reversals: the first reverses the input sequence so the backward direction can reuse the same step function, and the second reverses the backward output back into the original time order before concatenation. See the code above for details; a minimal sketch of the two reversals follows.
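
The sketch below is illustrative only; torch.flip along dim 1 is the same call bicstm_rnn_function uses:

import torch

x = torch.arange(8.).reshape(1, 4, 2)    # (batch, time, feature)
x_rev = torch.flip(x, [1])               # 1st reversal: the backward direction reads the sequence right to left
restored = torch.flip(x_rev, [1])        # 2nd reversal: flipping the backward result restores the original time order
print(torch.equal(x, restored))          # True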
