摘要
本文使用纯 Python(基于 NumPy)和 PyTorch 对比实现循环神经网络 RNN 及其反向传播。
相关
原理和详细解释,请参考:
循环神经网络RNNCell单元详解及反向传播的梯度求导
https://blog.csdn.net/oBrightLamp/article/details/85015325
正文
import torch
import numpy as np
class RNNCell:
def __init__(self, weight_ih, weight_hh,
bias_ih, bias_hh):
self.weight_ih = weight_ih
self.weight_hh = weight_hh
self.bias_ih = bias_ih
self.bias_hh = bias_hh
self.x_stack = []
self.dx_list = []
self.dw_ih_stack = []
self.dw_hh_stack = []
self.db_ih_stack = []
self.db_hh_stack = []
self.prev_hidden_stack = []
self.next_hidden_stack = []
# temporary cache
self.prev_dh = None
def __call__(self, x, prev_hidden):
self.x_stack.append(x)
next_h = np.tanh(
np.dot(x, self.weight_ih.T)
+ np.dot(prev_hidden, self.weight_hh.T)
+ self.bias_ih + self.bias_hh)
self.prev_hidden_stack.append(prev_hidden)
self.next_hidden_stack.append(next_h)
# clean cache
self.prev_dh = np.zeros(next_h.shape)
return next_h
def backward(self, dh):
x = self.x_stack