I recently implemented a GRUCell with PaddlePaddle; here is the code:
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

class GRUCell(nn.Layer):
    def __init__(self, input_size, hidden_size):
        super(GRUCell, self).__init__()
        # size of the input vector
        self.input_size = input_size
        # size of the hidden state
        self.hidden_size = hidden_size
        # weights for the input projection (constant init keeps the demo deterministic)
        weight_attr1 = paddle.ParamAttr(name="weight1", initializer=paddle.nn.initializer.Constant(value=0.5))
        # bias for the input projection
        bias_attr1 = paddle.ParamAttr(name="bias1", initializer=paddle.nn.initializer.Constant(value=1.0))
        # weights for the hidden-state projection
        weight_attr2 = paddle.ParamAttr(name="weight2", initializer=paddle.nn.initializer.Constant(value=0.5))
        # bias for the hidden-state projection
        bias_attr2 = paddle.ParamAttr(name="bias2", initializer=paddle.nn.initializer.Constant(value=1.0))
        # input projection w*x+b, producing all three gates at once
        self.x2h = nn.Linear(input_size, 3 * hidden_size, weight_attr=weight_attr1, bias_attr=bias_attr1)
        # hidden-state projection w*h+b, also producing all three gates
        self.h2h = nn.Linear(hidden_size, 3 * hidden_size, weight_attr=weight_attr2, bias_attr=bias_attr2)

    def forward(self, x, hidden):
        # project the input: multiply x by w and add the bias
        x_t = self.x2h(x)
        # project the previous hidden state: multiply hidden by w and add the bias
        h_t = self.h2h(hidden)
        # split the input projection into three gate-sized sub-tensors
        x_reset, x_update, x_new = x_t.chunk(3, 1)
        # split the hidden projection into three gate-sized sub-tensors
        h_reset, h_update, h_new = h_t.chunk(3, 1)
        # reset gate
        r_t = F.sigmoid(x_reset + h_reset)
        # update gate
        z_t = F.sigmoid(x_update + h_update)
        # candidate hidden state (current memory content)
        new_gate = F.tanh(x_new + (r_t * h_new))
        # final hidden state for this time step
        h_t = (1 - z_t) * hidden + z_t * new_gate
        return h_t
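For reference, the forward pass implements the standard GRU equations, in the original Cho et al. convention where the update gate z_t weights the candidate state:

r_t = \sigma(W_{xr} x_t + b_{xr} + W_{hr} h_{t-1} + b_{hr})
z_t = \sigma(W_{xz} x_t + b_{xz} + W_{hz} h_{t-1} + b_{hz})
\tilde{h}_t = \tanh(W_{xn} x_t + b_{xn} + r_t \odot (W_{hn} h_{t-1} + b_{hn}))
h_t = (1 - z_t) \odot h_{t-1} + z_t \odot \tilde{h}_t

Note that PyTorch's nn.GRUCell uses the mirrored convention h_t = (1 - z_t) \odot \tilde{h}_t + z_t \odot h_{t-1}; both are valid GRUs, differing only in which branch the update gate weights.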
if __name__ == "__main__":
    model = GRUCell(16, 16)
    x = paddle.randn((1, 16))
    hidden = paddle.randn((1, 16))
    res = model(x, hidden)
    print(res)
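Since the cell only computes a single time step, running it over a sequence means looping over the time axis and threading the hidden state through. Below is a minimal sketch; the [batch, seq_len, input_size] layout and the zero initial state are my assumptions, not part of the original, and because the ParamAttr names above are fixed, a second GRUCell instance in the same program may clash, so run this in place of the test block:

import paddle

# hypothetical sequence: batch of 1, 5 time steps, 16 features (assumed layout)
seq = paddle.randn((1, 5, 16))
cell = GRUCell(16, 16)
# assumed zero initial hidden state
h = paddle.zeros((1, 16))
outputs = []
for t in range(seq.shape[1]):
    # feed one time step and carry the hidden state forward
    h = cell(seq[:, t, :], h)
    outputs.append(h)
# stack the per-step hidden states into [batch, seq_len, hidden_size]
outputs = paddle.stack(outputs, axis=1)
print(outputs.shape)  # [1, 5, 16]

For real use, Paddle also ships a built-in paddle.nn.GRUCell that handles parameter initialization and naming automatically.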
If you run into any problems, feel free to reach out.
References
[1] https://github.com/georgeyiasemis/Recurrent-Neural-Networks-from-scratch-using-PyTorch/blob/main/rnncells.py