Reference: https://www.cnblogs.com/lovychen/p/9368390.html
LSTM formulas:
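In standard notation (a reconstruction, assuming the usual forget/input/output-gate formulation; \sigma is the sigmoid and \odot the element-wise product):

\begin{aligned}
f_t &= \sigma(W_f \cdot [h_{t-1}, x_t] + b_f) \\
i_t &= \sigma(W_i \cdot [h_{t-1}, x_t] + b_i) \\
\tilde{C}_t &= \tanh(W_C \cdot [h_{t-1}, x_t] + b_C) \\
C_t &= f_t \odot C_{t-1} + i_t \odot \tilde{C}_t \\
o_t &= \sigma(W_o \cdot [h_{t-1}, x_t] + b_o) \\
h_t &= o_t \odot \tanh(C_t)
\end{aligned}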
Shape of each tensor in TensorFlow:
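For the BasicRNNCell analyzed below, the shapes that the code comments work out are:

inputs Xt:             [batch_size, emb_size]
state Ht-1:            [batch_size, hidden_size]
kernel W:              [emb_size + hidden_size, hidden_size]
bias B:                [hidden_size]
output = new state Ht: [batch_size, hidden_size]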
BasicRNNCell source code analysis
build: creates the variables W and B
call: runs one step of the forward computation (see the formula below)
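Each step therefore computes, per the docstring in call() below:

output = new_state = Ht = tanh([Xt, Ht-1] * W + B)

so the output at step t and the new hidden state are the same tensor.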
class BasicRNNCell(LayerRNNCell):
  """The most basic RNN cell.

  Args:
    num_units: int, The number of units in the RNN cell.
    activation: Nonlinearity to use.  Default: `tanh`.
    reuse: (optional) Python boolean describing whether to reuse variables
      in an existing scope.  If not `True`, and the existing scope already has
      the given variables, an error is raised.
    name: String, the name of the layer. Layers with the same name will
      share weights, but to avoid mistakes we require reuse=True in such
      cases.
    dtype: Default dtype of the layer (default of `None` means use the type
      of the first input). Required when `build` is called before `call`.
  """
  def __init__(self,
               num_units,
               activation=None,
               reuse=None,
               name=None,
               dtype=None):
    super(BasicRNNCell, self).__init__(_reuse=reuse, name=name, dtype=dtype)  # pass the defaults through to LayerRNNCell's constructor

    # Inputs must be 2-dimensional.
    self.input_spec = base_layer.InputSpec(ndim=2)

    self._num_units = num_units
    self._activation = activation or math_ops.tanh
  @property
  def state_size(self):  # size of the hidden state
    return self._num_units

  @property
  def output_size(self):  # size of the output
    return self._num_units
  def build(self, inputs_shape):  # create the variables W and B
    if inputs_shape[1].value is None:  # the last input dimension must be statically known
      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                       % inputs_shape)

    input_depth = inputs_shape[1].value
    # W: [input_size + hidden_size, hidden_size]
    # B: [hidden_size]
    self._kernel = self.add_variable(  # register a trainable variable
        _WEIGHTS_VARIABLE_NAME,
        shape=[input_depth + self._num_units, self._num_units])
    self._bias = self.add_variable(
        _BIAS_VARIABLE_NAME,
        shape=[self._num_units],
        initializer=init_ops.zeros_initializer(dtype=self.dtype))  # the bias is initialized to zeros

    self.built = True
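  # Note: _WEIGHTS_VARIABLE_NAME and _BIAS_VARIABLE_NAME are module-level
  # constants in rnn_cell_impl.py ("kernel" and "bias"), so the two variables
  # appear in checkpoints as <scope>/kernel and <scope>/bias.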
  # call() runs once per time step
  def call(self, inputs, state):  # the whole batch is multiplied through in one matmul
    """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
    # output = Ht = tanh([Xt, Ht-1] * W + B)
    # at t = 0, state is initialized to zeros
    # inputs shape: [batch_size, emb_size]
    # state  shape: [batch_size, hidden_size]
    # computing [inputs, state] * [W, U] == [Xt, Ht-1] * W gives shape [batch_size, hidden_size]
    # array_ops.concat joins the two matrices into shape
    # [batch_size, input_size + hidden_size], i.e. [Xt, Ht-1]
    gate_inputs = math_ops.matmul(
        array_ops.concat([inputs, state], 1), self._kernel)
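    # Equivalently: concat([Xt, Ht-1], 1) * W == Xt * Wx + Ht-1 * Wh, where the
    # kernel stacks Wx ([input_depth, num_units]) on top of Wh
    # ([num_units, num_units]); fusing them into a single matmul is cheaper than two.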
    gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)  # nn_ops.bias_add adds B to every example in the batch
    output = self._activation(gate_inputs)  # activation defaults to tanh
    # the returned shape is [batch_size, hidden_size]
    # the same tensor is both the output at t and the state (Ht) fed into step t+1
    return output, output
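A minimal usage sketch (assuming the TF 1.x API; the batch/embedding/hidden sizes are illustrative). The cell is applied to a single time step, and the returned output and new state are the same tensor:

import tensorflow as tf

batch_size, emb_size, hidden_size = 32, 128, 64
x = tf.placeholder(tf.float32, [batch_size, emb_size])     # one time step of input
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=hidden_size)
h0 = cell.zero_state(batch_size, tf.float32)               # zeros, [batch_size, hidden_size]
output, h1 = cell(x, h0)                                   # both [batch_size, hidden_size]; output is h1

For a full sequence, the same cell would normally be driven by tf.nn.dynamic_rnn rather than called step by step.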