BLSTM
Let's start with the code:
import tensorflow as tf
from tensorflow.contrib import rnn


class BLSTM(object):
    def __init__(self, embedded_chars, hidden_unit, cell_type, num_layers, dropout_rate,
                 initializers, num_labels, seq_length, labels, lengths, is_training):
        """
        BLSTM network
        :param embedded_chars: fine-tuned embedding input
        :param hidden_unit: number of hidden units in the LSTM cell
        :param cell_type: RNN cell type ('lstm' or 'gru')
        :param num_layers: number of RNN layers
        :param dropout_rate: dropout rate
        :param initializers: variable initializer class
        :param num_labels: number of labels
        :param seq_length: maximum sequence length
        :param labels: gold labels
        :param lengths: [batch_size] true length of each sequence in the batch
        :param is_training: whether this is the training phase
        """
        self.hidden_unit = hidden_unit
        self.dropout_rate = dropout_rate
        self.cell_type = cell_type
        self.num_layers = num_layers
        self.embedded_chars = embedded_chars
        self.initializers = initializers
        self.seq_length = seq_length
        self.num_labels = num_labels
        self.labels = labels
        self.lengths = lengths
        self.embedding_dims = embedded_chars.shape[-1].value
        self.is_training = is_training

    def _witch_cell(self):
        """
        Create a single cell of the configured RNN type.
        :return: an LSTMCell or GRUCell
        """
        cell_tmp = None
        if self.cell_type == 'lstm':
            cell_tmp = rnn.LSTMCell(self.hidden_unit)
        elif self.cell_type == 'gru':
            cell_tmp = rnn.GRUCell(self.hidden_unit)
        return cell_tmp

    def _bi_dir_rnn(self):
        """
        Bidirectional RNN: apply dropout to the output of each cell.
        :return: (forward cell, backward cell)
        """
        cell_fw = self._witch_cell()
        cell_bw = self._witch_cell()
        if self.dropout_rate is not None:
            # Note: despite the name, dropout_rate is used here as the *keep* probability.
            cell_bw = rnn.DropoutWrapper(cell_bw, output_keep_prob=self.dropout_rate)
            cell_fw = rnn.DropoutWrapper(cell_fw, output_keep_prob=self.dropout_rate)
        return cell_fw, cell_bw

    def blstm_layer(self, embedding_chars):
        """
        Stack single cells into (possibly multi-layer) LSTM layers,
        then combine a forward and a backward layer into a BLSTM.
        :return: [batch_size, seq_length, hidden_unit * 2]
        """
        with tf.variable_scope('rnn_layer'):
            cell_fw, cell_bw = self._bi_dir_rnn()
            if self.num_layers > 1:
                # Each layer needs its own cell instance; [cell] * num_layers would
                # reuse a single object and share its variables across layers.
                fw_cells, bw_cells = zip(*[self._bi_dir_rnn() for _ in range(self.num_layers)])
                cell_fw = rnn.MultiRNNCell(list(fw_cells), state_is_tuple=True)
                cell_bw = rnn.MultiRNNCell(list(bw_cells), state_is_tuple=True)
            # Pass the true sequence lengths so padded time steps are skipped.
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, embedding_chars,
                                                         sequence_length=self.lengths,
                                                         dtype=tf.float32)
            # Concatenate forward and backward outputs along the feature axis.
            outputs = tf.concat(outputs, axis=2)
        return outputs

    def project_bilstm_layer(self, lstm_outputs, name=None):
        """
        Project the BLSTM output: first a fully connected hidden layer,
        then a logits layer over the tag set.
        :param lstm_outputs: [batch_size, num_steps, hidden_unit * 2]
        :return: [batch_size, num_steps, num_tags]
        """
        with tf.variable_scope("project" if not name else name):
            with tf.variable_scope("hidden"):
                W = tf.get_variable("W", shape=[self.hidden_unit * 2, self.hidden_unit],
                                    dtype=tf.float32, initializer=self.initializers.xavier_initializer())
                b = tf.get_variable("b", shape=[self.hidden_unit], dtype=tf.float32,
                                    initializer=tf.zeros_initializer())
                output = tf.reshape(lstm_outputs, shape=[-1, self.hidden_unit * 2])
                hidden = tf.tanh(tf.nn.xw_plus_b(output, W, b))
            # project to scores over tags
            with tf.variable_scope("logits"):
                W = tf.get_variable("W", shape=[self.hidden_unit, self.num_labels],
                                    dtype=tf.float32, initializer=self.initializers.xavier_initializer())
                b = tf.get_variable("b", shape=[self.num_labels], dtype=tf.float32,
                                    initializer=tf.zeros_initializer())
                pred = tf.nn.xw_plus_b(hidden, W, b)
            return tf.reshape(pred, [-1, self.seq_length, self.num_labels])
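For context, here is a minimal usage sketch of the class above; the placeholder shapes, the hyperparameter values, and the choice of tf.contrib.layers as the xavier-initializer provider are illustrative assumptions, not taken from the original code:

# Minimal usage sketch (assumed shapes/values, for illustration only).
from tensorflow.contrib import layers as contrib_layers

max_len, emb_dim = 128, 768                      # assumed sizes
embedded = tf.placeholder(tf.float32, [None, max_len, emb_dim])
lengths = tf.placeholder(tf.int32, [None])
labels = tf.placeholder(tf.int32, [None, max_len])

blstm = BLSTM(embedded_chars=embedded, hidden_unit=128, cell_type='lstm',
              num_layers=1, dropout_rate=0.9, initializers=contrib_layers,
              num_labels=10, seq_length=max_len, labels=labels,
              lengths=lengths, is_training=True)
lstm_out = blstm.blstm_layer(embedded)           # [batch, max_len, 256]
logits = blstm.project_bilstm_layer(lstm_out)    # [batch, max_len, 10]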
The important functions used here:
1. rnn.LSTMCell
num_units: int, the number of units in the LSTM cell; the "width" of the network.
cell_clip: (optional) a float, typically 5.0. If provided, the cell state is clipped to this value before the cell output activation.
initializer: (optional) the initializer to use for the weight matrices.
num_proj: (optional) an int, the output dimensionality of the projection matrix. If None, no projection is performed. (This is the fully connected layer we usually add on top of the RNN output; num_proj is that layer's size.)
state_is_tuple: defaults to True, so states are accepted and returned as (c_state, m_state) tuples. If False, they are concatenated along the column axis and only concat([c_state, m_state], axis=-1) is returned.
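A quick sketch of these arguments in use (the concrete values and the orthogonal initializer are illustrative assumptions; rnn is tf.contrib.rnn as imported above):

cell = rnn.LSTMCell(
    num_units=128,                            # width of the cell
    cell_clip=5.0,                            # clip the cell state to [-5, 5]
    initializer=tf.orthogonal_initializer(),  # weight initializer
    num_proj=64,                              # project the 128-dim output down to 64
    state_is_tuple=True)                      # state returned as (c, h)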
2. rnn.DropoutWrapper
Dropout works differently in an RNN than in a CNN: it is not applied to the recurrent connections, i.e. when the state from step t-1 is carried into the computation at step t, the memory is never dropped. Dropout happens only within a single time step t, on the information passed between stacked cells.
The wrapper exposes input_keep_prob and output_keep_prob. Set input_keep_prob to drop part of the input before it enters the cell, so the cell only sees part of its input; set output_keep_prob to drop part of the cell's output before it becomes the next layer's input. In other words, dropout acts between layers (e.g. input layer to LSTM1, or LSTM1 to LSTM2), but within one layer nothing is dropped between steps T and T+1. Compare the plain-tensor version:
tf.nn.dropout(output_layer, keep_prob=0.9)
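A sketch of wrapping a cell this way (the keep probabilities are illustrative):

cell = rnn.LSTMCell(128)
cell = rnn.DropoutWrapper(
    cell,
    input_keep_prob=1.0,    # keep the full input to this cell
    output_keep_prob=0.9)   # drop 10% of the output fed to the next layer
# The recurrent state carried from step t-1 to step t is never dropped.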
3. rnn.MultiRNNCell
An RNN cell composed of several simpler cells, used to build multi-layer recurrent networks.
__init__(cells, state_is_tuple=True)
cells: a list of RNN cells
state_is_tuple: (optional) if True, the states Ct and ht are kept separate in a tuple, and the accepted/returned states are n-tuples with n = len(cells); if False, the states are concatenated along the column axis.
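For example, a sketch of a three-layer stack; note that each layer gets its own cell object, since repeating one instance would share its variables across layers:

num_layers = 3
cells = [rnn.DropoutWrapper(rnn.LSTMCell(128), output_keep_prob=0.9)
         for _ in range(num_layers)]    # a fresh cell object per layer
stacked = rnn.MultiRNNCell(cells, state_is_tuple=True)
# With state_is_tuple=True, states are an n-tuple with n == len(cells).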
4. tf.nn.bidirectional_dynamic_rnn
bidirectional_dynamic_rnn(
    cell_fw,                  # forward RNN cell
    cell_bw,                  # backward RNN cell
    inputs,                   # input tensor
    sequence_length=None,     # true length of each input sequence (optional; defaults to the maximum sequence length)
    initial_state_fw=None,    # initial state of the forward RNN (optional)
    initial_state_bw=None,    # initial state of the backward RNN (optional)
    dtype=None,               # data type of the initial states and outputs (optional)
    parallel_iterations=None,
    swap_memory=False,
    time_major=False,         # layout of the input/output tensors:
                              # if True, they must have shape [max_time, batch_size, depth];
                              # if False, they must have shape [batch_size, max_time, depth]
    scope=None
)
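A minimal call sketch under the default batch-major layout (the shapes are assumptions):

cell_fw = rnn.LSTMCell(128)
cell_bw = rnn.LSTMCell(128)
inputs = tf.placeholder(tf.float32, [None, 50, 300])   # [batch, max_time, depth]
lengths = tf.placeholder(tf.int32, [None])
(out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, inputs,
    sequence_length=lengths,   # padded steps beyond each true length are skipped
    dtype=tf.float32)          # time_major=False (default): batch-major inputs
outputs = tf.concat([out_fw, out_bw], axis=2)          # [batch, max_time, 2 * 128]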