def project_crf_layer(self, embedding_chars, name=None):
    """
    Hidden projection layer that maps the input embeddings directly to tag logits.
    :param embedding_chars: [batch_size, num_steps, emb_size]
    :return: [batch_size, num_steps, num_tags]
    """
    with tf.variable_scope("project" if not name else name):
        with tf.variable_scope("logits"):
            W = tf.get_variable("W", shape=[self.embedding_dims, self.num_labels],
                                dtype=tf.float32, initializer=self.initializers.xavier_initializer())
            b = tf.get_variable("b", shape=[self.num_labels], dtype=tf.float32,
                                initializer=tf.zeros_initializer())
            # Flatten the batch and time dimensions before the affine transform.
            output = tf.reshape(embedding_chars,
                                shape=[-1, self.embedding_dims])  # [batch_size * num_steps, embedding_dims]
            pred = tf.tanh(tf.nn.xw_plus_b(output, W, b))
            return tf.reshape(pred, [-1, self.seq_length, self.num_labels])
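
# --- Standalone shape check for the projection above (illustrative sketch, not part of the class). ---
# A minimal TF 1.x example, assuming embedding_dims=768, seq_length=128 and num_labels=9
# (all hypothetical values): flatten [batch, seq, emb] to [batch*seq, emb], apply W/b with
# a tanh, then reshape back to per-token tag logits.
#
# import numpy as np
# import tensorflow as tf
#
# def _projection_sketch(embedded_chars, embedding_dims, seq_length, num_labels):
#     W = tf.get_variable("W_sketch", shape=[embedding_dims, num_labels], dtype=tf.float32)
#     b = tf.get_variable("b_sketch", shape=[num_labels], dtype=tf.float32,
#                         initializer=tf.zeros_initializer())
#     flat = tf.reshape(embedded_chars, [-1, embedding_dims])   # [batch*seq, emb]
#     pred = tf.tanh(tf.nn.xw_plus_b(flat, W, b))               # [batch*seq, num_labels]
#     return tf.reshape(pred, [-1, seq_length, num_labels])     # [batch, seq, num_labels]
#
# x = tf.constant(np.random.randn(2, 128, 768), dtype=tf.float32)
# logits = _projection_sketch(x, 768, 128, 9)
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     print(sess.run(logits).shape)  # (2, 128, 9)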

def crf_layer(self, logits):
    """
    Compute the CRF log-likelihood loss and the tag transition matrix.
    :param logits: [batch_size, num_steps, num_tags]
    :return: (scalar loss, transition matrix); loss is None when no labels are provided
    """
    with tf.variable_scope("crf_loss"):
        trans = tf.get_variable(
            "transitions",
            shape=[self.num_labels, self.num_labels],
            initializer=self.initializers.xavier_initializer())
        if self.labels is None:
            # Inference mode: no gold labels, so only the transition matrix is returned.
            return None, trans
        else:
            log_likelihood, trans = tf.contrib.crf.crf_log_likelihood(
                inputs=logits,
                tag_indices=self.labels,
                transition_params=trans,
                sequence_lengths=self.lengths)
            # Negative mean log-likelihood over the batch.
            return tf.reduce_mean(-log_likelihood), trans
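
# --- Hypothetical usage of the loss and transition matrix returned above. ---
# A minimal TF 1.x sketch, assuming `model` is an instance of the surrounding class
# (with self.labels and self.lengths already set); `model`, `build_train_and_decode`
# and `learning_rate` are illustrative names, not part of the original code.
#
# import tensorflow as tf
#
# def build_train_and_decode(model, logits, learning_rate=1e-3):
#     loss, trans = model.crf_layer(logits)
#     # Viterbi decoding with the learned transition matrix; pred_ids is
#     # [batch_size, seq_length] tag indices.
#     pred_ids, _ = tf.contrib.crf.crf_decode(
#         potentials=logits,
#         transition_params=trans,
#         sequence_length=model.lengths)
#     train_op = None
#     if loss is not None:  # loss is None at pure inference time (no labels)
#         train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
#     return loss, pred_ids, train_op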