一般一句话往往有多个实体,而核心事件只有一个。因此围绕一个事件的核心主体提取是一个值得思考和学习的问题。
在特定的场景下,我们认为事件类型是可枚举的,因此可以把事件预测当作分类任务来做(下面的代码对每个类别用 sigmoid 独立打分,即多标签分类的形式)。
第一步,利用bert进行文本分类
# Classification head on top of BERT: a single dense layer followed by an
# independent sigmoid per label, trained with sigmoid cross-entropy.
# Relies on `model`, `num_labels`, `labels` and `is_training` being defined
# by the surrounding code.
output_layer = model.get_pooled_output()  # sentence vector produced by BERT
hidden_size = output_layer.shape[-1].value
output_weights = tf.get_variable(
    "output_weights", [num_labels, hidden_size],
    initializer=tf.truncated_normal_initializer(stddev=0.02))
output_bias = tf.get_variable(
    "output_bias", [num_labels], initializer=tf.zeros_initializer())
with tf.variable_scope("loss"):
    if is_training:
        # I.e., 0.1 dropout
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
    # Fully connected layer: output_layer @ output_weights^T + output_bias
    logits_wx = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits_wx, output_bias)
    # Sigmoid turns every logit into its own probability
    probabilities = tf.sigmoid(logits)
    # NOTE(review): presumably `labels` is a 0/1 vector of length num_labels
    # per example -- confirm against the input pipeline.
    label_ids = tf.cast(labels, tf.float32)
    # Loss: per-label sigmoid cross-entropy, summed over the last axis and
    # then averaged over the examples
    per_example_loss = tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                labels=label_ids), axis=-1)
    loss = tf.reduce_mean(per_example_loss)
第二步:利用 BERT 进行 NER(命名实体识别)
(一)将第一步中的预测结果作为模型的输入:
# Build the BERT input features for NER. The step-one prediction is fed in
# as the second "sentence", repeated once per token of the text.
# `tokens`, `segment_ids` and `token_label_ids` are lists that must already
# exist before this snippet runs.
tokens_b = [text_predicate] * len(text_token)
predicate_id = predicate_label_map[text_predicate]  # id of the step-one prediction
"""
第一句话(句子)
"""
# First sentence: the text itself, one label per token, segment id 0.
for token, label in zip(text_token, token_label):
    tokens.append(token)
    segment_ids.append(0)
    token_label_ids.append(token_label_map[label])
tokens.append("[SEP]")
segment_ids.append(0)
token_label_ids.append(token_label_map["[SEP]"])
input_ids = tokenizer.convert_tokens_to_ids(tokens)
"""
第二句话
"""
# Second sentence: every position gets the id of the predicted category,
# segment id 1, and the "[category]" token label.
# NOTE(review): predicate_id comes from predicate_label_map, not from the
# tokenizer vocabulary, yet it is written into input_ids. Confirm these ids
# cannot collide with real or special token ids (padding below also uses 0).
for token in tokens_b:
    input_ids.append(predicate_id)
    segment_ids.append(1)
    token_label_ids.append(token_label_map["[category]"])
# Close the second sentence with a [SEP] marker (noted as id 102 in the
# original comment).
input_ids.append(tokenizer.convert_tokens_to_ids(["[SEP]"])[0])
segment_ids.append(1)
token_label_ids.append(token_label_map["[SEP]"])
input_mask = [1] * len(input_ids)  # all ones over the unpadded positions
# Zero-pad every feature list up to max_seq_length.
while len(input_ids) < max_seq_length:
    input_ids.append(0)
    input_mask.append(0)
    segment_ids.append(0)
    token_label_ids.append(0)
(二)ner的模型构造
# Token-level (NER) head on top of BERT: a dense layer applied to every
# position, softmax over the token labels, and a cross-entropy loss.
# Relies on `model`, `num_token_labels`, `token_label_ids`, `is_training`
# and `FLAGS.max_seq_length` being defined by the surrounding code.
token_label_output_layer = model.get_sequence_output()  # per-token BERT output
token_label_hidden_size = token_label_output_layer.shape[-1].value
token_label_output_weight = tf.get_variable(
    "token_label_output_weights", [num_token_labels, token_label_hidden_size],
    initializer=tf.truncated_normal_initializer(stddev=0.02)
)
token_label_output_bias = tf.get_variable(
    "token_label_output_bias", [num_token_labels], initializer=tf.zeros_initializer()
)
with tf.variable_scope("token_label_loss"):
    if is_training:
        token_label_output_layer = tf.nn.dropout(token_label_output_layer, keep_prob=0.9)
    # Flatten to 2-D so one matmul covers every position.
    token_label_output_layer = tf.reshape(token_label_output_layer, [-1, token_label_hidden_size])
    # Fully connected layer
    token_label_logits = tf.matmul(token_label_output_layer, token_label_output_weight, transpose_b=True)
    token_label_logits = tf.nn.bias_add(token_label_logits, token_label_output_bias)
    # Restore the [-1, max_seq_length, num_token_labels] layout.
    token_label_logits = tf.reshape(token_label_logits, [-1, FLAGS.max_seq_length, num_token_labels])
    token_label_log_probs = tf.nn.log_softmax(token_label_logits, axis=-1)
    # NOTE(review): presumably token_label_ids is (batch, max_seq_length), so
    # the one-hot tensor lines up with token_label_log_probs -- confirm.
    token_label_one_hot_labels = tf.one_hot(token_label_ids, depth=num_token_labels, dtype=tf.float32)
    # Element-wise product (not a matmul): keeps the log-prob of the gold label.
    token_label_per_example_loss = -tf.reduce_sum(token_label_one_hot_labels * token_label_log_probs, axis=-1)
    # NOTE(review): the loss is summed over all positions with no mask
    # applied here, so padded positions contribute too -- confirm intended.
    token_label_loss = tf.reduce_sum(token_label_per_example_loss)
    token_label_probabilities = tf.nn.softmax(token_label_logits, axis=-1)
    token_label_predictions = tf.argmax(token_label_probabilities, axis=-1)