def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_pooled_output()  # hidden-state output for the first token ([CLS])
  # To get encodings for every token, use model.get_sequence_output() instead
  # (see the commented sketch just below).
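  # A sketch of that token-level alternative (not part of the original
  # function; shapes as documented in modeling.py):
  #   sequence_output = model.get_sequence_output()
  #   # sequence_output has shape [batch_size, seq_length, hidden_size] and
  #   # would feed a per-token classifier, e.g. for NER or span prediction.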
  hidden_size = output_layer.shape[-1].value

  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))  # classifier weights on top of output_layer; together with the bias below, this forms a fully connected layer

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())  # the bias term
  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)  # this line and the previous one together are exactly the fully connected layer
    probabilities = tf.nn.softmax(logits, axis=-1)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    # At this point per_example_loss has shape [batch_size]: one loss per example.
    loss = tf.reduce_mean(per_example_loss)  # the mean loss over the batch

    return (loss, per_example_loss, logits, probabilities)
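The manual one-hot × log-softmax reduction above is just ordinary softmax cross-entropy. As a sanity check, here is a minimal, self-contained TF1-style sketch (the toy logits/labels values are illustrative, not from the original) showing that it matches tf.nn.sparse_softmax_cross_entropy_with_logits:

import tensorflow as tf

logits = tf.constant([[2.0, 0.5], [0.1, 1.2]])  # [batch_size=2, num_labels=2], toy values
labels = tf.constant([0, 1])                    # integer class ids

# The manual form used in create_model above.
log_probs = tf.nn.log_softmax(logits, axis=-1)
one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32)
manual_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)

# The built-in equivalent.
builtin_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels, logits=logits)

with tf.Session() as sess:
  print(sess.run([manual_loss, builtin_loss]))  # both print the same per-example losses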
If you want to add your own layers after BERT, you can do so inside the model-creation block above.
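A minimal sketch of that idea (the 256-unit width, the tanh activation, and the name "extra_dense" are illustrative assumptions, not from the original): replace the output_layer = model.get_pooled_output() line in create_model with something like:

output_layer = model.get_pooled_output()  # [batch_size, hidden_size]
# Hypothetical extra fully connected layer before the classifier.
output_layer = tf.layers.dense(
    output_layer,
    256,
    activation=tf.tanh,
    kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
    name="extra_dense")
# The rest of create_model works unchanged:
# hidden_size = output_layer.shape[-1].value now picks up the new width (256).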