1)加载ckpt模型
2)确定输出tensor名称,在bert中,cls的名称为:bert/pooler/dense/Tanh(而不是SoftMax)
3)存储为pb model
主代码:
def extract_bert_vector():
    """Freeze a fine-tuned BERT checkpoint into a .pb graph for feature extraction.

    Rebuilds the model graph, restores the latest checkpoint from `ckpt_model`,
    converts variables to constants up to the pooled-output node
    ``bert/pooler/dense/Tanh`` (the 768-d [CLS] vector, NOT the softmax), and
    serializes the frozen GraphDef to ``pb_model/bert_encoder.pb``.

    :return: None (writes the frozen graph to disk as a side effect)
    """
    OUTPUT_GRAPH = 'pb_model/bert_encoder.pb'
    # Output node is the pooler Tanh (sentence embedding), not the classifier softmax.
    output_node = ["bert/pooler/dense/Tanh"]
    ckpt_model = r'output'
    bert_config_file = r'chinese_L-12_H-768_A-12/bert_config.json'
    max_seq_length = 200

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    sess = tf.Session(config=gpu_config)
    graph = tf.get_default_graph()
    # label_list is only used for its length (num_labels) when rebuilding the head.
    with open(r'data/file_dict.json', 'r') as fr:
        label_list = json.load(fr)
    try:
        with graph.as_default():
            print("going to restore checkpoint")
            input_ids_p = tf.placeholder(tf.int32, [None, max_seq_length], name="input_ids")
            input_mask_p = tf.placeholder(tf.int32, [None, max_seq_length], name="input_mask")
            bert_config = modeling.BertConfig.from_json_file(bert_config_file)
            # is_training=False: inference graph, no dropout.
            (loss, per_example_loss, logits, probabilities) = create_model(
                bert_config=bert_config, is_training=False, input_ids=input_ids_p, input_mask=input_mask_p,
                segment_ids=None, labels=None, num_labels=len(label_list), use_one_hot_embeddings=False)
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(ckpt_model))
            # Use a distinct name: this is a GraphDef, not the tf.Graph above.
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node)
            with tf.gfile.GFile(OUTPUT_GRAPH, "wb") as f:
                f.write(frozen_graph_def.SerializeToString())
        print('extract vector pb model saved!')
    finally:
        # The original leaked the session; always release GPU memory.
        sess.close()
768维度明显过高,采用白化处理,将768->256
代码如下:
def compute_kernel_bias(vecs, n_components=256):
    """Fit a whitening transform and return its (kernel, bias).

    vecs.shape = [num_samples, embedding_size]; the caller applies the
    transform as y = (x + bias).dot(kernel), which centers the data and
    maps its covariance to (a truncation of) the identity.
    """
    mean = np.mean(vecs, axis=0, keepdims=True)
    covariance = np.cov(vecs.T)
    u, singular_values, _ = scipy.linalg.svd(covariance)
    # Scale the j-th column of u by s_j^{-1/2}; broadcasting is equivalent
    # to the matmul u @ diag(1/sqrt(s)).
    whitening = u / np.sqrt(singular_values)
    # Keep only the leading n_components directions (dimensionality reduction).
    return whitening[:, :n_components], -mean
def transform_and_normalize(vecs, kernel=None, bias=None):
    """Apply the whitening transform (when both parts are given), then
    L2-normalize each row to unit length.

    If either kernel or bias is None the transform is skipped and the
    input is only normalized.
    """
    if kernel is not None and bias is not None:
        vecs = np.dot(vecs + bias, kernel)
    row_norms = np.sqrt(np.sum(np.square(vecs), axis=1, keepdims=True))
    return vecs / row_norms
# v_data: presumably a list of 768-d BERT sentence vectors collected upstream
# (defined outside this snippet) — TODO confirm against the caller.
v_data = np.array(v_data)
# Fit the whitening transform on the corpus itself and reduce 768 -> 256 dims.
kernel,bias=compute_kernel_bias(v_data,256)
# Replace the raw vectors with their whitened, L2-normalized counterparts.
v_data=transform_and_normalize(v_data, kernel=kernel, bias=bias)
该功能引用于:
- 使用微调后的Bert模型做编码器进行文本特征向量抽取及特征降维 - 今夜无风 - 博客园 (cnblogs.com)
- 你可能不需要BERT-flow:一个线性变换媲美BERT-flow - 科学空间|Scientific Spaces