bert4keras模型转onnx

hailongzhang26

已于 2024-02-01 11:37:32 修改

阅读量2k

点赞数 3

分类专栏：深度学习模型转换 | 文章标签：bert keras tensorflow

于 2021-07-20 16:30:18 首次发布

本文链接：https://blog.csdn.net/hailongzhang26/article/details/118937909

版权

深度学习模型转换专栏收录该内容

1 篇文章 1 订阅

订阅专栏

该博客介绍了如何将基于BERT4Keras的RoFormer模型转换为ONNX格式，首先生成Keras的saved-model，然后使用tf2onnx工具进行转换。转换过程中需要注意TensorFlow和相关库的版本匹配。转换完成后，通过ONNXRuntime进行预测，展示了一个简单的ONNX模型预测示例。

摘要由CSDN通过智能技术生成

bert4keras模型转onnx

https://kexue.fm/archives/8454

#! -*- coding: utf-8 -*-
import os
os.environ['TF_KERAS'] = '1'
from bert4keras.backend import keras
from bert4keras.models import build_transformer_model
from bert4keras.tokenizers import Tokenizer

'''
生成saved-model的时候
bert4keras==0.10.6
keras==2.3.1
tensorflow-gpu==1.15.4
tensorflow-hub==0.12.0
h5py==2.10.0
'''
'''
生成后将saved-model转onnx，我是直接pip install tensorflow==2.5.0
建议创建一个新环境
bert4keras==0.10.6
keras==2.3.1
tensorflow==2.5.0
h5py==3.1.0
keras-nightly==2.5.0.dev2021032900
tensorflow-estimator==2.5.0
tf2onnx==1.9.1
onnx==1.9.0
onnxruntime==1.8.0
然后
# python -m tf2onnx.convert --saved-model encoder_model_tf --output encoder_simbert.onnx --opset 13
# python -m tf2onnx.convert --saved-model generate_model_tf --output generate_simbert.onnx --opset 13
'''

# Expose only GPU 0 to CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Basic settings (maxlen is not referenced again in this snippet).
maxlen = 128

# Model configuration: all files live in the pretrained model directory.
model_file = 'chinese_roformer-sim-char_L-12_H-768_A-12'
config_path = '{}/bert_config.json'.format(model_file)
checkpoint_path = '{}/bert_model.ckpt'.format(model_file)
# Uncomment to skip the pretrained checkpoint and load your own weights below.
# checkpoint_path = None
dict_path = '{}/vocab.txt'.format(model_file)

# Build the tokenizer (not referenced again in this snippet).
tokenizer = Tokenizer(dict_path, do_lower_case=True)

# Build the RoFormer model in UniLM mode with a linear pooling output.
roformer = build_transformer_model(
    config_path,
    checkpoint_path,
    model='roformer',
    application='unilm',
    with_pool='linear'
)
# Optionally load your own fine-tuned weights; this only happens when
# checkpoint_path has been set to None above.
output_best_model = 'model_path/best_model.weights'
if checkpoint_path is None:
    roformer.load_weights(output_best_model)

# Sentence-vector (encoder) model, built from the first model output.
# Left commented out; enable it to export the encoder as a saved-model.
# encoder = keras.models.Model(roformer.inputs, roformer.outputs[0])
# encoder.save('model_path/encoder_model_tf', save_format='tf')

# Decoder (generation) model, built from the second model output.
seq2seq = keras.models.Model(roformer.inputs, roformer.outputs[1])
# Keep only the last position along axis 1 of each output.
outputs = [
    keras.layers.Lambda(lambda x: x[:, -1])(output)
    for output in seq2seq.outputs
]
generate_model = keras.models.Model(seq2seq.inputs, outputs)
# Export in TensorFlow saved-model format; this directory is what gets
# converted to ONNX afterwards.
generate_model.save('model_path/generate_model_tf', save_format='tf')

# Alternative direct conversion with keras2onnx (left disabled).
# import keras2onnx
# onnx_model = keras2onnx.convert_keras(encoder)
# keras2onnx.save_model(onnx_model, 'bert-sim.onnx')

得到 saved_model 后再转onnx

'''
经过验证这个时候需要tensorflow==2.5.0  
安装pip install tensorflow==2.5.0  
'''
python -m tf2onnx.convert --saved-model encoder_model_tf --output simbert.onnx --opset 13

# 预测的时候还是用的tensorflow-gpu==1.15.4
如果转换和预测都在同一个环境里操作，先后装了两个版本的 tensorflow 之后再使用 bert4keras 可能会出现问题；
需要先卸载 tensorflow 再重新安装，同时注意 h5py 的版本。
pip uninstall keras-nightly
pip uninstall tensorflow
pip uninstall -y tensorflow-gpu
pip install keras==2.3.1
pip install tensorflow-gpu==1.15.4
pip install h5py==2.10.0
注：使用 --opset 10 也能转换并保存成功，但预测的时候会出错（上面的命令使用的是 --opset 13）。

用Netron查看simbert.onnx模型，

INPUTS
name: Input-Segment
type: float32[unk__3503,unk__3504]

name: Input-Token
type: float32[unk__3505,unk__3506]

import onnxruntime
import numpy as np
from model_path import tokenization
from model_path.bert_input_process import *

class SimBertONNX:
    """Run the converted SimBERT ONNX model through ONNX Runtime.

    The constructor opens an inference session on the ONNX file and builds a
    ``FullTokenizer`` from the vocabulary file; ``predict`` turns one sentence
    into the ``Input-Token`` / ``Input-Segment`` feeds and runs the session.
    """

    def __init__(self, weights, max_seg_len=128, vocab_file='chinese_roformer-sim-char_L-12_H-768_A-12/vocab.txt'):
        """Create the session and tokenizer.

        Args:
            weights: path of the ONNX model file.
            max_seg_len: maximum sequence length used when encoding text.
            vocab_file: vocabulary file for the tokenizer.
        """
        self.sess = onnxruntime.InferenceSession(weights)
        self.max_seg_len = max_seg_len
        self.tokenizer = tokenization.FullTokenizer(
            vocab_file=vocab_file,
            do_lower_case=True,
        )

    def predict(self, sent):
        """Return the first row of the model's first output for ``sent``."""
        encoded = format_ner_input(
            sent,
            max_seq_length=self.max_seg_len,
            tokenizer=self.tokenizer,
        )
        # Drop the zero ids (padding) so only the real tokens are fed in.
        token_ids = [tid for tid in encoded['input_ids'][0] if tid != 0]
        # Single-sentence input: every position gets segment id 0.
        segment_ids = [0 for _ in token_ids]
        # Example feed:
        # {'Input-Token': [[101, 4636, 1668, 4686, 6994, 102]], 'Input-Segment': [[0, 0, 0, 0, 0, 0]]}
        feed = {'Input-Token': [token_ids], "Input-Segment": [segment_ids]}
        model_outputs = self.sess.run(None, feed)
        return model_outputs[0][0]


# Path of the converted ONNX model file (an .onnx file, despite the `pb_` name).
pb_path = "model_path/simbert13.onnx"
simbert = SimBertONNX(pb_path)
# Print the model output for one example sentence.
print(simbert.predict('科学空间'))
[-1.11289009e-01 -5.35200179e-01  1.08341195e-01 -1.98119685e-01
 -5.59085608e-02  4.03575003e-01  1.67680249e-01 -2.41547719e-01
  4.21358049e-01 -2.02915221e-01 -1.85670257e-01 -2.22553447e-01
...

# bert_input_process

import numpy as np

def _truncate_seq_pair(tokens_a, tokens_b, max_length):
    while True:
        total_length = len(tokens_a) + len(tokens_b)
        if total_length <= max_length:
            break
        if len(tokens_a) > len(tokens_b):
            tokens_a.pop()
        else:
            tokens_b.pop()


class InputFeatures(object):
    """Plain holder for the model-ready features of one example.

    Every constructor argument is stored unchanged on the instance under the
    same name: ``input_ids``, ``input_mask``, ``segment_ids``, ``label_id``
    and ``is_real_example`` (defaults to ``True``).
    """

    def __init__(self, input_ids, input_mask, segment_ids, label_id, is_real_example=True):
        self.input_ids, self.input_mask = input_ids, input_mask
        self.segment_ids, self.label_id = segment_ids, label_id
        self.is_real_example = is_real_example


class PaddingInputExample(object):
    """Empty marker type for a padding example.

    It carries no data; code that converts examples can test for this type
    with ``isinstance`` to treat the example as padding.
    """


class InputExample(object):
    """One raw text example before tokenization.

    Stores ``guid``, ``text_a``, and the optional ``text_b`` and ``label``
    (both default to ``None``) unchanged on the instance.
    """

    def __init__(self, guid, text_a, text_b=None, label=None):
        self.guid, self.text_a = guid, text_a
        self.text_b, self.label = text_b, label


def convert_ner_example(example, max_seq_length, tokenizer):
    """Turn one example into fixed-length ``InputFeatures``.

    Args:
        example: an ``InputExample`` (``text_a`` plus optional ``text_b``) or
            a ``PaddingInputExample``.
        max_seq_length: length every feature list is padded to.
        tokenizer: object providing ``tokenize(text)`` and
            ``convert_tokens_to_ids(tokens)``.

    Returns:
        ``InputFeatures`` whose lists all have length ``max_seq_length``.
        For a ``PaddingInputExample`` all lists are zeros and
        ``is_real_example`` is ``False``.

    Raises:
        AssertionError: if a feature list does not end up with exactly
            ``max_seq_length`` entries.
    """
    if isinstance(example, PaddingInputExample):
        # Padding example: every field is its own all-zero list.
        zero_fields = {
            field: [0] * max_seq_length
            for field in ("input_ids", "input_mask", "segment_ids", "label_id")
        }
        return InputFeatures(is_real_example=False, **zero_fields)

    first_tokens = tokenizer.tokenize(example.text_a)
    second_tokens = tokenizer.tokenize(example.text_b) if example.text_b else None

    if second_tokens:
        # Reserve three slots: [CLS], [SEP], [SEP].
        _truncate_seq_pair(first_tokens, second_tokens, max_seq_length - 3)
    elif len(first_tokens) > max_seq_length - 2:
        # Reserve two slots: [CLS] and [SEP].
        first_tokens = first_tokens[:max_seq_length - 2]

    # First segment: [CLS] tokens_a [SEP], all with segment id 0.
    tokens = ["[CLS]"] + list(first_tokens) + ["[SEP]"]
    segment_ids = [0] * len(tokens)

    if second_tokens:
        # Second segment: tokens_b [SEP], all with segment id 1.
        tokens += list(second_tokens) + ["[SEP]"]
        segment_ids += [1] * (len(second_tokens) + 1)

    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # label_id is zero everywhere except the first and last real positions,
    # which carry the ids found at those positions.
    last_position = len(tokens) - 1
    label_id = [0] * max_seq_length
    label_id[0] = input_ids[0]
    label_id[last_position] = input_ids[last_position]

    # Mask is 1 for real tokens, 0 for the padding added below.
    input_mask = [1] * len(input_ids)

    missing = max_seq_length - len(input_ids)
    if missing > 0:
        input_ids.extend([0] * missing)
        input_mask.extend([0] * missing)
        segment_ids.extend([0] * missing)

    for padded in (input_ids, input_mask, segment_ids):
        assert len(padded) == max_seq_length

    return InputFeatures(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        label_id=label_id,
        is_real_example=True,
    )


def format_ner_input(text_a, text_b=None, gid="id", max_seq_length=128, tokenizer=None):
    """Encode one text (or text pair) as a batch-of-one feature dict.

    Builds an ``InputExample`` without a label, converts it with
    ``convert_ner_example`` and wraps each feature list in an outer list.

    Returns:
        dict with keys ``"input_ids"``, ``"input_mask"``, ``"segment_ids"``
        and ``"label_ids"``, each holding a one-element list.
    """
    example = InputExample(gid, text_a, text_b, None)
    converted = convert_ner_example(example, max_seq_length, tokenizer)
    # Output key -> attribute on the converted features.
    key_to_attr = (
        ("input_ids", "input_ids"),
        ("input_mask", "input_mask"),
        ("segment_ids", "segment_ids"),
        ("label_ids", "label_id"),
    )
    return {key: [getattr(converted, attr)] for key, attr in key_to_attr}


def format_simbert_input(text_a_list, max_seq_length=128, tokenizer=None):
    """Encode a list of single sentences for the SimBERT model.

    Each text is wrapped in an ``InputExample`` (guid ``"id"``, no second
    text, no label) and converted with ``convert_ner_example``.

    Returns:
        ``(input_ids, segment_ids)``: two lists with one entry per input
        text, in input order.
    """
    features = [
        convert_ner_example(
            InputExample(guid="id", text_a=text, text_b=None, label=None),
            max_seq_length,
            tokenizer,
        )
        for text in text_a_list
    ]
    input_ids = [feature.input_ids for feature in features]
    segment_ids = [feature.segment_ids for feature in features]
    return input_ids, segment_ids


def softmax(x, axis=-1):
    """Return the softmax of array ``x`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating so that
    large inputs do not overflow; the input array is not modified.
    """
    exps = np.exp(x - x.max(axis=axis, keepdims=True))
    return exps / exps.sum(axis=axis, keepdims=True)


class AutoRegressiveDecoder(object):
    """Base class for step-by-step (autoregressive) sequence generation.

    Subclasses supply ``predict``. This class provides the ``wraps``
    decorator, which standardizes what ``predict`` returns, and a
    ``random_sample`` loop that calls ``predict`` repeatedly to grow the
    output sequences one token at a time.
    """

    def __init__(self, start_id, end_id, maxlen, minlen=1):
        """Store the decoding limits and build the initial output ids.

        Args:
            start_id: id placed at the start of every output sequence, or
                ``None`` to start from an empty sequence.
            end_id: id whose occurrences are counted to detect finished
                sequences.
            maxlen: maximum number of decoding steps.
            minlen: minimum number of columns in ``output_ids`` before a
                sequence may be treated as finished.
        """
        self.start_id = start_id
        self.end_id = end_id
        self.maxlen = maxlen
        self.minlen = minlen
        self.models = {}
        if start_id is None:
            # No start token: begin with a (1, 0) array, i.e. one empty row.
            self.first_output_ids = np.empty((1, 0), dtype=int)
        else:
            self.first_output_ids = np.array([[self.start_id]])

    @staticmethod
    def wraps(default_rtype='probas', use_states=False):
        """Build a decorator that standardizes a subclass ``predict`` method.

        Args:
            default_rtype: what the decorated ``predict`` itself returns,
                ``'probas'`` or ``'logits'``.
            use_states: whether the decorated ``predict`` already returns a
                ``(prediction, states)`` pair; if not, ``None`` is used as
                the states.

        Returns:
            A decorator producing ``new_predict(self, inputs, output_ids,
            states, temperature=1, rtype=default_rtype)``, which returns a
            ``(prediction, states)`` tuple. With ``rtype='probas'`` the
            prediction is probabilities; otherwise it is their logarithm.
        """
        def actual_decorator(predict):
            def new_predict(self, inputs, output_ids, states, temperature=1, rtype=default_rtype):
                assert rtype in ['probas', 'logits']
                prediction = predict(self, inputs, output_ids, states)
                if not use_states:
                    prediction = (prediction, None)
                if default_rtype == 'logits':
                    # Raw logits: divide by the temperature, then softmax.
                    prediction = (softmax(prediction[0] / temperature), prediction[1])
                elif temperature != 1:
                    # Already probabilities: apply p ** (1 / T) and renormalize.
                    probas = np.power(prediction[0], 1.0 / temperature)
                    probas = probas / probas.sum(axis=-1, keepdims=True)
                    prediction = (probas, prediction[1])
                if rtype == 'probas':
                    return prediction
                else:
                    # Log of the probabilities; the epsilon avoids log(0).
                    return np.log(prediction[0] + 1e-12), prediction[1]

            return new_predict

        return actual_decorator

    def predict(self, inputs, output_ids, states=None):
        """Compute the next-step prediction; must be overridden.

        ``random_sample`` calls this with two extra positional arguments
        (temperature and rtype), which matches the signature produced by
        ``wraps``.
        """
        raise NotImplementedError

    def random_sample(self, inputs, n, topp=None, states=None, temperature=1, min_ends=1):
        """Generate sequences by sampling one token per step.

        Args:
            inputs: sequence of model inputs; each element is wrapped in a
                leading axis of size 1 and repeated ``n`` times after the
                first step.
            n: number of sequences to sample.
            topp: if not ``None``, sample only from the highest-probability
                tokens, up to and including the one at which the cumulative
                probability first reaches ``topp``.
            states: initial value passed to ``predict`` as its states.
            temperature: forwarded to ``predict``.
            min_ends: number of ``end_id`` occurrences that marks a sequence
                as finished.

        Returns:
            A list of 1-D id arrays (rows of ``output_ids``): finished
            sequences in the order they completed, followed by any sequences
            still unfinished after ``maxlen`` steps.
        """
        inputs = [np.array([i]) for i in inputs]
        output_ids = self.first_output_ids
        results = []
        for step in range(self.maxlen):
            probas, states = self.predict(
                inputs, output_ids, states, temperature, 'probas'
            )  # probabilities for the current step
            probas /= probas.sum(axis=1, keepdims=True)  # make sure each row sums to 1
            if step == 0:  # after the first prediction, repeat everything n times
                probas = np.repeat(probas, n, axis=0)
                inputs = [np.repeat(i, n, axis=0) for i in inputs]
                output_ids = np.repeat(output_ids, n, axis=0)
            if topp is not None:
                p_indices = probas.argsort(axis=1)[:, ::-1]  # indices sorted from high to low probability
                probas = np.take_along_axis(probas, p_indices, axis=1)  # probabilities in sorted order
                cumsum_probas = np.cumsum(probas, axis=1)  # cumulative probability
                flag = np.roll(cumsum_probas >= topp, 1, axis=1)  # mark entries past the topp threshold
                flag[:, 0] = False  # with the np.roll above, this shifts the mask right by one
                probas[flag] = 0  # zero out everything after the cutoff
                probas /= probas.sum(axis=1, keepdims=True)  # renormalize
            sample_func = lambda p: np.random.choice(len(p), p=p)  # draws one index according to p
            sample_ids = np.apply_along_axis(sample_func, 1, probas)  # sample once per row
            sample_ids = sample_ids.reshape((-1, 1))  # reshape to a column
            if topp is not None:
                # Map positions in the sorted order back to the original ids.
                sample_ids = np.take_along_axis(p_indices, sample_ids, axis=1)
            output_ids = np.concatenate([output_ids, sample_ids], 1)  # append the sampled ids
            end_counts = (output_ids == self.end_id).sum(1)  # count end markers per sequence
            if output_ids.shape[1] >= self.minlen:  # minimum-length check
                flag = (end_counts == min_ends)  # mark finished sequences
                if flag.any():  # if any sequence has finished
                    for ids in output_ids[flag]:  # store the finished sequences
                        results.append(ids)
                    flag = (flag == False)  # invert: mark unfinished sequences
                    # NOTE(review): `states` is not filtered with `flag` here,
                    # unlike `inputs` and `output_ids` - confirm this is fine
                    # for predictors that actually use states.
                    inputs = [i[flag] for i in inputs]  # keep inputs of unfinished sequences only
                    output_ids = output_ids[flag]  # keep unfinished candidates only
                    end_counts = end_counts[flag]  # keep end counts of unfinished sequences only
                    if len(output_ids) == 0:
                        break
        # Any sequences still unfinished are added to the results as they are
        for ids in output_ids:
            results.append(ids)
        # Return the results
        return results