BERT Deployment
Server side
Saving the model
"""=========================EXPORT MODEL========================"""
def serving_input_receiver_fn():
"""An input receiver that expects a serialized tf.Example."""
reciever_tensors = {
"input_ids": tf.placeholder(dtype=tf.int64,
shape=[1, FLAGS.max_seq_length])
}
features = {
"input_ids": reciever_tensors['input_ids'],
"input_mask": 1 - tf.cast(tf.equal(reciever_tensors['input_ids'], 0), dtype=tf.int64),
"segment_ids": tf.zeros(dtype=tf.int64,
shape=[1, FLAGS.max_seq_length]),
'label_ids': tf.zeros(dtype=tf.int64, shape=[1, 1])
}
return tf.estimator.export.ServingInputReceiver(features, reciever_tensors)
estimator._export_to_tpu = False
estimator.export_savedmodel(FLAGS.export_model_dir,serving_input_receiver_fn)
"""=========================EXPORT MODEL========================"""
The features dict above defines the keys the model graph expects, while reciever_tensors is the format the server accepts from clients: only input_ids is sent over the wire; input_mask is derived from it, and segment_ids/label_ids are filled with zeros.
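To verify the export, you can load the SavedModel back and print its serving signature. A minimal sketch (TF 1.x); the version directory below is a placeholder, since export_savedmodel names it with a timestamp:
import tensorflow as tf

# Placeholder path: export_savedmodel creates a timestamped subdirectory
# under FLAGS.export_model_dir.
export_dir = '/path/to/export_model_dir/1565000000'

with tf.Session(graph=tf.Graph()) as sess:
    meta_graph = tf.saved_model.loader.load(sess, ['serve'], export_dir)
    # Expect a single input "input_ids" with shape [1, max_seq_length].
    print(meta_graph.signature_def['serving_default'])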
Running the server
# Inside the container, 8500 is the gRPC port and 8501 the REST port; here they
# are mapped to 8700 and 8701 on the host. Note that ~ is not expanded inside
# --mount, so use $HOME (or an absolute path) instead.
docker run --runtime=nvidia \
    -e CUDA_VISIBLE_DEVICES=1 \
    -p 8700:8500 -p 8701:8501 \
    --mount type=bind,source=$HOME/saved_model,target=/models/qing \
    -e MODEL_NAME=qing \
    -t tensorflow/serving:latest-gpu
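Once the container is up, TF Serving's model-status endpoint is a quick way to confirm the model loaded; a small sketch using requests (ports as mapped above):
import requests

# The model should report state "AVAILABLE" once loading finishes.
status = requests.get('http://localhost:8701/v1/models/qing')
print(status.json())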
Client side
Connecting over HTTP
import os
import time

import numpy as np
import requests

import tokenization
from extract_features import InputExample, convert_examples_to_features

# expanduser so that a default like '~/bert_model/...' actually resolves.
vocab_file = os.path.expanduser(
    os.environ.get('vocab_file', '~/bert_model/chinese_L-12_H-768_A-12/vocab.txt'))
# Environment variables are strings, so cast explicitly.
max_token_len = int(os.environ.get('max_token_len', 128))

# Build the tokenizer once instead of reloading the vocab on every request.
tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)

def preprocess(text):
    """Convert raw text into the JSON payload expected by the REST API."""
    example = InputExample(unique_id=None, text_a=text, text_b=None)
    feature = convert_examples_to_features([example], max_token_len, tokenizer)[0]
    input_ids = np.reshape([feature.input_ids], (1, max_token_len))
    return {"inputs": {"input_ids": input_ids.tolist()}}

if __name__ == '__main__':
    while True:
        text = input("Input test sentence:\n")
        start = time.time()
        resp = requests.post('http://localhost:8701/v1/models/qing:predict',
                             json=preprocess(text))
        end = time.time()
        pro_0, pro_1 = resp.json()['outputs'][0]
        print(f"negative prob: {pro_0} positive prob: {pro_1} "
              f"time: {int((end - start) * 1000)}ms")
Connecting over gRPC
import os

import grpc
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc

import tokenization
from extract_features import InputExample, convert_examples_to_features

vocab_file = os.path.expanduser(
    os.environ.get('vocab_file', '~/bert_model/chinese_L-12_H-768_A-12/vocab.txt'))
max_token_len = int(os.environ.get('max_token_len', 128))

tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)

def preprocess(text):
    """Convert raw text into input_ids of shape (1, max_token_len)."""
    example = InputExample(unique_id=None, text_a=text, text_b=None)
    feature = convert_examples_to_features([example], max_token_len, tokenizer)[0]
    input_ids = np.reshape([feature.input_ids], (1, max_token_len))
    return {"inputs": {"input_ids": input_ids.tolist()}}

def send(txt='你好啊'):
    # 8700 is the host port mapped to the container's gRPC port 8500.
    channel = grpc.insecure_channel('localhost:8700')
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'qing'
    ids = preprocess(txt)['inputs']['input_ids']
    i = np.array(ids, dtype=np.int64)
    request.inputs['input_ids'].CopyFrom(tf.contrib.util.make_tensor_proto(i))
    return stub.Predict(request)

if __name__ == '__main__':
    print(send('你好啊'))
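One caveat: stub.Predict blocks with no deadline by default. The stub's second positional argument is a timeout in seconds; a sketch with a deadline and error handling, reusing preprocess from the script above:
channel = grpc.insecure_channel('localhost:8700')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
request = predict_pb2.PredictRequest()
request.model_spec.name = 'qing'
ids = np.array(preprocess('你好啊')['inputs']['input_ids'], dtype=np.int64)
request.inputs['input_ids'].CopyFrom(tf.contrib.util.make_tensor_proto(ids))
try:
    result = stub.Predict(request, 10.0)  # deadline: 10 seconds
    print(result)
except grpc.RpcError as err:
    print('RPC failed:', err.code(), err.details())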
Version management and hot model updates
Earlier we saved the model under "model/1", where 1 is the version number. When a better model is trained, TensorFlow Serving does not need to be restarted: simply publish the new model under "model/<new version>", e.g. "model/2". TensorFlow Serving will detect and load the new version automatically, and clients can immediately request the API backed by it.
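A specific version can also be addressed directly over REST; a sketch that checks (and queries) version 2 after it has been dropped in, reusing preprocess from the HTTP client above:
import requests

# Status of one version; its state becomes "AVAILABLE" after the hot load.
print(requests.get('http://localhost:8701/v1/models/qing/versions/2').json())

# Predictions can be pinned to that version as well.
resp = requests.post('http://localhost:8701/v1/models/qing/versions/2:predict',
                     json=preprocess('你好啊'))
print(resp.json()['outputs'][0])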