【tensorflow】docker 部署模型 grpc，RESTful 模型上线部署

最新推荐文章于 2024-09-04 16:03:59 发布

cy_dream

最新推荐文章于 2024-09-04 16:03:59 发布

阅读量932

点赞数 1

分类专栏：深度学习 Python BERT

本文链接：https://blog.csdn.net/qq_33304418/article/details/110494620

版权

Python 同时被 3 个专栏收录

16 篇文章 0 订阅

订阅专栏

深度学习

2 篇文章 0 订阅

订阅专栏

BERT

2 篇文章 1 订阅

订阅专栏

所需依赖

tensorflow==1.14.0

tensorflow-serving-api==1.14.0

grpcio==1.33.2

安装docker

相关环境已经安装完成，下面开始进行模型部署

网上下载serving镜像：

docker pull tensorflow/serving:latest-devel

docker image ls

首先运行官网例子链接 https://github.com/tensorflow/serving

# Clone tensorflow/serving; the official repository ships demo models.
git clone https://github.com/tensorflow/serving

# Location of demo models
TESTDATA="$(pwd)/serving/tensorflow_serving/servables/tensorflow/testdata"

# Inside testdata, a directory such as 00000123 is the model version number
# (used for hot updates); it holds saved_model.pb and the variables directory.


# Start TensorFlow Serving container and open the REST API port.
# Port mapping is 8501 (host side, may be changed):8501 (port inside docker).
# The /models/half_plus_two mount point and MODEL_NAME=half_plus_two must match.

docker run --tty --rm --publish 8501:8501 \
    --volume "$TESTDATA/saved_model_half_plus_two_cpu:/models/half_plus_two" \
    --env MODEL_NAME=half_plus_two \
    tensorflow/serving &

# Query the model using the predict API
curl --request POST --data '{"instances": [1.0, 2.0, 5.0]}' \
    http://localhost:8501/v1/models/half_plus_two:predict

# Returns => { "predictions": [2.5, 3.0, 4.5] }

使用curl命令来查看一下启动的服务状态，也可以看到提供服务的模型版本以及模型状态。

curl http://localhost:8501/v1/models/half_plus_two

出现如下结果则为成功

训练模型导出为pb文件，导出目录中包含saved_model.pb文件与variables 目录

# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import csv
import os
import sys
import tensorflow as tf

import modeling

flags = tf.flags
FLAGS = flags.FLAGS

# Location of the trained checkpoint files
# (e.g. model.ckpt-56875.data-00000-of-00001); change it to your own path.
flags.DEFINE_string(name="data_path", default='output', help="saved model path")

# NOTE(review): declared as a string flag although the value is numeric;
# consumers have to convert it with int().
flags.DEFINE_string(name="labels_num", default='69', help="number of your labels")

# Export directory for the SavedModel.
flags.DEFINE_string(name="export_path", default='exported/2', help="savedModel export path")

# NOTE(review): also a string flag holding a numeric value.
flags.DEFINE_string(name="max_seq_length", default="350", help="max sequence length")

# Path to your own chinese_L-12_H-768_A-12 directory.
flags.DEFINE_string(name="bert_path", default="/data/bert/chinese_L-12_H-768_A-12", help="bert path")

flags.DEFINE_integer(name="num_label", default=None, help="label number")

class ModelTransfer(object):
    """Rebuild the BERT classifier graph and export it as a SavedModel.

    The graph is recreated with placeholders as inputs, the newest checkpoint
    found under ``FLAGS.data_path`` is restored into it, and the result is
    written to ``FLAGS.export_path`` with ``tf.saved_model.simple_save``.
    """

    def __init__(self, max_seq_length=None):
        """Read the export configuration.

        :param max_seq_length: length of the second placeholder dimension.
            When ``None`` (the default) ``FLAGS.max_seq_length`` is used. The
            flag is looked up here, at construction time, instead of in the
            signature, so the class can be defined before flags are available.
        """
        if max_seq_length is None:
            max_seq_length = FLAGS.max_seq_length
        # The flag is declared as a string; normalise to int for tensor shapes.
        self.max_seq_length = int(max_seq_length)
        self.labels_num = int(FLAGS.labels_num)
        self.bert_config_file = os.path.join(FLAGS.bert_path, 'bert_config.json')

    def _create_model(self, bert_config, is_training, input_ids, input_mask, segment_ids,
                      labels, num_labels, use_one_hot_embeddings):
        """Creates a classification model.

        Copied from the training source. Variable names and scopes must stay
        as they are so that the checkpoint variables can be restored by name.

        :return: tuple ``(loss, per_example_loss, logits, probabilities)``;
            ``probabilities`` is the element-wise sigmoid of ``logits``.
        """
        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        output_layer = model.get_pooled_output()
        hidden_size = output_layer.shape[-1].value
        output_weights = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))

        output_bias = tf.get_variable(
            "output_bias", [num_labels], initializer=tf.zeros_initializer())
        with tf.variable_scope("loss"):
            if is_training:
                # I.e., 0.1 dropout
                output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

            logits = tf.matmul(output_layer, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)

            # Independent sigmoid per label rather than a softmax over labels.
            probabilities = tf.nn.sigmoid(logits)
            labels = tf.cast(labels, tf.float32)
            tf.logging.info("num_labels:{};logits:{};labels:{}".format(num_labels, logits, labels))
            per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
            loss = tf.reduce_mean(per_example_loss)

            return (loss, per_example_loss, logits, probabilities)

    def transfer(self):
        """Restore the latest checkpoint and write the SavedModel.

        :raises ValueError: if no checkpoint is found under
            ``FLAGS.data_path``.
        """
        # Fail early with a clear message instead of deep inside Saver.restore.
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.data_path)
        if checkpoint_path is None:
            raise ValueError("No checkpoint found under data_path: %s" % FLAGS.data_path)

        gpu_config = tf.ConfigProto()
        gpu_config.gpu_options.allow_growth = True
        # The context manager closes the session once the export is done.
        with tf.Session(config=gpu_config) as sess:
            print("going to restore checkpoint")
            bert_config = modeling.BertConfig.from_json_file(self.bert_config_file)
            # The first dimension is None so that batched prediction is possible.
            input_ids = tf.placeholder(tf.int32, [None, self.max_seq_length], name="input_ids")
            input_mask = tf.placeholder(tf.int32, [None, self.max_seq_length], name="input_mask")
            segment_ids = tf.placeholder(tf.int32, [None, self.max_seq_length], name="segment_ids")
            label_ids = tf.placeholder(tf.int32, [None, self.labels_num], name="label_ids")
            _, _, _, probabilities = self._create_model(
                bert_config, False, input_ids, input_mask, segment_ids,
                label_ids, self.labels_num, False)
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_path)
            # label_ids stays in the signature, so clients must send a value
            # for it even though only probabilities is exported as output.
            tf.saved_model.simple_save(sess,
                                       FLAGS.export_path,
                                       inputs={
                                           'label_ids': label_ids,
                                           'input_ids': input_ids,
                                           'input_mask': input_mask,
                                           'segment_ids': segment_ids
                                       },
                                       outputs={"probabilities": probabilities})
        print('savedModel export finished')


if __name__ == '__main__':
    # data_path: model files and bert_config.json; export_path: where the
    # SavedModel is written; labels_num: number of labels.
    for required_flag in ("data_path", "export_path", "labels_num"):
        flags.mark_flag_as_required(required_flag)
    exporter = ModelTransfer()
    exporter.transfer()

docker 运行导出的pb模型

容器内 8500 为 gRPC 方式访问端口，8501 为 RESTful 方式访问端口；下面的命令把它们分别映射到宿主机的 9001 和 9000 端口

# Host port 9001 -> container port 8500, host port 9000 -> container port 8501.
# The mount point /models/test and MODEL_NAME=test must match.
docker run --tty --rm --publish 9001:8500 --publish 9000:8501 \
    --volume "/data/Multi_Label_Classifier/exported:/models/test" \
    --env MODEL_NAME=test \
    tensorflow/serving &

在写服务之前，需要明确模型的名字、输入、输出等。我们使用如下命令可以看到Docker中模型的基本信息。

# 我的物理机上 RESTful 端口映射为 9000，所以这里访问 9000 端口

curl http://localhost:9000/v1/models/test/metadata

RESTful方式访问

def build_pb_model_input(self, line):
    """Build one request instance per input text.

    Every element of ``line`` is wrapped in an ``InputExample`` (as
    ``text_a``; ``text_b`` is always ``None`` and the label is set to
    ``self.label_list[0]``) and converted with ``convert_single_example``.

    :param line: a list of texts.
    :return: a list with one dict per text, holding the keys ``input_ids``,
        ``input_mask``, ``segment_ids`` and ``label_ids``. The first three are
        1-D arrays of length ``FLAGS.max_seq_length``.
    :raises ValueError: if ``line`` is not a list.
    """
    if not isinstance(line, list):
        raise ValueError("List type required: %s" % (type(line)))

    predict_list = []
    for index, text in enumerate(line):
        example = InputExample(guid=index, text_a=text, text_b=None, label=self.label_list[0])
        feature = convert_single_example(index, example, self.label_list, FLAGS.max_seq_length, self.tokenizer)
        # np.reshape fails loudly if a feature does not hold exactly
        # max_seq_length values, so malformed features never reach the server.
        predict_list.append({
            "input_ids": np.reshape(feature.input_ids, (FLAGS.max_seq_length,)),
            "input_mask": np.reshape(feature.input_mask, (FLAGS.max_seq_length,)),
            "segment_ids": np.reshape(feature.segment_ids, (FLAGS.max_seq_length,)),
            "label_ids": feature.label_id,
        })

    return predict_list

def predict_online(self, line, timeout=10.0):
    """Send the texts to the REST predict endpoint and return the response.

    All elements of ``line`` are sent as a single ``instances`` batch.

    :param line: a list of texts, as accepted by ``build_pb_model_input``.
    :param timeout: seconds to wait for the HTTP request; the default matches
        the 10 second deadline used by the gRPC client.
    :return: the response body decoded from JSON.
    """
    predict_list = self.build_pb_model_input(line)

    server_url = 'http://localhost:9000/v1/models/test:predict'

    # NumpyEncoder lets json.dumps serialise the numpy values in the instances.
    payload = json.dumps({"instances": predict_list}, cls=NumpyEncoder)
    # Without a timeout an unresponsive server would block the caller forever.
    result = requests.post(server_url, data=payload, timeout=timeout)
    return json.loads(result.text)

grpc访问

import grpc
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc


  
def predict_online_grpc(self, line):
    """Run a batched prediction through the gRPC ``Predict`` API.

    :param line: a non-empty list of texts; each element is passed to
        ``InputExample`` as ``text_a``.
    :return: the response object returned by ``stub.Predict``.
    :raises ValueError: if ``line`` is not a list or is empty.
    """
    if not isinstance(line, list):
        raise ValueError("List type required: %s" % (type(line)))
    if not line:
        # An empty batch cannot be sliced into per-input tensors below.
        raise ValueError("At least one input text is required")

    features = []
    label_ids = []
    for index, text in enumerate(line):
        example = InputExample(guid=index, text_a=text, text_b=None, label=self.label_list[0])
        feature = convert_single_example(index, example, self.label_list, FLAGS.max_seq_length, self.tokenizer)
        features.append([
            np.reshape(feature.input_ids, (FLAGS.max_seq_length,)),
            np.reshape(feature.input_mask, (FLAGS.max_seq_length,)),
            np.reshape(feature.segment_ids, (FLAGS.max_seq_length,)),
        ])
        # One entry per example, so label_ids has the same batch size as the
        # other inputs.
        # NOTE(review): the original author states that the shape of label_ids
        # has no effect on the result -- confirm against the served model.
        label_ids.append(feature.label_id)

    # Stacked as (batch, 3, max_seq_length); axis 1 selects the input kind.
    batch = np.array(features).astype(dtype=np.int32)

    make_tensor_proto = tf.contrib.util.make_tensor_proto
    request = predict_pb2.PredictRequest()
    request.model_spec.name = "test"
    request.model_spec.signature_name = "serving_default"
    request.inputs['input_ids'].CopyFrom(make_tensor_proto(batch[:, 0]))
    request.inputs['input_mask'].CopyFrom(make_tensor_proto(batch[:, 1]))
    request.inputs['segment_ids'].CopyFrom(make_tensor_proto(batch[:, 2]))
    request.inputs['label_ids'].CopyFrom(make_tensor_proto(label_ids))

    # The context manager closes the channel once the call has finished.
    with grpc.insecure_channel(FLAGS.server) as channel:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
        return stub.Predict(request, 10.0)  # 10 secs timeout