所需依赖
tensorflow==1.14.0
tensorflow-serving-api==1.14.0
grpcio==1.33.2
安装docker
相关环境已经安装完成,下面开始进行模型部署
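从 Docker Hub 下载 serving 镜像(注意:下文 docker run 使用的是 tensorflow/serving,即 latest 标签;此处的 latest-devel 为开发版镜像,若本地没有 latest 镜像,docker run 时会自动拉取):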
docker pull tensorflow/serving:latest-devel
docker image ls
# clone tensorflow/serving 官网有例子
git clone https://github.com/tensorflow/serving
# Location of demo models
TESTDATA="$(pwd)/serving/tensorflow_serving/servables/tensorflow/testdata"
# testdata目录中00000123 为版本号, 热更新用
# 包含saved_model.pb文件与variables 目录
# Start TensorFlow Serving container and open the REST API port
# 8501(可变):8501(docker中端口)
# /models/half_plus_two 与 MODEL_NAME=half_plus_two 需相同
docker run -t --rm -p 8501:8501 \
-v "$TESTDATA/saved_model_half_plus_two_cpu:/models/half_plus_two" \
-e MODEL_NAME=half_plus_two \
tensorflow/serving &
# Query the model using the predict API
curl -d '{"instances": [1.0, 2.0, 5.0]}' \
-X POST http://localhost:8501/v1/models/half_plus_two:predict
# Returns => { "predictions": [2.5, 3.0, 4.5] }
使用curl命令来查看一下启动的服务状态,也可以看到提供服务的模型版本以及模型状态。
curl http://localhost:8501/v1/models/half_plus_two
出现如下结果则为成功
将训练好的模型导出为 pb 文件,导出目录中包含 saved_model.pb 文件与 variables 目录
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import csv
import os
import sys
import tensorflow as tf
import modeling
flags = tf.flags
FLAGS = flags.FLAGS

# Directory that holds the trained checkpoint files
# (e.g. model.ckpt-56875.data-00000-of-00001); change it to your own path.
flags.DEFINE_string(name="data_path", default='output', help="saved model path")

# NOTE: declared as a string flag, so readers must cast the value to int.
flags.DEFINE_string(name="labels_num", default='69', help="number of your labels")

# Directory the SavedModel is exported to.
flags.DEFINE_string(name="export_path", default='exported/2', help="savedModel export path")

# NOTE: declared as a string flag, so readers must cast the value to int.
flags.DEFINE_string(name="max_seq_length", default="350", help="max sequence length")

# Path of your own chinese_L-12_H-768_A-12 directory.
flags.DEFINE_string(
    name="bert_path",
    default="/data/bert/chinese_L-12_H-768_A-12",
    help="bert path",
)

# NOTE(review): this flag is not read anywhere in the visible part of this file.
flags.DEFINE_integer(name="num_label", default=None, help="label number")
class ModelTransfer(object):
    """Rebuild the BERT classifier graph and export it as a SavedModel.

    The graph is recreated in inference mode, the latest checkpoint found in
    ``FLAGS.data_path`` is restored into it, and the result is written to
    ``FLAGS.export_path`` with ``tf.saved_model.simple_save``.
    """

    def __init__(self, max_seq_length=None):
        """Read the export configuration.

        :param max_seq_length: length of every input sequence; when ``None``
            the value of ``FLAGS.max_seq_length`` is used.
        """
        # Resolve the flag here instead of in the signature: a default argument
        # is evaluated once at class-definition time, i.e. before the
        # ``__main__`` block has had a chance to configure the flags.
        if max_seq_length is None:
            max_seq_length = FLAGS.max_seq_length
        # Both flags are declared with DEFINE_string, so normalise them to int
        # before they are used as tensor dimensions.
        self.max_seq_length = int(max_seq_length)
        self.labels_num = int(FLAGS.labels_num)
        self.bert_config_file = os.path.join(FLAGS.bert_path, 'bert_config.json')

    def _create_model(self, bert_config, is_training, input_ids, input_mask, segment_ids,
                      labels, num_labels, use_one_hot_embeddings):
        """Creates a classification model.

        Copied verbatim from the training source so that variable names match
        the checkpoint that is restored afterwards.

        :return: tuple ``(loss, per_example_loss, logits, probabilities)``
            where ``probabilities`` is the element-wise sigmoid of ``logits``
            and ``loss`` is the mean sigmoid cross-entropy.
        """
        model = modeling.BertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        output_layer = model.get_pooled_output()
        hidden_size = output_layer.shape[-1].value

        output_weights = tf.get_variable(
            "output_weights", [num_labels, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable(
            "output_bias", [num_labels], initializer=tf.zeros_initializer())

        with tf.variable_scope("loss"):
            if is_training:
                # I.e., 0.1 dropout
                output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

            logits = tf.matmul(output_layer, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)
            probabilities = tf.nn.sigmoid(logits)

            labels = tf.cast(labels, tf.float32)
            tf.logging.info("num_labels:{};logits:{};labels:{}".format(num_labels, logits, labels))
            per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
            loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, logits, probabilities)

    def transfer(self):
        """Restore the latest checkpoint and export it as a SavedModel.

        :raises ValueError: if no checkpoint can be found in ``FLAGS.data_path``.
        """
        gpu_config = tf.ConfigProto()
        gpu_config.gpu_options.allow_growth = True
        print("going to restore checkpoint")
        bert_config = modeling.BertConfig.from_json_file(self.bert_config_file)

        # The first dimension is None so that the exported model accepts
        # batches of any size.
        input_ids = tf.placeholder(tf.int32, [None, self.max_seq_length], name="input_ids")
        input_mask = tf.placeholder(tf.int32, [None, self.max_seq_length], name="input_mask")
        segment_ids = tf.placeholder(tf.int32, [None, self.max_seq_length], name="segment_ids")
        label_ids = tf.placeholder(tf.int32, [None, self.labels_num], name="label_ids")

        # Only the probabilities are exported; loss and logits are unused here.
        _, _, _, probabilities = self._create_model(
            bert_config, False, input_ids, input_mask, segment_ids,
            label_ids, self.labels_num, False)

        # Fail early with a readable message instead of handing None to
        # saver.restore().
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.data_path)
        if checkpoint_path is None:
            raise ValueError("no checkpoint found in data_path: %s" % FLAGS.data_path)

        saver = tf.train.Saver()
        # The context manager closes the session (and frees its resources)
        # even if restoring or exporting fails.
        with tf.Session(config=gpu_config) as sess:
            saver.restore(sess, checkpoint_path)
            tf.saved_model.simple_save(sess,
                                       FLAGS.export_path,
                                       inputs={
                                           'label_ids': label_ids,
                                           'input_ids': input_ids,
                                           'input_mask': input_mask,
                                           'segment_ids': segment_ids
                                       },
                                       outputs={"probabilities": probabilities})
        print('savedModel export finished')
if __name__ == '__main__':
    # Mark, in this order: the directory with the model checkpoint
    # (data_path), the SavedModel output directory (export_path) and the
    # label count (labels_num) as required flags, then run the export.
    for required_flag in ("data_path", "export_path", "labels_num"):
        flags.mark_flag_as_required(required_flag)
    ModelTransfer().transfer()
- docker 运行导出的 pb 模型
8500为GRPC方式访问端口, 8501为RESTful方式访问端口
docker run -t --rm -p 9001:8500 -p 9000:8501 \
-v "/data/Multi_Label_Classifier/exported:/models/test" \
-e MODEL_NAME=test \
tensorflow/serving &
- 在写服务之前,需要明确模型的名字、输入、输出等。我们使用如下命令可以看到Docker中模型的基本信息。
# 我的物理机端口指定为9000(映射到容器内的 RESTful 端口 8501)
curl http://localhost:9000/v1/models/test/metadata
- RESTful方式访问
def build_pb_model_input(self, line):
    """Build one request dict per input text for the exported model.

    :param line: a list of texts; each element is passed as ``text_a``
        (``text_b`` is always ``None``).
    :return: a list with one dict per text, holding the keys ``input_ids``,
        ``input_mask``, ``segment_ids`` and ``label_ids``.
    :raises ValueError: if ``line`` is not a list.
    """
    if not isinstance(line, list):
        raise ValueError("List type required: %s" % (type(line)))

    requests_payload = []
    for position, text in enumerate(line):
        # The first label is only a placeholder needed to build the example.
        example = InputExample(guid=position, text_a=text, text_b=None, label=self.label_list[0])
        seq_len = FLAGS.max_seq_length
        feature = convert_single_example(position, example, self.label_list, seq_len, self.tokenizer)
        # Each feature vector becomes a 1-D array of length seq_len.
        flat_shape = (seq_len,)
        requests_payload.append({
            "input_ids": np.reshape(feature.input_ids, flat_shape),
            "input_mask": np.reshape(feature.input_mask, flat_shape),
            "segment_ids": np.reshape(feature.segment_ids, flat_shape),
            "label_ids": feature.label_id,
        })
    return requests_payload
def predict_online(self, line):
    """Run an online prediction through the RESTful endpoint.

    All texts in ``line`` are sent in a single request as ``instances``.

    :param line: a list of texts, forwarded to ``build_pb_model_input``.
    :return: the JSON body of the server response, decoded with
        ``json.loads``.
    """
    predict_list = self.build_pb_model_input(line)
    server_url = 'http://localhost:9000/v1/models/test:predict'
    # NumpyEncoder is needed because the request dicts contain numpy arrays.
    payload = json.dumps({"instances": predict_list}, cls=NumpyEncoder)
    response = requests.post(server_url, data=payload)
    # Hand the decoded response back to the caller; it used to be computed
    # and then silently dropped.
    return json.loads(response.text)
- grpc访问
import grpc
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc
def predict_online_grpc(self, line):
    """Run an online prediction through the gRPC endpoint.

    :param line: a non-empty list of texts; each element is passed as
        ``text_a`` (``text_b`` is always ``None``).
    :return: whatever ``stub.Predict`` returns for the request.
    :raises ValueError: if ``line`` is not a list or is empty.
    """
    if not isinstance(line, list):
        raise ValueError("List type required: %s" % (type(line)))
    if not line:
        # An empty batch used to fail later with an obscure indexing error.
        raise ValueError("At least one text is required for prediction")

    predict_list = []
    label_ids = None
    for index, text in enumerate(line):
        # The first label is only a placeholder needed to build the example.
        example = InputExample(guid=index, text_a=text, text_b=None, label=self.label_list[0])
        feature = convert_single_example(index, example, self.label_list, FLAGS.max_seq_length, self.tokenizer)
        flat_shape = (FLAGS.max_seq_length,)
        input_ids = np.reshape(feature.input_ids, flat_shape)
        input_mask = np.reshape(feature.input_mask, flat_shape)
        segment_ids = np.reshape(feature.segment_ids, flat_shape)
        label_ids = [feature.label_id]
        predict_list.append([input_ids, input_mask, segment_ids])
    # Axis 1 selects input_ids / input_mask / segment_ids for the whole batch.
    predict_list = np.array(predict_list).astype(dtype=np.int32)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = "test"
    request.model_spec.signature_name = "serving_default"
    request.inputs['input_ids'].CopyFrom(
        tf.contrib.util.make_tensor_proto(predict_list[:, 0]))
    request.inputs['input_mask'].CopyFrom(
        tf.contrib.util.make_tensor_proto(predict_list[:, 1]))
    request.inputs['segment_ids'].CopyFrom(
        tf.contrib.util.make_tensor_proto(predict_list[:, 2]))
    # NOTE(review): only the label of the last text is sent. The original
    # author observed that the content/shape of label_ids does not influence
    # the returned result, so the wire format is left unchanged - confirm.
    request.inputs['label_ids'].CopyFrom(
        tf.contrib.util.make_tensor_proto([label_ids]))

    # The context manager closes the channel once the call has finished.
    with grpc.insecure_channel(FLAGS.server) as channel:
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
        return stub.Predict(request, 10.0)  # 10 secs timeout