背景:
最近在做智能对话项目,用triton进行模型的部署和管理。
triton 除了部署模型外,还支持.py文件的推理。根据项目需求,需要将自定的python代码,作为模型部署到triton中,且模型的输入是文字。输出的结果是分词结果。
准备:
1.triton 21.12镜像
2.容器中安装 LAC库。百度分词库。
pip install lac -i https://mirror.baidu.com/pypi/simple
操作:
1、在模型仓库下,创建如图所示的目录结构;
lac – 模型名称,在配置文件,客户端输入时,都需要指定
model.py – 服务端,具体的代码实现
config.pbtxt – 模型的配置文件
2、config.pbtxt
name: "lac" # model name; must match the model directory name and the name clients pass when calling infer
backend: "python"
input [
{
name: "INPUT0" # input tensor name
data_type: TYPE_STRING # use TYPE_STRING when the input is a string
dims: [ 1 ] # one-dimensional input, i.e. a single string element
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_STRING # output is also a string tensor
dims: [ 1 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3.model.py #名称最好用model
import json
from LAC import LAC
import numpy as np
import triton_python_backend_utils as pb_utils
import logging
# Module-level logger that writes inference details to a file inside the
# model repository so server-side behavior can be inspected afterwards.
logger = logging.getLogger("test_logger")
logger.setLevel(logging.INFO)
# Append mode, utf-8 encoding so non-ASCII (Chinese) input text logs correctly.
# NOTE(review): the log path is hard-coded to /models/lac — this only works
# when the model repository is mounted at /models inside the container.
test_log = logging.FileHandler("/models/lac/test.log","a",encoding='utf-8')
test_log.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(filename)s - line:%(lineno)d - %(levelname)s - %(message)s -%(process)s')
test_log.setFormatter(formatter)
logger.addHandler(test_log)
class TritonPythonModel:
    """Triton Python-backend model wrapping the Baidu LAC word segmenter.

    Receives one UTF-8 string in tensor INPUT0 and returns the segmentation
    result as a string array in tensor OUTPUT0.
    """

    def initialize(self, args):
        """Called once at model load: parse the model config and build the segmenter."""
        self.model_config = model_config = json.loads(args['model_config'])
        out_cfg = pb_utils.get_output_config_by_name(model_config, "OUTPUT0")
        # Numpy dtype the OUTPUT0 tensor must be cast to before returning.
        self.output0_dtype = pb_utils.triton_string_to_numpy(out_cfg['data_type'])
        self.lac = LAC(mode="lac")

    def execute(self, requests):
        """Segment the input text of each request; return one response per request."""
        segmenter = self.lac
        # Reload the custom user dictionary on every call so that edits to
        # custom.txt take effect without restarting the server.
        segmenter.load_customization("/models/lac/custom.txt", sep=None)
        responses = []
        for req in requests:
            raw = pb_utils.get_input_tensor_by_name(req, 'INPUT0').as_numpy()
            # Only the first element is used: the client sends a single
            # UTF-8 encoded string that must be decoded server-side.
            sentence = raw[0].decode("utf-8")
            logger.info("text:%s", sentence)
            tokens = np.array(segmenter.run(sentence))
            out_tensor = pb_utils.Tensor('OUTPUT0', tokens.astype(self.output0_dtype))
            responses.append(pb_utils.InferenceResponse(output_tensors=[out_tensor]))
        return responses

    def finalize(self):
        """Called once at model unload."""
        print('Cleaning up...')
4.client.py #客户端调用
import numpy as np
import tritonclient.http as httpclient
if __name__ == '__main__':
    # Connect to Triton's HTTP endpoint (host port 8004 maps to container port 8000).
    triton_client = httpclient.InferenceServerClient(url='localhost:8004')

    # Strings are sent with datatype BYTES; shape [1] matches dims in config.pbtxt.
    text = "今天济南天气怎么样"
    input_data0 = np.array([text.encode("utf-8")], dtype=np.object_)
    infer_input = httpclient.InferInput('INPUT0', [1], "BYTES")
    infer_input.set_data_from_numpy(input_data0)

    requested_output = httpclient.InferRequestedOutput('OUTPUT0', binary_data=False)
    results = triton_client.infer(model_name='lac',
                                  inputs=[infer_input],
                                  outputs=[requested_output])

    output_data0 = results.as_numpy('OUTPUT0')
    print("input:", input_data0)
    print("output:", output_data0)
5.通过docker镜像启动服务
docker run --gpus=1 --rm -p8004:8000 -p8005:8001 -p8006:8002 -v /general-user/ai/triton/model_repository:/models nvcr.io/nvidia/tritonserver:21.12-py3 tritonserver --model-repository=/models --strict-model-config=true
/general-user/ai/triton/model_repository:/models
指定本地仓库和容器地址进行映射
6、输出结果