pt to ONNX
Replace `model` below with your own network architecture.
"""
如果保存的是模型参数
"""
import torch
import torchvision.models as models

torch_model = torch.load("test.pth")  # load the saved state_dict
model = models.resnet50()
model.fc = torch.nn.Linear(2048, 4)
model.load_state_dict(torch_model)
batch_size = 1  # batch size
input_shape = (3, 244, 384)  # input shape; change to match your own model
# set the model to inference mode
model.eval()
x = torch.randn(batch_size, *input_shape)  # dummy input tensor
export_onnx_file = "test.onnx"  # target ONNX file name
torch.onnx.export(model,
                  x,
                  export_onnx_file,
                  opset_version=10,
                  do_constant_folding=True,  # whether to run constant folding
                  input_names=["input"],  # input name
                  output_names=["output"],  # output name
                  dynamic_axes={"input": {0: "batch_size"},  # dynamic batch dimension
                                "output": {0: "batch_size"}})
In general, the conversion above is the one you will use.
"""
如果保存的是整个模型
"""
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.load("test.pth")  # load the whole PyTorch model object
model = model.to(device)  # keep model and dummy input on the same device
batch_size = 1  # batch size
input_shape = (3, 244, 384)  # input shape; change to match your own model
# set the model to inference mode
model.eval()
x = torch.randn(batch_size, *input_shape)  # dummy input tensor
x = x.to(device)
export_onnx_file = "test.onnx"  # target ONNX file name
torch.onnx.export(model,
                  x,
                  export_onnx_file,
                  opset_version=10,
                  do_constant_folding=True,  # whether to run constant folding
                  input_names=["input"],  # input name
                  output_names=["output"],  # output name
                  dynamic_axes={"input": {0: "batch_size"},  # dynamic batch dimension
                                "output": {0: "batch_size"}})
ONNX to TensorRT engine
import tensorrt as trt

def onnx_export_engine(model_name, workspace):
    path = 'test2/' + model_name + '.onnx'
    # path = r'D:\project\T2M-GPT2\weights\T2M-GPT-vqvae.onnx'  # hard-coded override used while debugging
    # create the builder
    logger = trt.Logger(trt.Logger.ERROR)
    builder = trt.Builder(logger)
    # create a builder config and cap the workspace memory, in GiB
    # (TensorRT >= 8.4; on older versions use config.max_workspace_size instead)
    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30)
    # create the network definition; explicit batch is required for ONNX parsing
    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    # parse the ONNX model
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(path)):
        raise RuntimeError(f'failed to load ONNX file: {path}')
    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]
    # define the dynamic-input profile: (min, opt, max) shapes for each dynamic input;
    # the shapes below are model-specific, so adjust them to your own network
    profile = builder.create_optimization_profile()
    profile.set_shape('input', (1, 1), (1, 20), (1, 55))
    config.add_optimization_profile(profile)
    # optionally build in FP16 when the platform supports it:
    # if builder.platform_has_fast_fp16:
    #     config.set_flag(trt.BuilderFlag.FP16)
    engine_path = model_name + '.engine'
    engine = builder.build_serialized_network(network, config)
    if engine is None:
        raise RuntimeError('engine build failed')
    with open(engine_path, 'wb') as t:
        t.write(engine)
    print('Conversion finished')

if __name__ == '__main__':
    model_names = ['resnet']
    for modelname in model_names:
        onnx_export_engine(modelname, 4)
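To check that the built engine actually carries the expected bindings and shapes, a small inspection sketch; it uses the same legacy binding API as the wrapper further below (deprecated in TensorRT 8.5+), and the engine file name matches the script above:

import tensorrt as trt

logger = trt.Logger(trt.Logger.ERROR)
with open('resnet.engine', 'rb') as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
for i in range(engine.num_bindings):
    kind = 'input' if engine.binding_is_input(i) else 'output'
    print(kind, engine.get_binding_name(i), engine.get_binding_shape(i))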
ONNX inference
import numpy as np
import torch
import onnxruntime as rt

# prepare input data (PyTorch tensors)
data = torch.randn(1, 512)
print("initial input:", data.shape, 'numpy input:', data.numpy().shape)
print("initial input:", torch.tensor([[1]]).shape, 'numpy input:', torch.tensor([[1]]).numpy().shape)
print('*' * 80)

# create the ONNX Runtime InferenceSession
sess = rt.InferenceSession(r'D:\project\T2M-GPT2\onnx\T2M-GPT-trans2.onnx',
                           providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
input_info = sess.get_inputs()
for inp in input_info:
    print("Input Name:", inp.name)
    print("Input Shape:", inp.shape)
output_info = sess.get_outputs()
for out in output_info:
    print("Output Name:", out.name)
    print("Output Shape:", out.shape)
print('*' * 80)

input_names = [inp.name for inp in input_info]
output_name = sess.get_outputs()[0].name
# the feed names ('input', 'idx') must match the names used at export time
pred_onx = sess.run([output_name],
                    {'input': data.numpy(),
                     'idx': torch.tensor([[256, 256, 417]]).numpy()})[0]
print("initial output:", pred_onx, pred_onx.shape, 'output type:', type(pred_onx))
print('*' * 80)
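For a rough latency figure, average over repeated runs after a few warm-ups. A minimal sketch reusing `sess`, `output_name`, and the feed tensors from above:

import time

feed = {'input': data.numpy(), 'idx': torch.tensor([[256, 256, 417]]).numpy()}
for _ in range(10):  # warm-up runs
    sess.run([output_name], feed)
n = 100
t0 = time.perf_counter()
for _ in range(n):
    sess.run([output_name], feed)
print(f'avg latency: {(time.perf_counter() - t0) / n * 1000:.2f} ms')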
TensorRT inference
from typing import Union, Optional, Sequence, Dict, Any
import torch
import tensorrt as trt

class TRTWrapper(torch.nn.Module):
    def __init__(self, engine: Union[str, trt.ICudaEngine],
                 output_names: Optional[Sequence[str]] = None) -> None:
        super().__init__()
        self.engine = engine
        if isinstance(self.engine, str):
            # deserialize the engine from file
            with trt.Logger() as logger, trt.Runtime(logger) as runtime:
                with open(self.engine, mode='rb') as f:
                    engine_bytes = f.read()
                self.engine = runtime.deserialize_cuda_engine(engine_bytes)
        self.context = self.engine.create_execution_context()
        names = [_ for _ in self.engine]  # iterating the engine yields binding names
        input_names = list(filter(self.engine.binding_is_input, names))
        self._input_names = input_names
        self._output_names = output_names
        if self._output_names is None:
            output_names = list(set(names) - set(input_names))
            self._output_names = output_names

    def forward(self, inputs: Dict[str, torch.Tensor]):
        assert self._input_names is not None
        assert self._output_names is not None
        bindings = [None] * (len(self._input_names) + len(self._output_names))
        profile_id = 0
        for input_name, input_tensor in inputs.items():
            # check that the input shape fits the engine's optimization profile
            profile = self.engine.get_profile_shape(profile_id, input_name)
            assert input_tensor.dim() == len(
                profile[0]), 'Input dim is different from engine profile.'
            for s_min, s_input, s_max in zip(profile[0], input_tensor.shape,
                                             profile[2]):
                assert s_min <= s_input <= s_max, \
                    'Input shape should be between ' \
                    + f'{profile[0]} and {profile[2]}' \
                    + f' but got {tuple(input_tensor.shape)}.'
            idx = self.engine.get_binding_index(input_name)
            # all input tensors must be GPU tensors
            assert 'cuda' in input_tensor.device.type
            input_tensor = input_tensor.contiguous()
            self.context.set_binding_shape(idx, tuple(input_tensor.shape))
            bindings[idx] = input_tensor.data_ptr()
        # create output tensors
        outputs = {}
        for output_name in self._output_names:
            idx = self.engine.get_binding_index(output_name)
            dtype = torch.float32  # output dtype
            shape = tuple(self.context.get_binding_shape(idx))
            device = torch.device('cuda')
            output = torch.empty(size=shape, dtype=dtype, device=device)
            outputs[output_name] = output
            bindings[idx] = output.data_ptr()
        self.context.execute_async_v2(bindings,
                                      torch.cuda.current_stream().cuda_stream)
        return outputs

if __name__ == "__main__":
    model = TRTWrapper('end2end.engine', ['simcc_x', 'simcc_y'])
    output = model(dict(input=torch.randn(1, 3, 256, 256).cuda()))
    # `output` is a dict keyed by output name, so index it rather than unpacking
    simcc_x, simcc_y = output['simcc_x'], output['simcc_y']
    # keypoints, scores = decode(simcc_x, simcc_y, simcc_split_ratio)
    # rescale keypoints
    # keypoints = keypoints / model_input_size * scale + center - scale / 2
    print(output)
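As with the ONNX export, it is worth checking the engine numerically. A minimal sketch, assuming a matching ONNX file `end2end.onnx` (an assumed name) exists alongside the engine and that the script above has defined `model`:

import numpy as np
import onnxruntime as rt
import torch

x = torch.randn(1, 3, 256, 256)
trt_out = model(dict(input=x.cuda()))['simcc_x'].cpu().numpy()
sess = rt.InferenceSession('end2end.onnx', providers=['CPUExecutionProvider'])
onnx_out = sess.run(['simcc_x'], {'input': x.numpy()})[0]
print('max abs diff:', np.max(np.abs(trt_out - onnx_out)))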
Notes
- When converting, the ONNX and TensorRT models are static graphs, so remove data-dependent `if`/`else` branches and `for` loops from the network's `forward`, and check operator support, e.g. `reshape` and the `@` operator (replace it with `torch.matmul`; see the sketch after this list). In my tests `torch.topk` is supported.
- During conversion I found that even with dynamic inputs configured, shapes can still get frozen to constants after tensor indexing. A workaround worth trying is to export the ONNX model with an empty tensor as input, or to have the model internally treat the first indexed tensor as empty; this worked in my tests.
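A minimal sketch of the kind of `forward` rewrite the first point describes; the module itself is hypothetical:

import torch

class TraceableBlock(torch.nn.Module):
    def forward(self, x, w):
        # a Python `if` on tensor values bakes one branch into the trace;
        # an elementwise torch.where keeps the exported graph static
        x = torch.where(x > 0, x * 2, x * 0.5)
        # call torch.matmul explicitly instead of using the `@` operator
        return torch.matmul(x, w)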