OpenMMLab导出mobilenet-v2模型并用onnxruntime和tensorrt推理

导出onnx文件

使用mmpretrain导出mobilenet-v2的onnx模型:

import torch
from mmpretrain import get_model


# Build MobileNet-V2 from the mmpretrain model zoo with the local checkpoint, on CPU.
model = get_model('mobilenet-v2_8xb32_in1k', pretrained='mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth', device='cpu')
# Dummy 1x3x224x224 tensor used only to trace the graph during export.
dummy_input = torch.zeros(1, 3, 224, 224)
# Sanity forward pass before exporting.
out = model(dummy_input)
# The file name uses an underscore so that it matches the name loaded by the
# onnxruntime script and by the trtexec command later in this article.
torch.onnx.export(model, dummy_input, "mobilenet_v2.onnx", opset_version=11)

安装有mmdeploy的话可以通过如下方法导出:

from mmdeploy.apis import torch2onnx
from mmdeploy.backend.sdk.export_info import export2SDK


# Paths of the configs and the checkpoint used for the conversion.
deploy_config_path = 'mmdeploy/configs/mmpretrain/classification_onnxruntime_static.py'
model_config_path = 'mmpretrain/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py'
checkpoint_path = './checkpoints/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth'

# Sample image, output directory, output file name and target device.
sample_image = 'goldfish.jpg'
output_dir = './work_dir/onnx/mobilenet_v2'
onnx_file = './end2end.onnx'
target_device = 'cpu'

# Step 1: convert the PyTorch model to ONNX.
torch2onnx(
    sample_image,
    output_dir,
    onnx_file,
    deploy_config_path,
    model_config_path,
    checkpoint_path,
    target_device,
)

# Step 2: dump the pipeline info needed by the SDK into the same directory.
export2SDK(
    deploy_config_path,
    model_config_path,
    output_dir,
    pth=checkpoint_path,
    device=target_device,
)

onnxruntime推理

通过onnxruntime进行推理:

import cv2
import numpy as np
import onnxruntime


if __name__ == '__main__':
    image_path = 'goldfish.jpg'
    img = cv2.imread(image_path)
    # cv2.imread returns None (it does not raise) when the file is missing or
    # cannot be decoded, so fail early with a clear message.
    if img is None:
        raise FileNotFoundError(f'cannot read image: {image_path}')

    # Resize so that the shorter edge becomes 256, keeping the aspect ratio.
    # cv2.resize takes the target size as (width, height).
    height, width = img.shape[:2]
    if height < width:
        img = cv2.resize(img, (int(256 * width / height), 256))
    else:
        img = cv2.resize(img, (256, int(256 * height / width)))

    # Take the largest centered square and scale it to 224x224.
    # NOTE(review): the mmpretrain test pipeline presumably center-crops 224
    # directly instead of crop-then-resize -- confirm against the model config.
    crop_size = min(img.shape[0], img.shape[1])
    left = (img.shape[1] - crop_size) // 2
    top = (img.shape[0] - crop_size) // 2
    img_crop = img[top:top + crop_size, left:left + crop_size]
    img_crop = cv2.resize(img_crop, (224, 224))

    # BGR -> RGB and HWC -> CHW, then per-channel mean/std normalization.
    mean = np.array([123.675, 116.28, 103.53], dtype=np.float32).reshape(3, 1, 1)
    std = np.array([58.395, 57.12, 57.375], dtype=np.float32).reshape(3, 1, 1)
    chw = img_crop[:, :, ::-1].transpose(2, 0, 1).astype(np.float32)
    chw = (chw - mean) / std
    # Add the batch dimension and hand the runtime a C-contiguous buffer.
    input_tensor = np.ascontiguousarray(chw[np.newaxis])

    onnx_session = onnxruntime.InferenceSession("mobilenet_v2.onnx", providers=['CPUExecutionProvider'])

    # Feed the same tensor to every graph input, keyed by input name.
    input_feed = {node.name: input_tensor for node in onnx_session.get_inputs()}

    # Passing None as the output list requests all outputs; the first one is
    # used as the class scores.
    scores = onnx_session.run(None, input_feed)[0]
    print(np.argmax(scores))

使用mmdeploy推理:

from mmdeploy.apis import inference_model

# Model config, deploy config, input image, backend model file and device.
# (The name `model_cfg` was previously broken across two lines, which raised
# a NameError when the snippet was run.)
model_cfg = 'mmpretrain/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py'
deploy_cfg = 'mmdeploy/configs/mmpretrain/classification_onnxruntime_static.py'
img = 'goldfish.jpg'
backend_files = ['work_dir/onnx/mobilenet_v2/end2end.onnx']
device = 'cpu'

# Run inference through the mmdeploy Python API and print the result.
result = inference_model(model_cfg, deploy_cfg, backend_files, img, device)
print(result)

或者

import cv2
from mmdeploy_runtime import Classifier


# Load the test image with OpenCV.
image = cv2.imread('goldfish.jpg')
# Create the SDK classifier from the exported model directory, running on CPU.
sdk_classifier = Classifier(model_path='work_dir/onnx/mobilenet_v2', device_name='cpu')
# Each prediction is a (label_id, score) pair; print them one per line.
predictions = sdk_classifier(image)
for label_id, score in predictions:
    print(label_id, score)

导出engine文件

这里通过trtexec转换onnx文件,笔者使用的版本是TensorRT-8.2.1.8。

./trtexec.exe --onnx=mobilenet_v2.onnx --saveEngine=mobilenet_v2.engine

tensorrt推理

import cv2
import numpy as np
import tensorrt as trt
import pycuda.autoinit  #负责数据初始化,内存管理,销毁等
import pycuda.driver as cuda  #GPU CPU之间的数据传输


if __name__ == '__main__':
    # Logger that reports TensorRT warnings and errors.
    logger = trt.Logger(trt.Logger.WARNING)
    # Create a runtime and deserialize the engine file.
    with open("mobilenet_v2.engine", "rb") as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    # Deserialization returns None on failure (e.g. a TensorRT version
    # mismatch); stop here instead of failing later with an AttributeError.
    if engine is None:
        raise RuntimeError('failed to deserialize mobilenet_v2.engine')

    image_path = 'goldfish.jpg'
    img = cv2.imread(image_path)
    # cv2.imread returns None (it does not raise) when the file is missing or
    # cannot be decoded, so fail early with a clear message.
    if img is None:
        raise FileNotFoundError(f'cannot read image: {image_path}')

    # Resize so that the shorter edge becomes 256, keeping the aspect ratio.
    # cv2.resize takes the target size as (width, height).
    height, width = img.shape[:2]
    if height < width:
        img = cv2.resize(img, (int(256 * width / height), 256))
    else:
        img = cv2.resize(img, (256, int(256 * height / width)))

    # Take the largest centered square and scale it to 224x224.
    # NOTE(review): the mmpretrain test pipeline presumably center-crops 224
    # directly instead of crop-then-resize -- confirm against the model config.
    crop_size = min(img.shape[0], img.shape[1])
    left = (img.shape[1] - crop_size) // 2
    top = (img.shape[0] - crop_size) // 2
    img_crop = img[top:top + crop_size, left:left + crop_size]
    img_crop = cv2.resize(img_crop, (224, 224))

    # BGR -> RGB and HWC -> CHW, then per-channel mean/std normalization.
    mean = np.array([123.675, 116.28, 103.53], dtype=np.float32).reshape(3, 1, 1)
    std = np.array([58.395, 57.12, 57.375], dtype=np.float32).reshape(3, 1, 1)
    chw = img_crop[:, :, ::-1].transpose(2, 0, 1).astype(np.float32)
    chw = (chw - mean) / std
    input_tensor = chw[np.newaxis]

    # Create the execution context once and reuse it both for querying the
    # binding shapes and for running inference.
    with engine.create_execution_context() as context:
        # Page-locked host buffers and device buffers.
        # Binding 0 is used as the input and binding 1 as the output.
        h_input = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(0)), dtype=np.float32)
        h_output = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(1)), dtype=np.float32)
        d_input = cuda.mem_alloc(h_input.nbytes)
        d_output = cuda.mem_alloc(h_output.nbytes)
        # CUDA stream on which the copies and the inference are queued.
        stream = cuda.Stream()

        # ravel() flattens in C order, matching the flat host buffer.
        np.copyto(h_input, input_tensor.ravel())

        # Transfer input data to the GPU.
        cuda.memcpy_htod_async(d_input, h_input, stream)
        # Run inference.
        context.execute_async_v2(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        cuda.memcpy_dtoh_async(h_output, d_output, stream)
        # Wait until all queued work on the stream has finished.
        stream.synchronize()
        # The host output buffer now holds the model output; print the top-1 index.
        pred = np.argmax(h_output)
        print(pred)

使用mmdeploy推理:

from mmdeploy.apis import inference_model


# Input image and target device.
image_file = 'goldfish.jpg'
target_device = 'cuda'

# Model config, TensorRT deploy config and the serialized engine to load.
model_config_path = 'mmpretrain/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py'
deploy_config_path = 'mmdeploy/configs/mmpretrain/classification_tensorrt_static-224x224.py'
engine_files = ['work_dir/trt/mobilenet_v2/end2end.engine']

# Run inference through the mmdeploy Python API and print the result.
prediction = inference_model(
    model_config_path,
    deploy_config_path,
    engine_files,
    image_file,
    target_device,
)
print(prediction)

或者

import cv2
from mmdeploy_runtime import Classifier


# Load the test image with OpenCV.
img = cv2.imread('goldfish.jpg')
# This snippet belongs to the TensorRT section, so point the SDK at the
# TensorRT model directory (the one holding end2end.engine) and run on the
# GPU; the previous ONNX path with 'cpu' was left over from the ONNX section.
classifier = Classifier(model_path='work_dir/trt/mobilenet_v2', device_name='cuda')

# Each prediction is a (label_id, score) pair; print them one per line.
result = classifier(img)
for label_id, score in result:
    print(label_id, score)
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

给算法爸爸上香

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值