YOLOv10部署教程，使用tensorRT部署，有转化和推理代码

最新推荐文章于 2024-08-23 09:00:00 发布

一休哥※

最新推荐文章于 2024-08-23 09:00:00 发布

阅读量1.5k

点赞数 10

分类专栏：计算机视觉文章标签： YOLO

本文链接：https://blog.csdn.net/qq_44224801/article/details/140347499

版权

YOLOv10部署教程，使用tensorRT部署，有转化和推理代码

一、使用平台
- 1. 转化onnx模型
- 转化trt模型
模型推理
全部的代码

论文题目：YOLOv10: Real-Time End-to-End Object Detection
研究单位：清华大学
论文链接：http://arxiv.org/abs/2405.14458
代码链接：https://github.com/THU-MIG/yolov10

作者提供的模型性能评价图，如下：
在这里插入图片描述
YOLOv10-N:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10n.pt
YOLOv10-S:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10s.pt
YOLOv10-M:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10m.pt
YOLOv10-B:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10b.pt
YOLOv10-L:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10l.pt
YOLOv10-X:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10x.pt
推理时间速度很快，最主要是不需要后处理，就是网络比较难训练，有spa多占用了几g显存，并且收敛较慢

一、使用平台

win10、TensorRT=8.6.1

1. 转化onnx模型

git clone https://github.com/THU-MIG/yolov10.git
conda create -n YOLO python=3.9
conda activate YOLO
cd yolov10
pip install -r requirements.txt

下载pt模型
用下面代码转化

# -*- coding: utf-8 -*-
# @Time    : 2024/6/13 10:54
# @Site    : 
# @File    : export.py
# @Comment :
from ultralytics import YOLOv10

# Load a model
model = YOLOv10(r"yolov10s.pt")  # load an official model

# Export the model
model.export(format="onnx",device='0',batch=2,opset=12, half=True)

"""
Argument	Type	Default	Description
format	str	'torchscript'	Target format for the exported model, such as 'onnx', 'torchscript', 'tensorflow', or others, defining compatibility with various deployment environments.
imgsz	int or tuple	640	Desired image size for the model input. Can be an integer for square images or a tuple (height, width) for specific dimensions.
keras	bool	False	Enables export to Keras format for TensorFlow SavedModel, providing compatibility with TensorFlow serving and APIs.
optimize	bool	False	Applies optimization for mobile devices when exporting to TorchScript, potentially reducing model size and improving performance.
half	bool	False	Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware.
int8	bool	False	Activates INT8 quantization, further compressing the model and speeding up inference with minimal accuracy loss, primarily for edge devices.
dynamic	bool	False	Allows dynamic input sizes for ONNX and TensorRT exports, enhancing flexibility in handling varying image dimensions.
simplify	bool	False	Simplifies the model graph for ONNX exports with onnxslim, potentially improving performance and compatibility.
opset	int	None	Specifies the ONNX opset version for compatibility with different ONNX parsers and runtimes. If not set, uses the latest supported version.
workspace	float	4.0	Sets the maximum workspace size in GiB for TensorRT optimizations, balancing memory usage and performance.
nms	bool	False	Adds Non-Maximum Suppression (NMS) to the CoreML export, essential for accurate and efficient detection post-processing.
batch	int	1	Specifies export model batch inference size or the max number of images the exported model will process concurrently in predict mode.
"""

转化trt模型

import onnx
import tensorrt as trt
# import sys
# sys.setrecursionlimit(500000)


def onnx_export_engine(workspace,onnx_path,trt_path):

    #创建构建器
    logger=trt.Logger(trt.Logger.WARNING)
    builder=trt.Builder(logger)

    #创建一个构建配置
    config=builder.create_builder_config()
    config.max_workspace_size=workspace*1<<30

    #创建网络定义
    flag=(1<<int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network=builder.create_network(flag)

    #导入onnx模型
    parser=trt.OnnxParser(network,logger)

    if not parser.parse_from_file(str(onnx_path)):
        raise RuntimeError(f'failed to load ONNX file: {onnx}')

    inputs=[network.get_input(i) for i in range(network.num_inputs)]
    outputs=[network.get_output(i) for i in  range(network.num_outputs)]
    # network.get_input(0).setAllowedFormats(int)
    # network.get_input(1).setAllowedFormats(int)

    # for inp in inputs:
    #     LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
    # for out in outputs:
    #     LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
    #
    # LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 else 32} engine in {f}')

    # if builder.platform_has_fast_fp16:
    #
    #     config.set_flag(trt.BuilderFlag.FP16)
    # config.set_flag(trt.BuilderFlag.FP16)
    engine_path=trt_path

    with builder.build_serialized_network(network,config) as engine:
        with open(engine_path,'wb') as t:
            # t.write(engine.serialize())
            t.write(engine)

    print('转化完成')



if __name__ == '__main__':
    onnx_path='weights2/best.onnx'
    trt_path='end2end.engine'
    onnx_export_engine(4,onnx_path,trt_path)

模型推理

定义变量

from models import TRTModule  # isort:skip
import argparse
import cv2
from numpy import ndarray
import time
import random
import numpy as np
import os
import pickle
from collections import defaultdict, namedtuple
from pathlib import Path
from typing import List, Optional, Tuple, Union
import onnx
import tensorrt as trt
import torch

os.environ['CUDA_MODULE_LOADING'] = 'LAZY'
random.seed(0)

# detection model classes
CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
           'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
           'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
           'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
           'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
           'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
           'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
           'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
           'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
           'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
           'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
           'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
           'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
           'scissors', 'teddy bear', 'hair drier', 'toothbrush')
# # three:
# CLASSES = (
#     'person', 'sports ball', 'car'
# )

# colors for per classes
COLORS = {
   
    cls: [random.randint(0, 255) for _ in range(3)]
    for i, cls in enumerate(CLASSES)
}
# image suffixs
SUFFIXS = ('.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png', '.tif', '.tiff',
           '.webp', '.pfm')

定义模型加载类

class TRTModule(torch.nn.Module):
    dtypeMapping = {
   
        trt.bool: torch.bool,
        trt.int8: torch.int8,
        trt.int32: torch.int32,
        trt.float16: torch.float16,
        trt.float32: torch.float32
    }

    def __init__(self, weight: Union[str, Path],
                 device: Optional[torch.device]) -> None:
        super(TRTModule, self).__init__()
        self.weight = Path(weight) if isinstance(weight, str) else weight
        self.device = device if device is not None else torch.device('cuda:0')
        self.stream = torch.cuda.Stream(device=device)
        self.__init_engine()
        self.__init_bindings()

    def __init_engine(self) -> None:
        logger = trt.Logger(trt.Logger.WARNING)
        trt.init_libnvinfer_plugins(logger, namespace='')
        with trt.Runtime(logger) as runtime:
            model = runtime.deserialize_cuda_engine(self.weight.read_bytes())

        context = model.create_execution_context()
        num_bindings = model.num_bindings
        names = [model.get_binding_name(i) for i in range(num_bindings)]

        self.bindings: List[int] = [0] * num_bindings
        num_inputs, num_outputs = 0, 0

        for i in range(num_bindings):
            if model.binding_is_input(i):
                num_inputs += 1
            else:
                num_outputs += 1

        self.num_bindings = num_bindings
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.model = model
        self.context = context
        self.input_names = names[:num_inputs]
        self.output_names = names[num_inputs:]
        self.idx = list(range(self.num_outputs))

    def __init_bindings(self) -> None:
        idynamic = odynamic = False
        Tensor = namedtuple('Tensor', ('name', 'dtype', 'shape'))
        inp_info = []
        out_info = []
        for i, name in enumerate(self.input_names):
            assert self.model.get_binding_name(i) == name
            dtype = self.dtypeMapping[self.model.get_binding_dtype(i)]
            shape = tuple(self.model.get_binding_shape(i))
            if -1 in shape:
                idynamic |= True
            inp_info.append(Tensor(name, dtype, shape))
        for i, name in enumerate(self.output_names):
            i += self.num_inputs
            assert self.model.get_binding_name(i) == name
            dtype = self.dtypeMapping[self.model.get_binding_dtype(i)]
            shape = tuple(self.model.get_binding_shape(i))
            if -1 in shape:
                odynamic |= True
            out_info.append(Tensor(name, dtype, shape))

        if not odynamic:
            self.output_tensor = [
                torch.empty(info.shape, dtype=info.dtype, device=self.device)
                for info in out_info
            ]
        self.idynamic = idynamic
        self.odynamic = odynamic
        self.inp_info = inp_info
        self.out_info