
不同框架下跑yolov10(pt、onnx_runtime、tensorrt)


1.1 我的配置版本(不同配置对应版本不同)


1.2 检查python版本


1.3 创建虚拟环境

conda create -n yolov10_tensorrt python=3.8.8

1.4 检查CUDA版本(安装步骤跳过)

cat /usr/local/cuda/version.txt


1.5 安装、检查cuDNN版本

1.5.1 下载 cudnn安装包

https://developer.nvidia.com/rdp/cudnn-archive#a-collapse805-111

wget https://developer.download.nvidia.com/compute/machine-learning/cudnn/secure/

1.5.2 解压文件

tar -xvf cudnn-11.3-linux-x64-v8.2.1.32.tgz

1.5.3 将解压后的头文件和库复制到cuda目录中:

cd cuda
sudo cp include/cudnn*    /usr/local/cuda/include
sudo cp lib64/libcudnn*    /usr/local/cuda/lib64
sudo chmod a+r /usr/local/cuda/include/cudnn*   /usr/local/cuda/lib64/libcudnn*
# 环境变量配置
vim ~/.bashrc
# 新增一行
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
# 保存并退出
source ~/.bashrc

1.5.4 cuDNN安装完成,查看安装的版本:

cat /usr/local/cuda/include/cudnn_version.h | grep CUDNN_MAJOR -A 2


1.6 安装pycuda

虚拟环境里执行conda install 可安装成功

conda install -c conda-forge pycuda

1.7 安装:TensorRT

1.7.1 下载TensorRT


https://developer.nvidia.com/tensorrt-download

wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.1/tars/TensorRT-

下载文件:TensorRT-

1.7.2 安装TensorRT

## step 1
# 解压
cd ./TensorRT_ONNX
tar -xzvf TensorRT-
# 生成新目录 TensorRT-

## step 2
# 环境变量配置
vim ~/.bashrc
# 新增
export LD_LIBRARY_PATH=./TensorRT_ONNX/TensorRT-$LD_LIBRARY_PATH
export PATH=./TensorRT_ONNX/TensorRT-$PATH
export LIBRARY_PATH=./TensorRT_ONNX/TensorRT-$LIBRARY_PATH
# 保存并退出
source ~/.bashrc

## step 3
cd ./TensorRT_ONNX/TensorRT-
pip install tensorrt-8.6.1-cp38-none-linux_x86_64.whl

## step 4 验证安装正确性
python3
import tensorrt
print(tensorrt.__version__)





cp -r ./tools/anaconda3/lib/python3.8/site-packages/tensorrt ./tools/anaconda3/envs/yolov10_tensorrt/lib/python3.8/site-packages

1.8 下载yolov10代码,安装yolov10环境

这部分不是必须的,你只要能安装运行需要的库就行,但是一个一个安装不是缺这个就是少那个,还不如一键安装。

git clone https://github.com/THU-MIG/yolov10

cd yolov10
pip install -r requirements.txt
pip install -e .




2.1 pt推理

2.1.1 使用pt模型预测代码

from ultralytics import YOLOv10
import glob
import os
import numpy as np
import cv2
import time

# Label names looked up by the integer class id of each predicted box.
classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
           'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
           'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
           'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
           'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
           'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
           'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
           'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
           'hair drier', 'toothbrush']


class Colors:
    """Ultralytics color palette https://ultralytics.com/."""

    def __init__(self):
        """Build the palette as a list of RGB tuples from 20 hex color codes."""
        hexs = ('FF3838', '00C2FF', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
                '2C99A8', 'FF9D97', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        """Return the palette color for index ``i`` (wrapping around the palette).

        The color is an (R, G, B) tuple, or (B, G, R) when ``bgr`` is true.
        """
        c = self.palette[int(i) % self.n]
        return (c[2], c[1], c[0]) if bgr else c

    @staticmethod
    def hex2rgb(h):  # rgb order (PIL)
        """Convert a '#RRGGBB' string into an (R, G, B) tuple of ints."""
        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))


colors = Colors()  # create instance for 'from utils.plots import colors'

imgpath = r'./TensorRT_ONNX/yolov10_onnx_rknn_horizon_tensorRT/yolov10/figures'
modelpath = r'./TensorRT_ONNX/yolov10_onnx_rknn_horizon_tensorRT/yolov10/models/yolov10n.pt'
save_dir = imgpath + '_Rst2'
os.makedirs(save_dir, exist_ok=True)
model = YOLOv10(modelpath)

imgs = glob.glob(os.path.join(imgpath, '*.jpg'))
for img in imgs:
    # basename() instead of split('/') so the name is extracted correctly
    # regardless of the platform's path separator.
    imgname = os.path.basename(img)
    frame = cv2.imread(img)
    if frame is None:
        # imread signals an unreadable file by returning None; skip it rather
        # than crash later when drawing on it.
        print(f"Skipping unreadable image: {img}")
        continue

    # Time only the prediction call.
    start_time = time.time()
    results = model.predict(img)[0]
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"The elapsed time is {elapsed_time} seconds.")

    for box in results.boxes:
        xyxy = box.xyxy.squeeze().tolist()
        x1, y1, x2, y2 = int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])
        c, conf = int(box.cls), float(box.conf)
        name = classes[c]
        color = colors(c, True)  # BGR order, as used by the cv2 drawing calls
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness=2, lineType=cv2.LINE_AA)
        cv2.putText(frame, f"{name}: {conf:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, color,
                    2)

    save_path = os.path.join(save_dir, imgname)
    print(save_path)
    cv2.imwrite(save_path, frame)

2.1.2 使用pt模型预测时间



2.1.3 使用pt模型预测结果



git clone https://github.com/cqu20160901/yolov10_onnx_rknn_horizon_tensorRT

2.2 onnx推理

2.2.1 使用onnx模型预测代码

#!/usr/bin/env python3# -*- coding:utf-8 -*-import argparseimport osimport sysimport cv2import numpy as npimport onnxruntime as ortfrom math import expimport time
# Label names, indexed by the class id produced in postprocess().
CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
           'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
           'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
           'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
           'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
           'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
           'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
           'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
           'hair drier', 'toothbrush']

# Flat [x0, y0, x1, y1, ...] list of grid-cell centres; filled by GenerateMeshgrid().
meshgrid = []

# Network input resolution.
input_height = 640
input_width = 640

# One entry per detection head: its stride and the resulting [rows, cols] grid.
strides = [8, 16, 32]
head_num = len(strides)
map_size = [[input_height // s, input_width // s] for s in strides]

class_num = len(CLASSES)
# Minimum sigmoid class score for a cell to produce a box.
object_thresh = 0.4
# Maximum number of boxes kept by TopK().
topK = 50
class DetectBox:
    """Plain record for one detection: class id, score and box corners."""

    def __init__(self, classId, score, xmin, ymin, xmax, ymax):
        # Predicted label and its confidence.
        self.classId, self.score = classId, score
        # Top-left and bottom-right corners of the box.
        self.xmin, self.ymin = xmin, ymin
        self.xmax, self.ymax = xmax, ymax
def GenerateMeshgrid():
    """Append every grid cell's centre to the module-level ``meshgrid`` list.

    Heads are visited in order, cells row by row; each cell contributes two
    values, ``col + 0.5`` followed by ``row + 0.5``.
    """
    for head in range(head_num):
        rows = map_size[head][0]
        cols = map_size[head][1]
        meshgrid.extend(
            coord
            for row in range(rows)
            for col in range(cols)
            for coord in (col + 0.5, row + 0.5)
        )
def TopK(detectResult):
    """Keep at most ``topK`` detections, preferring the highest scores.

    When there are no more than ``topK`` detections the input list is returned
    as-is (unsorted); otherwise a new list with the ``topK`` best-scoring
    boxes, in descending score order, is returned.
    """
    if len(detectResult) > topK:
        ranked = sorted(detectResult, key=lambda box: box.score, reverse=True)
        return ranked[:topK]
    return detectResult
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) of ``x``.

    The two branches are algebraically identical; which one is used depends on
    the sign of ``x`` so that ``exp`` is only ever called with a non-positive
    argument. The naive form raises OverflowError for very negative ``x``.
    """
    if x >= 0:
        return 1 / (1 + exp(-x))
    z = exp(x)
    return z / (1 + z)
def postprocess(out, img_h, img_w):
    """Decode raw head outputs into at most ``topK`` DetectBox objects.

    ``out`` holds two arrays per detection head, ordered (regression,
    classification). Each array is flattened and read as channel planes over
    the head's ``map_size`` grid: 4 * 16 distribution bins for regression and
    ``class_num`` logits for classification.

    Unlike the earlier implementation, the arrays in ``out`` are not modified.

    Args:
        out: sequence of arrays exposing ``reshape``; two per head.
        img_h: height of the original image.
        img_w: width of the original image.

    Returns:
        The boxes whose best class score exceeds ``object_thresh``, rescaled
        from network-input size to the original image, clipped to the image
        bounds and reduced by TopK().
    """
    print('postprocess ... ')

    output = [head_out.reshape(-1) for head_out in out]

    scale_h = img_h / input_height
    scale_w = img_w / input_width

    detectResult = []
    # Index into the flat meshgrid list; advances by 2 (x, y) for every cell.
    gridIndex = -2

    for index in range(head_num):
        reg = output[index * 2 + 0]
        cls = output[index * 2 + 1]
        grid_h = map_size[index][0]
        grid_w = map_size[index][1]
        plane = grid_h * grid_w  # number of cells in one channel plane
        stride = strides[index]

        for h in range(grid_h):
            for w in range(grid_w):
                gridIndex += 2
                cell = h * grid_w + w

                # Pick the class with the largest logit, then squash only that one.
                cls_index = 0
                cls_max = cls[cell]
                for cl in range(1, class_num):
                    cls_val = cls[cl * plane + cell]
                    if cls_val > cls_max:
                        cls_max = cls_val
                        cls_index = cl
                cls_max = sigmoid(cls_max)

                if cls_max <= object_thresh:
                    continue

                # For each of the 4 box sides: softmax over its 16 bins and take
                # the expected bin index as the distance (in grid units).
                regdfl = []
                for lc in range(4):
                    logits = [float(reg[(lc * 16 + df) * plane + cell]) for df in range(16)]
                    # Subtracting the maximum keeps exp() from overflowing and
                    # does not change the softmax result.
                    peak = max(logits)
                    weights = [exp(value - peak) for value in logits]
                    sfsum = sum(weights)
                    locval = sum(weight * df for df, weight in enumerate(weights)) / sfsum
                    regdfl.append(locval)

                center_x = meshgrid[gridIndex + 0]
                center_y = meshgrid[gridIndex + 1]
                x1 = (center_x - regdfl[0]) * stride
                y1 = (center_y - regdfl[1]) * stride
                x2 = (center_x + regdfl[2]) * stride
                y2 = (center_y + regdfl[3]) * stride

                # Rescale to the original image and clip to its bounds.
                xmin = x1 * scale_w
                ymin = y1 * scale_h
                xmax = x2 * scale_w
                ymax = y2 * scale_h

                xmin = xmin if xmin > 0 else 0
                ymin = ymin if ymin > 0 else 0
                xmax = xmax if xmax < img_w else img_w
                ymax = ymax if ymax < img_h else img_h

                detectResult.append(DetectBox(cls_index, cls_max, xmin, ymin, xmax, ymax))

    # topK
    print('before topK num is:', len(detectResult))
    predBox = TopK(detectResult)

    return predBox
def precess_image(img_src, resize_w, resize_h):
    """Resize, convert BGR to RGB and scale an image for the network.

    Args:
        img_src: image array accepted by ``cv2.resize``; treated as BGR.
        resize_w: target width.
        resize_h: target height.

    Returns:
        float32 array of the resized RGB image with every value divided by 255.
    """
    resized = cv2.resize(img_src, (resize_w, resize_h), interpolation=cv2.INTER_LINEAR)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    return rgb.astype(np.float32) / 255.0
def detect(img_path, model_path='./yolov10n_zq.onnx', output_path='./test_onnx_result.jpg'):
    """Run the ONNX model on one image and save an annotated copy.

    Args:
        img_path: path of the image to process.
        model_path: ONNX model file loaded with onnxruntime.
        output_path: where the image with drawn boxes is written.

    Raises:
        FileNotFoundError: if ``img_path`` cannot be read as an image.
    """
    orig = cv2.imread(img_path)
    if orig is None:
        # imread returns None instead of raising when the file is missing or
        # not decodable; fail here with a clear message.
        raise FileNotFoundError(f'cannot read image: {img_path}')

    # The measured time covers preprocessing, session creation, inference and
    # postprocessing.
    start_time = time.time()
    img_h, img_w = orig.shape[:2]
    image = precess_image(orig, input_width, input_height)

    # HWC -> CHW, then add the leading batch axis.
    image = image.transpose((2, 0, 1))
    image = np.expand_dims(image, axis=0)

    ort_session = ort.InferenceSession(model_path)
    # Ask the model for its input name rather than hard-coding it.
    input_name = ort_session.get_inputs()[0].name
    pred_results = ort_session.run(None, {input_name: image})

    predbox = postprocess(list(pred_results), img_h, img_w)

    end_time = time.time()
    execution_time = end_time - start_time  # elapsed time in seconds
    print("execution_time:", execution_time)

    print('after topk num is :', len(predbox))

    for box in predbox:
        xmin = int(box.xmin)
        ymin = int(box.ymin)
        xmax = int(box.xmax)
        ymax = int(box.ymax)

        cv2.rectangle(orig, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
        ptext = (xmin, ymin + 10)
        title = CLASSES[box.classId] + "%.2f" % box.score
        cv2.putText(orig, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 0, 255), 2, cv2.LINE_AA)

    cv2.imwrite(output_path, orig)
if __name__ == '__main__':
    # Script entry point: the grid centres must exist before detect() runs,
    # because postprocess() reads them from the module-level meshgrid.
    print('This is main ....')
    img_path = './test.jpg'
    GenerateMeshgrid()
    detect(img_path)

2.2.2 使用onnx模型预测时间



2.2.3 使用onnx模型预测结果


2.3 tensorrt推理

2.3.1 使用tensorrt模型预测代码

import cv2import numpy as npimport tensorrt as trtimport pycuda.driver as cudaimport pycuda.autoinitfrom math import expfrom math import sqrtimport time
# Define ``np.bool`` as an alias of ``np.bool_``.
# NOTE(review): presumably a compatibility shim for NumPy releases that dropped
# ``np.bool`` while the TensorRT Python bindings still reference it — confirm.
np.bool = np.bool_
# Module-wide TensorRT logger at VERBOSE severity; passed to trt.Runtime in
# get_engine_from_bin().
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
# Label names, indexed by the class id produced in postprocess().
CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
           'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
           'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
           'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
           'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
           'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
           'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
           'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
           'hair drier', 'toothbrush']

# Flat [x0, y0, x1, y1, ...] list of grid-cell centres; filled by GenerateMeshgrid().
meshgrid = []

# Network input resolution.
input_height = 640
input_width = 640

# One entry per detection head: its stride and the resulting [rows, cols] grid.
strides = [8, 16, 32]
head_num = len(strides)
map_size = [[input_height // s, input_width // s] for s in strides]

class_num = len(CLASSES)
# Minimum sigmoid class score for a cell to produce a box.
object_thresh = 0.4
# Maximum number of boxes kept by TopK().
topK = 50
# Simple helper data class that's a little nicer to use than a 2-tuple.
class HostDeviceMem(object):
    """Pair of a host buffer and its matching device buffer."""

    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        """Return both buffers rendered with ``str()``, each under its own heading."""
        return f"Host:\n{self.host!s}\nDevice:\n{self.device!s}"

    def __repr__(self):
        """Same text as ``__str__``."""
        return str(self)
def allocate_buffers(engine):
    """Allocate a host/device buffer pair for every binding of ``engine``.

    Returns:
        ``(inputs, outputs, bindings, stream)``: lists of HostDeviceMem for the
        input and output bindings, the device addresses of all bindings as
        ints (in engine iteration order), and a new CUDA stream.
    """
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()

    for binding in engine:
        element_count = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        np_dtype = trt.nptype(engine.get_binding_dtype(binding))

        # Page-locked host memory plus a device allocation of the same byte size.
        host_buf = cuda.pagelocked_empty(element_count, np_dtype)
        device_buf = cuda.mem_alloc(host_buf.nbytes)
        bindings.append(int(device_buf))

        target = inputs if engine.binding_is_input(binding) else outputs
        target.append(HostDeviceMem(host_buf, device_buf))

    return inputs, outputs, bindings, stream
def get_engine_from_bin(engine_file_path):
    """Read a serialized engine file and deserialize it with a TensorRT runtime.

    Returns whatever ``runtime.deserialize_cuda_engine`` returns for the
    file's bytes.
    """
    print('Reading engine from file {}'.format(engine_file_path))
    with open(engine_file_path, 'rb') as engine_file:
        with trt.Runtime(TRT_LOGGER) as runtime:
            serialized = engine_file.read()
            return runtime.deserialize_cuda_engine(serialized)
# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Copy inputs to the device, run the engine, and copy outputs back.

    All three steps are enqueued on ``stream``; the function returns only
    after the stream has been synchronized.

    Returns:
        List with the host buffer of every entry in ``outputs``.
    """
    # Host -> device for every input.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)

    # Inference.
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)

    # Device -> host for every output.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)

    # Wait until everything enqueued above has finished.
    stream.synchronize()

    host_results = []
    for out in outputs:
        host_results.append(out.host)
    return host_results
class DetectBox:
    """Plain record for one detection: class id, score and box corners."""

    def __init__(self, classId, score, xmin, ymin, xmax, ymax):
        # Predicted label and its confidence.
        self.classId, self.score = classId, score
        # Top-left and bottom-right corners of the box.
        self.xmin, self.ymin = xmin, ymin
        self.xmax, self.ymax = xmax, ymax
def GenerateMeshgrid():
    """Append every grid cell's centre to the module-level ``meshgrid`` list.

    Heads are visited in order, cells row by row; each cell contributes two
    values, ``col + 0.5`` followed by ``row + 0.5``.
    """
    for head in range(head_num):
        rows = map_size[head][0]
        cols = map_size[head][1]
        meshgrid.extend(
            coord
            for row in range(rows)
            for col in range(cols)
            for coord in (col + 0.5, row + 0.5)
        )
def TopK(detectResult):
    """Keep at most ``topK`` detections, preferring the highest scores.

    When there are no more than ``topK`` detections the input list is returned
    as-is (unsorted); otherwise a new list with the ``topK`` best-scoring
    boxes, in descending score order, is returned.
    """
    if len(detectResult) > topK:
        ranked = sorted(detectResult, key=lambda box: box.score, reverse=True)
        return ranked[:topK]
    return detectResult
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) of ``x``.

    The two branches are algebraically identical; which one is used depends on
    the sign of ``x`` so that ``exp`` is only ever called with a non-positive
    argument. The naive form raises OverflowError for very negative ``x``.
    """
    if x >= 0:
        return 1 / (1 + exp(-x))
    z = exp(x)
    return z / (1 + z)
def postprocess(out, img_h, img_w):
    """Decode raw head outputs into at most ``topK`` DetectBox objects.

    ``out`` holds two arrays per detection head, ordered (regression,
    classification). Each array is flattened and read as channel planes over
    the head's ``map_size`` grid: 4 * 16 distribution bins for regression and
    ``class_num`` logits for classification.

    Unlike the earlier implementation, the arrays in ``out`` are not modified.

    Args:
        out: sequence of arrays exposing ``reshape``; two per head.
        img_h: height of the original image.
        img_w: width of the original image.

    Returns:
        The boxes whose best class score exceeds ``object_thresh``, rescaled
        from network-input size to the original image, clipped to the image
        bounds and reduced by TopK().
    """
    print('postprocess ... ')

    output = [head_out.reshape(-1) for head_out in out]

    scale_h = img_h / input_height
    scale_w = img_w / input_width

    detectResult = []
    # Index into the flat meshgrid list; advances by 2 (x, y) for every cell.
    gridIndex = -2

    for index in range(head_num):
        reg = output[index * 2 + 0]
        cls = output[index * 2 + 1]
        grid_h = map_size[index][0]
        grid_w = map_size[index][1]
        plane = grid_h * grid_w  # number of cells in one channel plane
        stride = strides[index]

        for h in range(grid_h):
            for w in range(grid_w):
                gridIndex += 2
                cell = h * grid_w + w

                # Pick the class with the largest logit, then squash only that one.
                cls_index = 0
                cls_max = cls[cell]
                for cl in range(1, class_num):
                    cls_val = cls[cl * plane + cell]
                    if cls_val > cls_max:
                        cls_max = cls_val
                        cls_index = cl
                cls_max = sigmoid(cls_max)

                if cls_max <= object_thresh:
                    continue

                # For each of the 4 box sides: softmax over its 16 bins and take
                # the expected bin index as the distance (in grid units).
                regdfl = []
                for lc in range(4):
                    logits = [float(reg[(lc * 16 + df) * plane + cell]) for df in range(16)]
                    # Subtracting the maximum keeps exp() from overflowing and
                    # does not change the softmax result.
                    peak = max(logits)
                    weights = [exp(value - peak) for value in logits]
                    sfsum = sum(weights)
                    locval = sum(weight * df for df, weight in enumerate(weights)) / sfsum
                    regdfl.append(locval)

                center_x = meshgrid[gridIndex + 0]
                center_y = meshgrid[gridIndex + 1]
                x1 = (center_x - regdfl[0]) * stride
                y1 = (center_y - regdfl[1]) * stride
                x2 = (center_x + regdfl[2]) * stride
                y2 = (center_y + regdfl[3]) * stride

                # Rescale to the original image and clip to its bounds.
                xmin = x1 * scale_w
                ymin = y1 * scale_h
                xmax = x2 * scale_w
                ymax = y2 * scale_h

                xmin = xmin if xmin > 0 else 0
                ymin = ymin if ymin > 0 else 0
                xmax = xmax if xmax < img_w else img_w
                ymax = ymax if ymax < img_h else img_h

                detectResult.append(DetectBox(cls_index, cls_max, xmin, ymin, xmax, ymax))

    # topK
    print('before topK num is:', len(detectResult))
    predBox = TopK(detectResult)

    return predBox
def preprocess(src_image, input_width, input_height):
    """Resize an image and turn it into a contiguous, scaled CHW float32 array.

    Args:
        src_image: image array accepted by ``cv2.resize``.
        input_width: target width.
        input_height: target height.

    Returns:
        C-contiguous array with axes reordered by ``transpose(2, 0, 1)`` and
        every value multiplied by 0.00392156.
    """
    resized = cv2.resize(src_image, (input_width, input_height))
    scaled = resized.astype(np.float32) * 0.00392156
    channels_first = scaled.transpose(2, 0, 1)
    return np.ascontiguousarray(channels_first)
def main():
    """Run the TensorRT engine on ``./test.jpg`` and save an annotated copy.

    Raises:
        FileNotFoundError: if the input image cannot be read.
        RuntimeError: if the engine file cannot be deserialized.
    """
    engine_file_path = './yolov10n_zq.trt'
    input_image_path = './test.jpg'

    orign_image = cv2.imread(input_image_path)
    if orign_image is None:
        # imread returns None instead of raising on a missing/undecodable file.
        raise FileNotFoundError(f'cannot read image: {input_image_path}')

    image = cv2.cvtColor(orign_image, cv2.COLOR_BGR2RGB)
    img_h, img_w = image.shape[:2]
    image = preprocess(image, input_width, input_height)

    engine = get_engine_from_bin(engine_file_path)
    if engine is None:
        # Fail with a clear message instead of an opaque error from `with None`.
        raise RuntimeError(f'failed to deserialize engine: {engine_file_path}')

    with engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = allocate_buffers(engine)

        inputs[0].host = image
        # Only the inference call (copies + execution + sync) is timed.
        t1 = time.time()
        trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=1)
        t2 = time.time()
        print('run tiems time:', (t2 - t1), "s")

        print('outputs heads num: ', len(trt_outputs))

        predbox = postprocess(list(trt_outputs), img_h, img_w)

        for box in predbox:
            xmin = int(box.xmin)
            ymin = int(box.ymin)
            xmax = int(box.xmax)
            ymax = int(box.ymax)

            cv2.rectangle(orign_image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            title = CLASSES[box.classId] + "%.2f" % box.score
            cv2.putText(orign_image, title, (xmin, ymin), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)

        cv2.imwrite('./test_result_tensorRT.jpg', orign_image)
if __name__ == '__main__':
    # Script entry point. The grid centres are generated first because
    # postprocess() reads them from the module-level meshgrid list.
    print('This is main ...')
    GenerateMeshgrid()
    main()

2.3.2 使用tensorrt模型预测时间


python3 yolov10n_onnx_demo_zq.py


AttributeError: 'tensorrt.tensorrt.Builder' object has no attribute 'max_workspace_size' 【转tensorRT】


pip install nvidia-tensorrt==7.2.* --index-url https://pypi.ngc.nvidia.com



2.3.3 使用tensorrt模型预测结果



