Deploying a YOLOv8 model on the Jetson Orin Nano
This post mainly documents Python (TensorRT) inference with YOLOv8 on the Orin Nano.
Model conversion
Convert the .pt model to an ONNX model:
yolo export model=yolov8n.pt format=onnx opset=12
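If the ultralytics package is installed, the same export can also be done from Python (a minimal sketch; imgsz=640 is an assumption that matches the 640-pixel input size used later in this post):

from ultralytics import YOLO

# Export yolov8n.pt to ONNX with a static 640x640 input, opset 12 (same as the CLI call above)
model = YOLO("yolov8n.pt")
model.export(format="onnx", opset=12, imgsz=640)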
Convert the ONNX model to a TensorRT engine:
trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.engine --fp16
For more trtexec command-line options, see "How to use trtexec proficiently".
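For example, adding --verbose prints the full parser and builder log, which is handy when a conversion fails (sketch; only standard trtexec flags are used here):

trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.engine --fp16 --verbose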
Model inference (Python)
First, the engine loading code, engine.py:
#!/usr/bin/env python3
# coding:utf-8
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()

# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding))  # * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference. Engines built from ONNX are explicit-batch, so use execute_async_v2
    # (batch_size is kept in the signature for compatibility but is not needed here).
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream.
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]

# Load a serialized TensorRT engine file
def load_engine(trt_path):
    # Deserialize the engine
    with open(trt_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())
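With these helpers in place, a minimal standalone sanity check of the engine might look like the following (a sketch, assuming the engine was built from the static 1x3x640x640 export above and that yolov8n.engine is in the current directory):

import numpy as np
from engine import load_engine, allocate_buffers, do_inference

trt_engine = load_engine("yolov8n.engine")
context = trt_engine.create_execution_context()
inputs, outputs, bindings, stream = allocate_buffers(trt_engine)

# Run one pass on a dummy 1x3x640x640 float32 input
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
np.copyto(inputs[0].host, dummy.ravel())
preds = do_inference(context, bindings=bindings, inputs=inputs,
                     outputs=outputs, stream=stream)
print([p.shape for p in preds])  # one flat host buffer per output binding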
Next comes the YOLOv8 inference code, infer.py:
#!/usr/bin/env python3
# coding:utf-8
import torch, cv2
import numpy as np
from engine import *
import time
from utils import *
from copy import deepcopy
import pycuda.driver as cuda
class Detection():
    def __init__(self) -> None:
        super(Detection, self).__init__()
        cuda.init()
        device = cuda.Device(0)
        self.ctx = device.make_context()  # own CUDA context, avoids errors when used from multiple processes
        self.weights = '/home/jetson/workspace/yolov8n.engine'
        self.trt_engine = None
        if not self.trt_engine:
            print("Loading cached TensorRT engine from {}".format(self.weights))
            self.trt_engine = load_engine(self.weights)
        self.inputs, self.outputs, self.bindings, self.stream = \
            allocate_buffers(self.trt_engine)
        self.context = self.trt_engine.create_execution_context()

    def preprocess(self, im0):
        # stride = int(32)  # model stride
        img = letterbox(im0, 640, auto=False)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW (3x640x640)
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img)  # .to(device)
        img = img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        return img

    def infer(self, im):
        np.copyto(self.inputs[0].host, im.ravel())
        self.ctx.push()
        pred = do_inference(self.context, bindings=self.bindings, inputs