Python API使用TensorRT模型进行推理

最新推荐文章于 2024-04-07 22:45:28 发布

yzZ_here

最新推荐文章于 2024-04-07 22:45:28 发布

阅读量658

点赞数 7

文章标签： python 人工智能 深度学习

本文链接：https://blog.csdn.net/qq_39333636/article/details/136247977

版权

本文介绍了如何在 Python 中加载和使用 NVIDIA TensorRT 引擎进行图像识别模型的高效推理，包括读取引擎文件、设置输入形状、数据传输以及执行和获取输出的过程。

摘要由CSDN通过智能技术生成

import os
from typing import Union, Optional, Sequence, Dict, Any

import cv2
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import torch
import torchvision.transforms as transforms
from PIL import Image



def load_engine(self, engine_file_path):
    """Deserialize a TensorRT engine from a serialized engine file.

    Args:
        self: Unused by this function; kept so existing call sites that pass
            it keep working.
        engine_file_path: Path to the serialized TensorRT engine file.

    Returns:
        Whatever ``trt.Runtime.deserialize_cuda_engine`` returns for the file
        contents. NOTE(review): per the TensorRT documentation this can be
        ``None`` when deserialization fails -- callers should check.

    Raises:
        FileNotFoundError: If ``engine_file_path`` is not an existing file.
    """
    # Validate the path before creating any TensorRT object. An explicit raise
    # is used instead of ``assert`` because asserts are stripped under ``-O``.
    if not os.path.isfile(engine_file_path):
        raise FileNotFoundError(
            "TensorRT engine file not found: {}".format(engine_file_path)
        )

    trt_logger = trt.Logger()
    print("Reading engine from file {}".format(engine_file_path))
    with open(engine_file_path, "rb") as f, trt.Runtime(trt_logger) as runtime:
        return runtime.deserialize_cuda_engine(f.read())
    

def engine_infer(self, engine, input_image):
    """Run one inference pass through a TensorRT engine and return the output.

    Args:
        self: Object providing ``num_classes``; it is read only to reshape
            the output.
        engine: TensorRT engine as returned by ``load_engine``. It must have
            an input binding named ``"input"``.
        input_image: Input batch laid out as
            (batch_size, channel, height, width).

    Returns:
        The U-Net inference result reshaped to
        (batch_size, self.num_classes, height, width).

    Raises:
        RuntimeError: If TensorRT reports that inference could not be
            enqueued.

    Note:
        Only the buffers of the last input binding and the last output
        binding seen are copied, so engines with several inputs or outputs
        are not fully handled here.
    """
    batch_size = input_image.shape[0]
    image_channel = input_image.shape[1]
    image_height = input_image.shape[2]
    image_width = input_image.shape[3]

    with engine.create_execution_context() as context:
        # Set input shape based on image dimensions for inference
        context.set_binding_shape(
            engine.get_binding_index("input"),
            (batch_size, image_channel, image_height, image_width),
        )

        # Allocate host and device buffers, one device pointer per binding.
        bindings = []
        for binding in engine:
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            if engine.binding_is_input(binding):
                # Cast to the dtype the binding declares; copying an array of
                # another dtype would hand the engine misinterpreted bytes.
                input_buffer = np.ascontiguousarray(input_image, dtype=dtype)
                # Size the allocation from the (possibly cast) host buffer so
                # the host-to-device copy matches the device allocation.
                input_memory = cuda.mem_alloc(input_buffer.nbytes)
                bindings.append(int(input_memory))
            else:
                # Output size is taken from the context because it depends on
                # the input shape set above.
                binding_idx = engine.get_binding_index(binding)
                size = trt.volume(context.get_binding_shape(binding_idx))
                output_buffer = cuda.pagelocked_empty(size, dtype)
                output_memory = cuda.mem_alloc(output_buffer.nbytes)
                bindings.append(int(output_memory))

        stream = cuda.Stream()
        # Transfer input data to the GPU.
        cuda.memcpy_htod_async(input_memory, input_buffer, stream)
        # Run inference
        enqueued = context.execute_async_v2(
            bindings=bindings, stream_handle=stream.handle
        )
        if not enqueued:
            # Drain the pending copy before unwinding, then fail loudly rather
            # than returning an uninitialised output buffer.
            stream.synchronize()
            raise RuntimeError("TensorRT failed to enqueue inference")
        # Transfer prediction output from the GPU.
        cuda.memcpy_dtoh_async(output_buffer, output_memory, stream)
        # Synchronize the stream
        stream.synchronize()

    output = np.reshape(
        output_buffer,
        (batch_size, self.num_classes, image_height, image_width),
    )
    return output