import os
from typing import Union, Optional, Sequence, Dict, Any

import cv2
import numpy as np
import pycuda.driver as cuda
import tensorrt as trt
import torch
import torchvision.transforms as transforms
from PIL import Image
def load_engine(self, engine_file_path):
    """Deserialize a TensorRT engine from a serialized plan file on disk.

    Args:
        engine_file_path: Path to the serialized engine (.engine / .plan) file.

    Returns:
        The deserialized ``trt.ICudaEngine``, ready for creating an
        execution context (see ``engine_infer``).

    Raises:
        FileNotFoundError: If ``engine_file_path`` does not exist.
    """
    TRT_LOGGER = trt.Logger()
    # Explicit check instead of `assert`: asserts are stripped under
    # `python -O`, and a missing file deserves a descriptive error.
    if not os.path.exists(engine_file_path):
        raise FileNotFoundError(
            "TensorRT engine file not found: {}".format(engine_file_path)
        )
    print("Reading engine from file {}".format(engine_file_path))
    # Runtime is a context manager; the returned engine outlives it safely.
    with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())
def engine_infer(self, engine, input_image):
    """Run one inference pass through a deserialized TensorRT engine.

    Args:
        engine: TRT engine returned by ``load_engine``.
        input_image: Model input of shape (batch_size, channel, height, width).
            Must be convertible via ``np.ascontiguousarray`` and match the
            dtype of the engine's "input" binding — TODO confirm at call site.

    Returns:
        UNet inference result as a numpy array of shape
        (batch_size, self.num_classes, height, width).

    NOTE(review): assumes the engine has exactly one input binding named
    "input" and one output binding, and that the output is spatially the
    same size as the input (true for UNet-style models) — verify against
    the exported network.
    """
    batch_size = input_image.shape[0]
    image_channel = input_image.shape[1]
    image_height = input_image.shape[2]
    image_width = input_image.shape[3]
    with engine.create_execution_context() as context:
        # Set input shape based on image dimensions for inference
        # (required for engines built with dynamic shapes).
        context.set_binding_shape(engine.get_binding_index("input"), (batch_size, image_channel, image_height, image_width))
        # Allocate host and device buffers; `bindings` collects the device
        # pointers in binding-index order, as execute_async_v2 expects.
        bindings = []
        for binding in engine:
            binding_idx = engine.get_binding_index(binding)
            # Element count of this binding after the shape was set above.
            size = trt.volume(context.get_binding_shape(binding_idx))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            if engine.binding_is_input(binding):
                # Contiguous host copy so the async H2D transfer is valid.
                input_buffer = np.ascontiguousarray(input_image)
                input_memory = cuda.mem_alloc(input_image.nbytes)
                bindings.append(int(input_memory))
            else:
                # Page-locked host buffer enables a true async D2H copy.
                output_buffer = cuda.pagelocked_empty(size, dtype)
                output_memory = cuda.mem_alloc(output_buffer.nbytes)
                bindings.append(int(output_memory))
        stream = cuda.Stream()
        # Transfer input data to the GPU.
        cuda.memcpy_htod_async(input_memory, input_buffer, stream)
        # Run inference
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        # Transfer prediction output from the GPU.
        cuda.memcpy_dtoh_async(output_buffer, output_memory, stream)
        # Synchronize the stream
        stream.synchronize()
    # Flat output buffer reshaped to (N, num_classes, H, W); relies on the
    # UNet output sharing the input's spatial size (see NOTE above).
    output = np.reshape(output_buffer, (batch_size, self.num_classes, image_height, image_width))
    return output
Python API使用TensorRT模型进行推理
最新推荐文章于 2024-04-07 22:45:28 发布
本文介绍了如何在Python中加载和使用NVIDIATensorRT引擎进行图像识别模型的高效推理,包括读取引擎文件、设置输入形状、数据传输以及执行和获取输出的过程。
摘要由CSDN通过智能技术生成