# Import modules
import tensorrt as trt
import pycuda.autoinit          # handles CUDA initialization, memory management and cleanup
import pycuda.driver as cuda    # data transfer between CPU and GPU
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import os
# Step 1: create the logger
logger = trt.Logger(trt.Logger.WARNING)

# Step 2: create the runtime and deserialize the engine
with open("sample.engine", "rb") as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

# Step 3: allocate page-locked host (CPU) memory and device (GPU) memory
h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=np.float32)
h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=np.float32)
d_input = cuda.mem_alloc(h_input.nbytes)
d_output = cuda.mem_alloc(h_output.nbytes)

# Step 4: create a CUDA stream
stream = cuda.Stream()

# Step 5: create the execution context and run inference
with engine.create_execution_context() as context:
    # Transfer input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async_v2(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()
    # h_output now holds the inference results on the host.
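The code above never actually fills h_input; in practice the input image has to be preprocessed into the page-locked buffer before the host-to-device copy. The lines below are a minimal sketch of that step, assuming a 1x3x224x224 float32 CHW input; the function name load_input, the file name sample.jpg, the 224x224 size and the simple /255 normalization are illustrative assumptions, not part of the original code.

# Minimal preprocessing sketch (assumes a 1x3x224x224 float32 CHW input).
def load_input(image_path, pagelocked_buffer):
    img = Image.open(image_path).convert("RGB").resize((224, 224))  # HWC, uint8
    arr = np.asarray(img, dtype=np.float32) / 255.0                 # scale to [0, 1]
    arr = arr.transpose(2, 0, 1).ravel()                            # HWC -> CHW, then flatten
    np.copyto(pagelocked_buffer, arr)                               # fill the page-locked host buffer

load_input("sample.jpg", h_input)   # call this before cuda.memcpy_htod_async(d_input, h_input, stream)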
Deploying inference with the Python API (key steps)

step 1: create the runtime
step 2: deserialize the engine
step 3: create the context
step 4: get the input/output binding indices
step 5: create buffers
step 6: allocate GPU memory for the input/output
step 7: create a CUDA stream
step 8: copy the input data from CPU to GPU
step 9: run asynchronous inference
step 10: copy the output data from GPU back to CPU
step 11: synchronize the CUDA stream
step 12: release resources

A minimal sketch that strings these steps together is shown below.
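The sketch assumes a single-input, single-output engine; the binding names "input" and "output", the engine file name sample.engine, and the helper name do_inference are illustrative assumptions rather than names taken from the original post.

import numpy as np
import pycuda.autoinit                      # initializes the CUDA context
import pycuda.driver as cuda
import tensorrt as trt

def do_inference(engine_path, input_data):
    logger = trt.Logger(trt.Logger.WARNING)
    # steps 1-2: create the runtime and deserialize the engine
    with open(engine_path, "rb") as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    # step 3: create the execution context
    with engine.create_execution_context() as context:
        # step 4: get the input/output binding indices (binding names are assumptions)
        input_idx = engine.get_binding_index("input")
        output_idx = engine.get_binding_index("output")
        # steps 5-6: create page-locked host buffers and allocate GPU memory
        h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(input_idx)), dtype=np.float32)
        h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(output_idx)), dtype=np.float32)
        d_input = cuda.mem_alloc(h_input.nbytes)
        d_output = cuda.mem_alloc(h_output.nbytes)
        bindings = [None] * engine.num_bindings
        bindings[input_idx] = int(d_input)
        bindings[output_idx] = int(d_output)
        # step 7: create a CUDA stream
        stream = cuda.Stream()
        # step 8: copy the input data from CPU to GPU
        np.copyto(h_input, np.ravel(input_data))
        cuda.memcpy_htod_async(d_input, h_input, stream)
        # step 9: run asynchronous inference
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        # step 10: copy the output data from GPU back to CPU
        cuda.memcpy_dtoh_async(h_output, d_output, stream)
        # step 11: synchronize the CUDA stream so the copies have finished
        stream.synchronize()
        # step 12: release the GPU memory explicitly
        d_input.free()
        d_output.free()
    return h_output

Calling output = do_inference("sample.engine", preprocessed_image) would then return the flattened output tensor as a NumPy array on the host.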