Note: I have not personally tested this code; TensorRT appears to be Linux-oriented.
ONNX to TensorRT
When you install TensorRT, a set of official samples is downloaded with it. Taking TensorRT 6 as an example, under samples/python/introductory_parser_samples there is a file named onnx_resnet50.py, which is the sample code for converting resnet50.onnx into a TensorRT model. Using that file as a base, this post walks through how to convert your own ONNX model into a TensorRT engine.
Here is the full code.
from PIL import Image
import numpy as np
import pycuda.autoinit  # creates a CUDA context; required before any pycuda.driver call
import pycuda.driver as cuda
import time
import tensorrt as trt

import sys, os
sys.path.insert(1, os.path.join(sys.path[0], ".."))
import common


class ModelData(object):
    MODEL_PATH = "result.onnx"
    INPUT_SHAPE = (1, 512, 512)
    # We can convert TensorRT data types to numpy types with trt.nptype()
    DTYPE = trt.float32


# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


# Allocate host and device buffers, and create a stream.
def allocate_buffers(engine):
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream


def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()


# The Onnx path is used for Onnx models.
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        # Explicitly mark the last layer's output as the network output.
        last_layer = network.get_layer(network.num_layers - 1)
        network.mark_output(last_layer.get_output(0))
        engine = builder.build_cuda_engine(network)
        return engine


def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array.
    def normalize_image(image):
        # Resize, antialias and transpose the image to CHW.
        c, h, w = ModelData.INPUT_SHAPE
        image_arr = np.asarray(image.resize((w, h), Image.ANTIALIAS))
        image_arr = np.reshape(image_arr, image_arr.shape + (1,))
        image_arr = image_arr.transpose([2, 0, 1])
        image_arr = image_arr.astype(trt.nptype(ModelData.DTYPE))
        image_arr = image_arr.ravel()
        # This model requires some preprocessing, specifically, mean normalization.
        return (image_arr / 255.0 - 0.45) / 0.225

    # Normalize the image and copy to pagelocked memory.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image


def main():
    onnx_model_file = 'result.onnx'
    # Build a TensorRT engine.
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
        # Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        with engine.create_execution_context() as context:
            # Load a normalized test case into the host input page-locked buffer,
            # then run inference 100 times to measure the average latency.
            start_time = time.time()
            for i in range(100):
                test_image = 'test.jpg'
                test_case = load_normalized_test_case(test_image, h_input)
                # Run the engine on the preprocessed test image.
                do_inference(context, h_input, d_input, h_output, d_output, stream)
            end_time = time.time()
            per_time = (end_time - start_time) / 100
            print('per image cost ' + str(per_time))


if __name__ == '__main__':
    main()
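One thing the script above does not do is save the conversion result: the engine is rebuilt from result.onnx on every run. TensorRT engines can be serialized to disk and loaded back with trt.Runtime. Below is a minimal sketch of that step, untested like the rest of this post; the file name result.trt is just a placeholder.

def save_engine(engine, path='result.trt'):
    # ICudaEngine.serialize() returns a host-memory buffer that can be written directly.
    with open(path, 'wb') as f:
        f.write(engine.serialize())

def load_engine(path='result.trt'):
    # Deserializing skips the (slow) builder step on subsequent runs.
    with open(path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())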
Most of the code above comes straight from onnx_resnet50.py, so you can compare the two side by side. The main changes are:
(1) The initialization part: the model input size, i.e. the size of the image that is fed to the network after all preprocessing.
(2) The build_engine_onnx function. Below is a comparison of my modified function and the original one from onnx_resnet50.py.
# After modification
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        # Explicitly mark the last layer's output as the network output.
        last_layer = network.get_layer(network.num_layers - 1)
        network.mark_output(last_layer.get_output(0))
        engine = builder.build_cuda_engine(network)
        return engine
# Original function
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        engine = builder.build_cuda_engine(network)
        return engine
The main change is the two lines added after the inner with open(...) block (still inside the outer with statement):
last_layer = network.get_layer(network.num_layers - 1)
network.mark_output(last_layer.get_output(0))
Without this modification, the following error is reported:
[TensorRT] ERROR: Network must have at least one output
The added lines explicitly mark the output of the network.
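A related pitfall: parser.parse() returns False when parsing fails, and the code above ignores that return value, so a model with unsupported ops can leave the network empty and trigger similarly confusing errors. Here is a sketch of the parsing step with error reporting added (num_errors and get_error are part of the TensorRT Python parser API):

        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                # Print every parser error to see which ONNX node failed.
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
                return None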
That is the full record of deploying a Keras model with ONNX + TensorRT; I hope it spares you a few detours. As for speed: I only did a straightforward conversion without any optimization, and got roughly a 50% speedup. I will keep optimizing and, depending on how it goes, add a proper before/after speed comparison.
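If you want a first optimization to try, the TensorRT 6 builder exposes an FP16 mode; a one-line sketch, assuming a GPU with fast FP16 support (I have not measured its effect here):

        # Inside build_engine_onnx, before build_cuda_engine(network):
        if builder.platform_has_fast_fp16:
            builder.fp16_mode = True  # build the engine with FP16 precision where possible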