TensorRT Installation
First, make sure CUDA is installed correctly; after installation, verify it with nvcc -V.
Download TensorRT
URL: https://developer.nvidia.com/nvidia-tensorrt-8x-download. Download the latest release and extract it:
tar -xzvf TensorRT-8.4.0.6.Linux.x86_64-gnu.cuda-11.6.cudnn8.3.tar.gz
To save disk space on the root partition I keep TensorRT under my home directory; then add its library path to the environment:
vim ~/.bashrc
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:~/3D/TensorRT-8.4.0.6/lib
source ~/.bashrc
Next, check that TensorRT works by running one of the bundled samples. Build the sampleMNIST sources; this generates an executable in the bin directory, which we switch to and run directly:
cd ~/3D/TensorRT-8.4.0.6/samples/sampleMNIST
make
cd ../../bin/
./sample_mnist
If the output ends with PASSED, the sample has run successfully.
Python support
Although TensorRT itself is now installed, our Python environment still cannot import tensorrt, so we install the matching .whl:
pip install ../TensorRT-8.4.0.6/python/tensorrt-8.4.0.6-cp37-none-linux_x86_64.whl
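If the wheel matches your Python version, the module should now import. A quick sanity check (the printed version should match the installed wheel):
import tensorrt as trt
print(trt.__version__)  # e.g. 8.4.0.6 for this install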
ONNX Deployment
TensorRT is NVIDIA's inference-optimization library for accelerating models trained for NVIDIA GPUs on a specific target platform. It is a C++ library and supports inference only, not training.
To run inference you first need an IExecutionContext object, and to create one you first need an ICudaEngine object (the engine). There are two ways to create the engine (a short sketch of the second path follows this list):
- Build the engine from a model file; the resulting engine can be serialized and saved to disk so it can be reused directly later.
- Load a previously serialized engine; this route is more efficient, since parsing the model and building the engine is fairly slow.
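As a rough illustration of the second path, here is a minimal Python sketch of loading a saved engine (the file name model.trt is only a placeholder; the C++ flow is analogous, using createInferRuntime and deserializeCudaEngine):
import tensorrt as trt
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# deserialize the engine that was previously saved to disk
with open("model.trt", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
# the execution context is what actually runs inference
context = engine.create_execution_context()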
C++
The TensorRT API changes quite a bit between versions, so refer to the API documentation for the version you are using.
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "NvInferRuntimeCommon.h"
#include <string>
#include <iostream>
#include <fstream>
// Logger for TensorRT info/warning/errors
class Logger : public nvinfer1::ILogger
{
public:
Logger(Severity severity = Severity::kWARNING) : reportableSeverity(severity)
{
}
void log(Severity severity, char const* msg) noexcept
// void log(Severity severity, const char* msg) noexcept
{
// suppress messages with severity enum value greater than the reportable
if (severity > reportableSeverity)
return;
switch (severity)
{
case Severity::kINTERNAL_ERROR:
std::cerr << "INTERNAL_ERROR: ";
break;
case Severity::kERROR:
std::cerr << "ERROR: ";
break;
case Severity::kWARNING:
std::cerr << "WARNING: ";
break;
case Severity::kINFO:
std::cerr << "INFO: ";
break;
default:
std::cerr << "UNKNOWN: ";
break;
}
std::cerr << msg << std::endl;
}
Severity reportableSeverity;
};
static Logger g_logger_;
void onnxToTRTModel(const std::string &model_file, // name of the onnx model
nvinfer1::IHostMemory *&trt_model_stream) // output buffer for the TensorRT model
{
int verbosity = (int)nvinfer1::ILogger::Severity::kWARNING;
// create the builder
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(g_logger_);
// 创建INetworkDefinition 对象
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U <<static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
// 创建解析器
auto parser = nvonnxparser::createParser(*network, g_logger_);
// 解析onnx文件,并填充网络
if (!parser->parseFromFile(model_file.c_str(), verbosity))
{
std::string msg("failed to parse onnx file");
g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
exit(EXIT_FAILURE);
}
// Build the engine
builder->setMaxBatchSize(1);
// 创建iBuilderConfig对象
nvinfer1::IBuilderConfig* iBuilderConfig = builder->createBuilderConfig();
// 设置engine可使用的最大GPU临时值
iBuilderConfig ->setMaxWorkspaceSize(1 << 20);
nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network,*iBuilderConfig);
// 将engine序列化,保存到文件中
trt_model_stream = engine->serialize();
// save engine
std::ofstream p("../model.trt", std::ios::binary);
if (!p) {
std::cerr << "could not open plan output file" << std::endl;
}
p.write(reinterpret_cast<const char *>(trt_model_stream->data()), trt_model_stream->size());
parser->destroy();
engine->destroy();
network->destroy();
builder->destroy();
iBuilderConfig->destroy();
}
int main() {
nvinfer1::IHostMemory *trt_model_stream;
onnxToTRTModel("../../config/pfe.onnx", trt_model_stream);
}
CMakeLists.txt
cmake_minimum_required(VERSION 3.10)
project(onnxToTensorRT)
find_package(CUDA)
SET(TENSORRT_PATH /home/xiaohu/3D/TensorRT-8.4.0.6/)
SET(TENSORRT_LIB_PATH /home/xiaohu/3D/TensorRT-8.4.0.6/lib)
include_directories(
include
${TENSORRT_PATH}/include
${CUDA_INCLUDE_DIRS}
)
set(SOURCE_FILES
onnxToTensorRT.cpp
)
file(GLOB TENSORRT_LIBS "${TENSORRT_LIB_PATH}/*.so")
cuda_add_executable(${PROJECT_NAME} ${SOURCE_FILES})
target_link_libraries(
${PROJECT_NAME}
${TENSORRT_LIBS}
${CUDA_LIBRARIES}
)
Python
#############################################
# Convert ONNX to a TensorRT engine via the Python API
#############################################
import sys
import os
import argparse
import tensorrt as trt
EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument("--onnx_path", type=str,
                        default='static_sim.onnx')
    parser.add_argument("--trt_path", type=str,
                        default='static_sim.trt')
    args = parser.parse_args()
    onnx_file_path = args.onnx_path
    engine_file_path = args.trt_path
    print('get start')
    TRT_LOGGER = trt.Logger()
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        config = builder.create_builder_config()
        config.max_workspace_size = (1 << 30) * 2  # 2 GB
        builder.max_batch_size = 16
        config.set_flag(trt.BuilderFlag.FP16)
        # builder.fp16_mode = True
        # Parse model file
        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                sys.exit(1)
        print(f"raw shape of {network.get_input(0).name} is: ", network.get_input(0).shape)
        print(f"raw shape of {network.get_input(1).name} is: ", network.get_input(1).shape)
        print(f"raw shape of {network.get_input(2).name} is: ", network.get_input(2).shape)
        print('Completed parsing of ONNX file')
        print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
        engine = builder.build_engine(network, config)
        print("Completed creating Engine")
        with open(engine_file_path, "wb") as f:
            f.write(engine.serialize())
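Building the engine is only half the job; to actually run it you still have to allocate device buffers and copy data yourself. Below is a minimal sketch using pycuda (pycuda is my own assumption here, not something the post above relies on), for a static-shape engine such as the one just built; real input data and error handling are omitted:
import numpy as np
import pycuda.autoinit  # creates a CUDA context
import pycuda.driver as cuda
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with open("static_sim.trt", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
context = engine.create_execution_context()

# one host/device buffer pair per binding (inputs and outputs)
host_bufs, dev_bufs, bindings = [], [], []
for i in range(engine.num_bindings):
    shape = engine.get_binding_shape(i)
    dtype = trt.nptype(engine.get_binding_dtype(i))
    host = np.zeros(trt.volume(shape), dtype=dtype)
    dev = cuda.mem_alloc(host.nbytes)
    host_bufs.append(host)
    dev_bufs.append(dev)
    bindings.append(int(dev))

# fill the input host buffers with real data here, then copy in, execute, copy out
for i in range(engine.num_bindings):
    if engine.binding_is_input(i):
        cuda.memcpy_htod(dev_bufs[i], host_bufs[i])
context.execute_v2(bindings)
for i in range(engine.num_bindings):
    if not engine.binding_is_input(i):
        cuda.memcpy_dtoh(host_bufs[i], dev_bufs[i])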
trtexec
TensorRT-8.4.3.1/targets/x86_64-linux-gnu/bin/trtexec --onnx=static_sim.onnx --explicitBatch --saveEngine=static_sim.trt --workspace=1024