tensorRT部署之 代码实现 onnx转engine/trt模型
- 前提已经装好显卡驱动、cuda、cudnn、以及tensorRT
- 下面将给出Python、C++两种转换方式
1. C++实现
- 项目属性中配置好CUDA、TensorRT库
- 通常在实际应用中会直接读取onnx模型进行判断:如果对应路径已经存在engine模型,则直接通过TensorRT读入engine;如果没有,则先对onnx进行编译生成engine模型,再进行读入
- TensorRT在线加载模型,并序列化保存支持动态batch的引擎,实现源码可参考 TextandCode
- 一篇超级详细的onnx基础教程(非常好):TextandCode
- 代码实现:
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>

#include "NvInfer.h"
#include "NvOnnxParser.h"
/// Log sink handed to TensorRT; the single global instance `logger` is what
/// the builder and ONNX parser in this file are created with.
///
/// Messages whose severity compares <= Severity::kWARNING are echoed to
/// stdout; everything else is dropped. (In TensorRT's Severity enum lower
/// values are more severe, so this keeps warnings and errors.)
class Logger : public nvinfer1::ILogger
{
    /// Callback invoked through the nvinfer1::ILogger interface.
    /// @param severity Severity level attached to the message.
    /// @param msg      NUL-terminated message text.
    void log(Severity severity, const char* msg) noexcept override
    {
        const bool filteredOut = severity > Severity::kWARNING;
        if (filteredOut)
        {
            return;
        }
        std::cout << msg << std::endl;
    }
} logger;
void ONNX2TensorRT(const char* ONNX_file, std::string save_ngine)
{
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
uint32_t flag = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(flag);
nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, logger);
parser->parseFromFile(ONNX_file, static_cast<int32_t>(nvinfer1::ILogger::Severity::kWARNING));
for (int32_t i = 0; i < parser->getNbErrors(); ++i)
{
std::cout << parser->getError(i)->desc() << std::endl;
}
nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 16 * (1 << 20));
if (builder->platformHasFastFp16())
{
config->setFlag(nvinfer1::BuilderFlag::kFP16);
}
nvinfer1::IHostMemory* serializedModel = builder->buildSerializedNetwork(*network, *config);
std::ofstream p(save_ngine, std::ios::binary);
p.write(reinterpret_cast<const char*>(serializedModel->data()), serializedModel->size());
delete parser;
delete network;
delete config;
delete builder;
delete serializedModel;
}
/// Checks that the ONNX file can be opened and, if so, converts it to a
/// TensorRT engine via ONNX2TensorRT, printing progress to stdout.
///
/// @param ONNX_file  Path of the ONNX model.
/// @param save_ngine Path where the engine file is to be saved.
///
/// If the file cannot be opened, a failure message is printed and nothing
/// else happens. ONNX2TensorRT returns void, so the final "Export success"
/// line is printed whenever the conversion call returns.
void exportONNX(const char* ONNX_file, std::string save_ngine)
{
    // Opened only to probe that the model is readable; the stream stays in
    // scope (and therefore open) for the duration of the conversion.
    std::ifstream probe(ONNX_file, std::ios::binary);

    if (probe.good())
    {
        std::cout << "Load ONNX file from: " << ONNX_file << std::endl
                  << "Starting export ..." << std::endl;
        ONNX2TensorRT(ONNX_file, save_ngine);
        std::cout << "Export success, saved as: " << save_ngine << std::endl;
        return;
    }

    std::cout << "Load ONNX file failed! No file found from:" << ONNX_file << std::endl;
}
/// Entry point: converts the fixed model ../weights/test.onnx into
/// ../weights/test.engine. Command-line arguments are not consulted.
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    const std::string enginePath{"../weights/test.engine"};
    const char* const onnxPath = "../weights/test.onnx";

    exportONNX(onnxPath, enginePath);
    return 0;
}