3维目标检测算法smoke使用c++ 版TensorRT部署的一些库和基本流程
0. 需要的库
-
TensorRT 相关
#include <cuda_runtime_api.h> #include <NvInfer.h> #include "NvInferPlugin.h"
-
opencv 相关
#include <opencv2/imgproc.hpp> #include <opencv2/highgui.hpp>
-
其他的标准库
#include <iostream> #include <fstream> #include <vector> #include <cassert> #include <cstring> #include <sys/stat.h> #include <glog/logging.h>
1. 流程
-
初始化
TensorRT
TRT_Logger gLogger; IRuntime* runtime = createInferRuntime(gLogger); assert(runtime != nullptr);
-
解析序列化文件
const std::string enginePath = "/home/user1/cv_project/mmdeploy/smoke2/end2end.trt.engine"; // 替换为您的模型引擎文件路径 std::ifstream engineFile(enginePath, std::ios::binary); assert(engineFile.is_open()); // 从引擎文件中反序列化引擎 engineFile.seekg(0, engineFile.end); int engineSize = engineFile.tellg(); engineFile.seekg(0, engineFile.beg); std::vector<char> engineData(engineSize); engineFile.read(engineData.data(), engineSize); engineFile.close();
-
对于有自定义算子的情况
// 在deserializeCudaEngine()前加上 bool didInitPlugins = initLibNvInferPlugins(nullptr, "");
-
创建CudaEngine和推理上下文
ICudaEngine* engine = runtime->deserializeCudaEngine(engineData.data(), engineSize, nullptr); assert(engine != nullptr); IExecutionContext* context = engine->createExecutionContext(); assert(context != nullptr);
-
GPU内存分配
const int batchSize = 1; const int inputChannels = 3; // 三通道图片 const int inputHeight = 384; const int inputWidth = 1280; const int inputSize = inputChannels * inputHeight * inputWidth * sizeof(float); // 输出1的大小 const int outputSize1 = 1*3*96*320* sizeof(float)/* 第一个输出的大小 */; // 输出2的大小 const int outputSize2 = 1*8*96*320* sizeof(float)/* 第二个输出的大小 */; void* buffers[3]; // 3个缓冲区(1个输入和2个输出) cudaMalloc(&buffers[0], inputSize); cudaMalloc(&buffers[1], outputSize1); cudaMalloc(&buffers[2], outputSize2);
-
数据读取和转换
cv::Mat inputImage = cv::imread("/home/user1/cv_project/mmdeploy/z_log/000008.png"); // 替换为您的图片路径 cv::Mat resizedImage; cv::resize(inputImage, resizedImage, cv::Size(inputWidth, inputHeight)); resizedImage.convertTo(resizedImage, CV_32FC3); // // 将图像从HWC转换为CHW // vector<float> a(inputSize); float* input_last = new float[inputSize]; // cv::Mat chwImage(inputChannels, inputHeight, inputWidth, CV_32FC3); for (int c = 0; c < inputChannels; ++c) { for (int h = 0; h < inputHeight; ++h) { for (int w = 0; w < inputWidth; ++w) { // cout<<"helloworld:"<<(c * inputHeight * inputWidth + h * inputWidth + w)<<endl; input_last[c * inputHeight * inputWidth + h * inputWidth + w] = static_cast<float>(resizedImage.at<cv::Vec3f>(h, w)[c]) / 255.0f; } } }
-
复制数据到GPU
// 将输入数据复制到GPU
cout<<"helloworld:2";
float* inputData = new float[inputChannels * inputHeight * inputWidth];
cout<<"helloworld:";
memcpy(inputData, input_last, inputSize);
for (int i=0;i<(inputChannels * inputHeight * inputWidth);i++){
cout<<"test:"<<inputData[i]<<endl;
}
cudaMemcpy(buffers[0], inputData, inputSize, cudaMemcpyHostToDevice);
-
执行推理
context->execute(batchSize, buffers);
-
获取输出数据
// 获取输出数据 float* outputData1 = new float[outputSize1]; float* outputData2 = new float[outputSize2]; cudaMemcpy(outputData1, buffers[1], outputSize1, cudaMemcpyDeviceToHost); cudaMemcpy(outputData2, buffers[2], outputSize2, cudaMemcpyDeviceToHost);
2. 整体代码
#include <iostream>
#include <fstream>
#include <vector>
#include <cassert>
#include <cstring>
#include <cuda_runtime_api.h>
#include <NvInfer.h>
#include <sys/stat.h>
#include <glog/logging.h>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include "NvInferPlugin.h"
using namespace nvinfer1;
using namespace std;
using namespace cv;
// Logger for TRT info/warning/errors, https://github.com/onnx/onnx-tensorrt/blob/main/onnx_trt_backend.cpp
// Minimal nvinfer1::ILogger implementation: filters TensorRT log messages by
// severity and prints the surviving ones with a UTC timestamp and a severity
// label to a caller-supplied stream.
class TRT_Logger : public nvinfer1::ILogger
{
    nvinfer1::ILogger::Severity _verbosity; // messages more verbose than this are dropped
    std::ostream* _ostream;                 // destination stream (not owned)
public:
    // verbosity: most verbose severity that will still be printed.
    // ostream: where log lines are written (defaults to stdout).
    TRT_Logger(Severity verbosity = Severity::kWARNING, std::ostream& ostream = std::cout)
        : _verbosity(verbosity)
        , _ostream(&ostream)
    {
    }
    // Invoked by TensorRT for every log message it emits.
    void log(Severity severity, const char* msg) noexcept override
    {
        if (severity > _verbosity)
            return; // below the configured verbosity threshold — drop it
        // Format the current time (UTC) as "YYYY-MM-DD HH:MM:SS".
        time_t now = std::time(0);
        char stamp[256];
        strftime(&stamp[0], 256, "%Y-%m-%d %H:%M:%S", std::gmtime(&now));
        // Map the severity enum to the fixed-width label used in the output.
        const char* label;
        switch (severity)
        {
            case Severity::kINTERNAL_ERROR: label = " BUG"; break;
            case Severity::kERROR:          label = " ERROR"; break;
            case Severity::kWARNING:        label = "WARNING"; break;
            case Severity::kINFO:           label = " INFO"; break;
            default:                        label = "UNKNOWN"; break;
        }
        (*_ostream) << "[" << stamp << " " << label << "] " << msg << std::endl;
    }
};
int main()
{
// 指定模型引擎文件的路径
TRT_Logger gLogger;
const std::string enginePath = "/home/user1/cv_project/mmdeploy/smoke2/end2end.trt.engine"; // 替换为您的模型引擎文件路径
// 初始化TensorRT
IRuntime* runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
std::ifstream engineFile(enginePath, std::ios::binary);
assert(engineFile.is_open());
// 从引擎文件中反序列化引擎
engineFile.seekg(0, engineFile.end);
int engineSize = engineFile.tellg();
engineFile.seekg(0, engineFile.beg);
std::vector<char> engineData(engineSize);
engineFile.read(engineData.data(), engineSize);
engineFile.close();
// 创建CudaEngine
// 在deserializeCudaEngine()前加上
bool didInitPlugins = initLibNvInferPlugins(nullptr, "");
ICudaEngine* engine = runtime->deserializeCudaEngine(engineData.data(), engineSize, nullptr);
assert(engine != nullptr);
// 创建推理上下文
IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);
// 分配GPU内存
// 分配GPU内存
const int batchSize = 1;
const int inputChannels = 3; // 三通道图片
const int inputHeight = 384;
const int inputWidth = 1280;
const int inputSize = inputChannels * inputHeight * inputWidth * sizeof(float);
// 输出1的大小
const int outputSize1 = 1*3*96*320* sizeof(float)/* 第一个输出的大小 */;
// 输出2的大小
const int outputSize2 = 1*8*96*320* sizeof(float)/* 第二个输出的大小 */;
void* buffers[3]; // 3个缓冲区(1个输入和2个输出)
cudaMalloc(&buffers[0], inputSize);
cudaMalloc(&buffers[1], outputSize1);
cudaMalloc(&buffers[2], outputSize2);
// 准备输入数据(假设您有一张RGB图片)
cv::Mat inputImage = cv::imread("/home/user1/cv_project/mmdeploy/z_log/000008.png"); // 替换为您的图片路径
cv::Mat resizedImage;
cv::resize(inputImage, resizedImage, cv::Size(inputWidth, inputHeight));
resizedImage.convertTo(resizedImage, CV_32FC3); //
// 将图像从HWC转换为CHW
// vector<float> a(inputSize);
float* input_last = new float[inputSize];
// cv::Mat chwImage(inputChannels, inputHeight, inputWidth, CV_32FC3);
for (int c = 0; c < inputChannels; ++c)
{
for (int h = 0; h < inputHeight; ++h)
{
for (int w = 0; w < inputWidth; ++w)
{
// cout<<"helloworld:"<<(c * inputHeight * inputWidth + h * inputWidth + w)<<endl;
input_last[c * inputHeight * inputWidth + h * inputWidth + w] = static_cast<float>(resizedImage.at<cv::Vec3f>(h, w)[c]) / 255.0f;
}
}
}
// 将输入数据复制到GPU
cout<<"helloworld:2";
float* inputData = new float[inputChannels * inputHeight * inputWidth];
cout<<"helloworld:";
memcpy(inputData, input_last, inputSize);
for (int i=0;i<(inputChannels * inputHeight * inputWidth);i++){
cout<<"test:"<<inputData[i]<<endl;
}
cudaMemcpy(buffers[0], inputData, inputSize, cudaMemcpyHostToDevice);
// 执行推理
context->execute(batchSize, buffers);
// 获取输出数据
float* outputData1 = new float[outputSize1];
float* outputData2 = new float[outputSize2];
// ...
cudaMemcpy(outputData1, buffers[1], outputSize1, cudaMemcpyDeviceToHost);
cudaMemcpy(outputData2, buffers[2], outputSize2, cudaMemcpyDeviceToHost);
// ...
// 处理输出数据
// 在这里进行后处理或使用输出数据进行其他操作
std::ofstream outputFile1("output1.txt");
if (outputFile1.is_open())
{
for (int i = 0; i < outputSize1/(sizeof(float)); i++)
{
outputFile1 << outputData1[i] << " ";
}
outputFile1.close();
}
std::ofstream outputFile2("output2.txt");
if (outputFile2.is_open())
{
for (int i = 0; i < outputSize2/(sizeof(float)); i++)
{
outputFile2 << outputData2[i] << " ";
}
outputFile2.close();
}
// 释放资源
delete[] inputData;
delete[] outputData1;
delete[] outputData2;
delete[] input_last;
// ...
context->destroy();
engine->destroy();
runtime->destroy();
return 0;
}