一、配置简介
Windows 10 + GTX 1050Ti(4GB显存)
cuda 11.5
TensorRT-8.6.1.6
visual studio 2019
.onnx模型,input为float32的{1,1,N,3};output为float32的{N,7}
二、代码示例
#include <vector>
#include <iostream>
#include <fstream>
#include "utils.h"
/*
* 用于记录TensorRT的相关操作日志
*/
class Logger : public nvinfer1::ILogger {
public:
void log(Severity severity, const char* msg) noexcept override {
using namespace std;
string s;
bool printMsg = true;
switch (severity) {
case Severity::kINTERNAL_ERROR:
s = "INTERNAL_ERROR";
break;
case Severity::kERROR:
s = "ERROR";
break;
case Severity::kWARNING:
s = "WARNING";
break;
case Severity::kINFO:
s = "INFO";
printMsg = m_printVerbose;
break;
case Severity::kVERBOSE:
s = "VERBOSE";
printMsg = m_printVerbose;
break;
}
if (printMsg)
std::cout << s << ": " << msg << endl;
}
public:
void setPrintVerbose(bool printVerbose) {
this->m_printVerbose = printVerbose;
};
private:
bool m_printVerbose = true;
};
// Global logger instance handed to createInferRuntime() in main().
Logger gLogger;
// Path of the serialized TensorRT engine file (wide string; Windows build).
const wchar_t* trtModelName = L"model.trt";
int main() {
/*
* 1.读取序列化的trt模型
*/
std::ifstream trtModelFile(trtModelName, std::ios_base::in | std::ios_base::binary);
if (!trtModelFile)
throw "TRT Model Path Error!";
trtModelFile.seekg(0, std::ios::end);
int m_size = (int)trtModelFile.tellg();
trtModelFile.seekg(0, std::ios::beg);
char* p_modelBuff = new char[m_size];
trtModelFile.read(p_modelBuff, m_size);
trtModelFile.close();
/*
* 2.构造推理环境
*/
nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger); // Create an instance of a safe::IRuntime class. 创建运行实例
nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine((void*)p_modelBuff, m_size, NULL); // Deserialize an engine from a byte array. 反序列化引擎
if (engine == NULL)
throw "Deserialize model failed!\n";
nvinfer1::IExecutionContext* p_context = engine->createExecutionContext(); // Create an execution context. 创建执行语境
// 创建cuda流
cudaStream_t stream;
cudaStreamCreate(&stream);
/*
* 3.创建输入输出
*/
pcl::PointCloud<pcl::PointXYZ>::Ptr cloud(new pcl::PointCloud<pcl::PointXYZ>);
std::string pointcloudPath = "pointcloud.xyz";
ReadCloudFromTxt(pointcloudPath, cloud);
NormalizePoints(cloud);
int size = cloud->points.size();
float* host_input = new float[size * 3];
float* host_output = new float[size * 7];
for (int i = 0; i < size; ++i) { // 展平x y z x y z…
host_input[i * 3] = cloud->points[i].x;
host_input[i * 3 + 1] = cloud->points[i].y;
host_input[i * 3 + 2] = cloud->points[i].z;
}
//for (int i = 0; i < engine->getNbBindings(); i++){
// nvinfer1::Dims dims = engine->getBindingDimensions(i);
// printf("index %d, dims: (");
// for (int d = 0; d < dims.nbDims; d++)
// {
// if (d < dims.nbDims - 1)
// printf("%d,", dims.d[d]);
// else
// printf("%d", dims.d[d]);
// }
// printf(")\n");
//}
void* buffers[2]{ 0 };
cudaMalloc(&buffers[0], size * 3 * sizeof(float));
cudaMalloc(&buffers[1], size * 7 * sizeof(float)); //输出结果的大小
cudaMemcpyAsync(buffers[0], host_input, size * 3 * sizeof(float), cudaMemcpyHostToDevice);
/*
* 4.动态输入维度固定
*/
nvinfer1::Dims dims4;
dims4.nbDims = 4; // 定义维度
dims4.d[0] = 1;
dims4.d[1] = 1;
dims4.d[2] = size;
dims4.d[3] = 3;
p_context->setBindingDimensions(0, dims4); // Set the dynamic dimensions of an input binding. 动态维度需要在推理时固定!!!切记!!!
/*
* 5.执行推理
*/
p_context->enqueueV2(buffers, (cudaStream_t)stream, nullptr);
cudaStreamSynchronize(stream);
/*
* 6.数据后处理
*/
cudaMemcpyAsync(host_output, buffers[1], size * 7 * sizeof(float), cudaMemcpyDeviceToHost);
int* label = new int[size];
for (int i = 0; i < size; i++) {
label[i] = 0; // 初始化数组元素
}
GetFinalLabel(host_output, label, size);
// 导出可视化
std::fstream fs;
fs.open("result_.txt", std::ios::out);
if (!fs)
return -1;
for (size_t i = 0; i < size; i++){
fs << cloud->points[i].x << " " << cloud->points[i].y << " " << cloud->points[i].z << " " << label[i] << std::endl;
}
fs.close();
delete[] p_modelBuff;
delete[] label;
delete[] host_input;
delete[] host_output;
std::cout << "Hello world" << std::endl;
return 0;
}
注意事项:
1.代码中ReadCloudFromTxt和NormalizePoints函数分别是读取点云文件与对点云做归一化操作;非重点,不同模型需要不同的数据。
2.第4步中,对动态输入做固定是使用动态trt模型的必要步骤,这是区别于静态输入的根本所在。