Linux下yolov5s+TensorRT C++检测的实现

最新推荐文章于 2024-02-26 20:06:22 发布

Rainbow Sea

最新推荐文章于 2024-02-26 20:06:22 发布

阅读量138

点赞数

文章标签： linux YOLO c++

本文链接：https://blog.csdn.net/qq_46463876/article/details/133890184

版权

一、源代码和相关CMakeLists.txt

(1) 头文件（Tensor.h）

#include <time.h>

#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <NvOnnxParser.h>

#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/utils/logger.hpp>
#include<opencv2/dnn/dnn.hpp>

using namespace nvinfer1;
using namespace nvonnxparser;
using namespace cv;
using namespace std;
//日志相关
class Logger : public ILogger
{
	virtual void log(Severity severity, const char* msg) noexcept override
	{
		// suppress info-level messages
		if (severity != Severity::kINFO)
			std::cout << msg << std::endl;
	}
} gLogger;

// Runs a serialized TensorRT detection engine on one test image.
//
// Call order used by main(): constructor -> initialize() -> Create_Buffer()
// -> Padding_Resize() -> Pre_processing() -> detect() -> Post_processing().
class TensorRT_detect
{
public:
    // Loads the label file, the test image and the serialized engine, then creates
    // the TensorRT runtime, engine and execution context.
    TensorRT_detect(const char* model_path_engine, const char* image_path, const char* input_node_name, const char* output_node_name, string classesFile);
    // The object owns raw host buffers that the destructor deletes; copying it would
    // make two objects delete the same memory, so copying is disabled.
    TensorRT_detect(const TensorRT_detect&) = delete;
    TensorRT_detect& operator=(const TensorRT_detect&) = delete;
    // Queries the engine bindings and allocates the host-side buffers.
    void initialize();
    // Aspect-preserving resize of srcimg with zero padding up to t_height x t_width.
    Mat Padding_Resize(Mat srcimg, int t_height, int t_width);
    // Overload: resizes/pads the test image into dstimg using the model input size.
    void Padding_Resize();
    // Releases the buffers owned by this object.
    ~TensorRT_detect();
    // Height / width the model expects for its input image.
    int getinputh();
    int getinputw();
    // Allocates the device-side buffers and creates the CUDA stream.
    void Create_Buffer();
    // Copies the input to the device, runs inference, copies the output back.
    void detect();
    // Decodes the raw output, applies NMS and draws the detections onto dstimg.
    void Post_processing();
    // Converts dstimg into the planar float tensor stored in input_data.
    void Pre_processing();
    Mat dstimg;                             // resized / padded image
    std::vector<float> input_data;          // host-side input tensor
    const char* image_path;                 // path of the test image
private:
    const char* model_path_engine;          // path of the serialized engine file
	std::string classesFile;                // path of the label file
    const char* input_node_name;            // name of the model input binding
	const char* output_node_name ;          // name of the model output binding
    std::vector<std::string> class_names;   // labels, one per line of classesFile
    int num_ionode = 0;                     // total number of input and output bindings
    Logger logger;
    // Pointers start as nullptr so that destroying an object on which initialize()
    // or Create_Buffer() never ran does not touch indeterminate values.
    void** data_buffer = nullptr;                     // per-binding buffer pointers passed to TensorRT
	nvinfer1::IRuntime* runtime = nullptr;            // deserializes the engine
	nvinfer1::ICudaEngine* engine = nullptr;          // inference engine
	nvinfer1::IExecutionContext* context = nullptr;   // execution context
    int input_node_index = -1;              // binding index of the input
    nvinfer1::Dims input_node_dim{};        // dimensions of the input binding
    size_t input_data_length = 0;           // number of floats in the input
    int output_node_index = -1;             // binding index of the output
    nvinfer1::Dims output_node_dim{};       // dimensions of the output binding
    size_t output_data_length = 0;          // number of floats in the output
    cudaStream_t stream = nullptr;          // CUDA stream used for copies and inference
    float* result_array = nullptr;          // host copy of the output
    Mat test_img;                           // test image as loaded from image_path
};

(2) 代码文件（Tensor.cpp）

#include "Tensor.h"

using namespace nvinfer1;
using namespace nvonnxparser;
using namespace cv;
using namespace std;

// Loads the class labels and the test image, reads the serialized engine file and
// creates the TensorRT runtime, engine and execution context.
//
// model_path_engine : path of the serialized TensorRT engine
// image_path        : path of the test image
// input_node_name   : name of the input binding
// output_node_name  : name of the output binding
// classesFile       : text file with one class label per line
//
// Throws std::runtime_error when the engine file cannot be read or when any of the
// TensorRT objects cannot be created. A missing label file or image is only
// reported, because Padding_Resize(Mat, int, int) can still be used without them.
TensorRT_detect::TensorRT_detect(const char* model_path_engine, const char* image_path, const char* input_node_name, const char* output_node_name, string classesFile):
    image_path(image_path), model_path_engine(model_path_engine), classesFile(classesFile), input_node_name(input_node_name), output_node_name(output_node_name)
{
        // Load the class labels.
        std::ifstream ifs(classesFile.c_str());
        if (!ifs.is_open()) {
            std::cerr << "Cannot open label file: " << classesFile << std::endl;
        }
        std::string line;
        while (getline(ifs, line)) class_names.push_back(line);
        cout<<"标签的数量：：：："<<class_names.size()<<endl;

        // Load the test image.
        if (image_path != nullptr) {
            test_img = imread(image_path);
        }
        if (test_img.empty()) {
            std::cerr << "Cannot read test image" << std::endl;
        }

        // Read the whole engine file into memory.
        if (model_path_engine == nullptr) {
            throw std::runtime_error("engine file path is null");
        }
        std::ifstream file_ptr(model_path_engine, std::ios::binary);
        if (!file_ptr.good()) {
            std::cerr << "模型文件无法打开！" << std::endl;
            throw std::runtime_error(std::string("cannot open engine file: ") + model_path_engine);
        }
        file_ptr.seekg(0, std::ios::end);
        const std::streamoff file_size = file_ptr.tellg();
        file_ptr.seekg(0, std::ios::beg);
        if (file_size <= 0) {
            throw std::runtime_error(std::string("engine file is empty or unreadable: ") + model_path_engine);
        }
        // The vector releases the serialized bytes automatically; they are only
        // needed until deserialization has finished.
        std::vector<char> model_stream(static_cast<size_t>(file_size));
        file_ptr.read(model_stream.data(), file_size);
        if (!file_ptr) {
            throw std::runtime_error(std::string("failed to read engine file: ") + model_path_engine);
        }
        file_ptr.close();

        // Create the runtime, the engine and the execution context. The destructor
        // does not run when a constructor throws, so partial state is released here.
        runtime = nvinfer1::createInferRuntime(logger);
        if (runtime == nullptr) {
            throw std::runtime_error("createInferRuntime failed");
        }
        engine = runtime->deserializeCudaEngine(model_stream.data(), model_stream.size());
        if (engine == nullptr) {
            delete runtime;
            runtime = nullptr;
            throw std::runtime_error("deserializeCudaEngine failed");
        }
        context = engine->createExecutionContext();
        if (context == nullptr) {
            delete engine;
            engine = nullptr;
            delete runtime;
            runtime = nullptr;
            throw std::runtime_error("createExecutionContext failed");
        }

}

// Looks up the input and output bindings, records their dimensions and allocates
// the host-side buffers (binding pointer table, input tensor, output array).
//
// The input length is d[1]*d[2]*d[3] and the output length is d[1]*d[2], i.e. the
// leading dimension is not counted.
//
// Calling the function again after a successful call is a no-op, so the buffers are
// never allocated twice. Throws std::runtime_error when a binding name is unknown
// or a required dimension is missing or not positive.
void TensorRT_detect::initialize()
{
    // num_ionode is only set at the end of a successful run.
    if (num_ionode > 0) {
        return;
    }
    if (engine == nullptr) {
        throw std::runtime_error("initialize: engine has not been created");
    }

    const int binding_count = engine->getNbBindings();
    if (binding_count <= 0) {
        throw std::runtime_error("initialize: engine has no bindings");
    }

    // getBindingIndex reports an unknown name with a negative index.
    input_node_index = engine->getBindingIndex(input_node_name);
    output_node_index = engine->getBindingIndex(output_node_name);
    if (input_node_index < 0 || output_node_index < 0) {
        throw std::runtime_error("initialize: input or output binding name not found in engine");
    }

    input_node_dim = engine->getBindingDimensions(input_node_index);
    output_node_dim = engine->getBindingDimensions(output_node_index);
    if (input_node_dim.nbDims < 4 || output_node_dim.nbDims < 3) {
        throw std::runtime_error("initialize: unexpected number of binding dimensions");
    }
    for (int k = 1; k <= 3; ++k) {
        if (input_node_dim.d[k] <= 0) {
            throw std::runtime_error("initialize: input dimensions must be positive");
        }
    }
    for (int k = 1; k <= 2; ++k) {
        if (output_node_dim.d[k] <= 0) {
            throw std::runtime_error("initialize: output dimensions must be positive");
        }
    }

    input_data_length = static_cast<size_t>(input_node_dim.d[1]) * input_node_dim.d[2] * input_node_dim.d[3];
    output_data_length = static_cast<size_t>(output_node_dim.d[1]) * output_node_dim.d[2];

    input_data.resize(input_data_length);
    // The trailing () zero-initializes every slot, so unallocated bindings are
    // nullptr instead of indeterminate values.
    data_buffer = new void*[binding_count]();
    result_array = new float[output_data_length];
    num_ionode = binding_count;
}

// Resizes srcimg so that it fits into t_height x t_width while keeping its aspect
// ratio, then pads the remaining area with zeros, split evenly on both sides.
//
// srcimg   : source image
// t_height : target height in pixels
// t_width  : target width in pixels
// Returns the padded image, or an empty Mat when srcimg is empty or the target size
// is not positive.
Mat TensorRT_detect::Padding_Resize(Mat srcimg, int t_height, int t_width)
{
    Mat padded;
    if (srcimg.empty() || t_height <= 0 || t_width <= 0) {
        std::cerr << "Padding_Resize: empty image or invalid target size" << std::endl;
        return padded;
    }

    // Compare the source aspect ratio with the target aspect ratio. For a square
    // target this is the same "taller than wide" test as comparing against 1.
    const float scale = static_cast<float>(srcimg.rows) / srcimg.cols;
    const float target_scale = static_cast<float>(t_height) / t_width;
    Mat resized;

    if (scale > target_scale) {
        // Height is the limiting side: fill the height, pad left and right.
        const int newh = t_height;
        int neww = static_cast<int>(t_height / scale);
        if (neww < 1) neww = 1;   // extremely narrow images would otherwise round to 0
        // The interpolation flag is the sixth argument of cv::resize; the fourth and
        // fifth are the fx / fy scale factors, unused when dsize is given.
        resize(srcimg, resized, Size(neww, newh), 0, 0, INTER_AREA);
        const int left = static_cast<int>((t_width - neww) * 0.5);
        copyMakeBorder(resized, padded, 0, 0, left, t_width - neww - left, BORDER_CONSTANT, Scalar(0));
    }
    else if (scale < target_scale) {
        // Width is the limiting side: fill the width, pad top and bottom.
        const int neww = t_width;
        int newh = static_cast<int>(t_width * scale);
        if (newh < 1) newh = 1;
        resize(srcimg, resized, Size(neww, newh), 0, 0, INTER_AREA);
        const int top = static_cast<int>((t_height - newh) * 0.5);
        copyMakeBorder(resized, padded, top, t_height - newh - top, 0, 0, BORDER_CONSTANT, Scalar(0));
    }
    else {
        // Same aspect ratio: a plain resize fills the target exactly.
        resize(srcimg, padded, Size(t_width, t_height), 0, 0, INTER_AREA);
    }
    return padded;
}

void TensorRT_detect::Padding_Resize()
{
    int t_height = input_node_dim.d[3];
    int t_width = input_node_dim.d[2];
    //resize(test_img, dstimg, Size(t_width, t_height), INTER_AREA);
    int srch = test_img.rows;
    int srcw = test_img.cols;
    int newh, neww;
    float scale = (float)srch / srcw;

    if(scale > 1){
        newh = t_height;
        neww = int(t_height / scale);
        resize(test_img, dstimg, Size(neww, newh), INTER_AREA);
        int left = int(t_width - neww)*0.5;
        copyMakeBorder(dstimg, dstimg, 0, 0, left, t_width - neww - left, BORDER_CONSTANT, 0);
    }
    else if(scale < 1){
        neww = t_width;
        newh = (int)t_width * scale;
        clock_t start_time1 = clock();
        resize(test_img, dstimg, Size(neww, newh), INTER_AREA);       
        clock_t end_time1 = clock();
        double exec_time1 = static_cast<double>(end_time1 - start_time1) / CLOCKS_PER_SEC;
        std::cout << "REsize Execution time: " << exec_time1<< " seconds" << std::endl;
        //resize(test_img, dstimg, Size(neww, newh), INTER_AREA);
        int top = (t_height - newh) * 0.5;
        clock_t start_time2 = clock();
        copyMakeBorder(dstimg, dstimg, top, t_height - newh - top, 0, 0, BORDER_CONSTANT, 0);
        clock_t end_time2 = clock();
        double exec_time2 = static_cast<double>(end_time2 - start_time2) / CLOCKS_PER_SEC;
        std::cout << "copyMakeBorder Execution time: " << exec_time2<< " seconds" << std::endl;
        //copyMakeBorder(dstimg, dstimg, top, t_height - newh - top, 0, 0, BORDER_CONSTANT, 0);
    }
    else{
        resize(test_img, dstimg, Size(neww, newh), INTER_AREA);
    }
    //imwrite("test.jpg",dstimg);
}


// Width of the model input in pixels.
// Pre_processing() iterates d[2] as image rows and d[3] as image columns, so the
// width is d[3]. For square inputs both getters return the same value.
int TensorRT_detect::getinputw()
{
    return input_node_dim.d[3];
}

// Height of the model input in pixels (d[2], the row count used by Pre_processing()).
int TensorRT_detect::getinputh()
{
    return input_node_dim.d[2];
}

// Releases the host buffers created by initialize() and the TensorRT objects
// created by the constructor.
TensorRT_detect::~TensorRT_detect()
{
    // Make sure no queued GPU work still uses the objects released below.
    cudaDeviceSynchronize();

    // The host arrays exist only after initialize() ran, which is the only place
    // that makes num_ionode non-zero. Deleting them unconditionally would act on
    // pointers that were never assigned.
    if (num_ionode > 0) {
        delete []data_buffer;
        delete []result_array;
    }
    data_buffer = nullptr;
    result_array = nullptr;

    // TensorRT 8 objects are released with delete, in reverse order of creation.
    delete context;
    context = nullptr;
    delete engine;
    engine = nullptr;
    delete runtime;
    runtime = nullptr;

    // NOTE(review): the device buffers and the CUDA stream created by Create_Buffer()
    // are not released here, because nothing visible to the destructor records
    // whether Create_Buffer() ran. Add a flag set by Create_Buffer() and call
    // cudaFree / cudaStreamDestroy when it is set.
}

void TensorRT_detect::Create_Buffer()
{

    //创建输入缓存区
    cudaError_t err1 = cudaMalloc(&(data_buffer[input_node_index]), input_data_length * sizeof(float));
	if (err1 != cudaSuccess) {
		std::cout << "Failed to allocate memory for input data: " << cudaGetErrorString(err1) << std::endl;
		return;
	}
    //创建输出缓存区
    cudaError_t err2 = cudaMalloc(&(data_buffer[output_node_index]), output_data_length * sizeof(float));
	if (err1 != cudaSuccess) {
		std::cout << "Failed to allocate memory for input data: " << cudaGetErrorString(err2) << std::endl;
		return;
	}
    //创建其他三个维度的输出缓存区
	nvinfer1::Dims output_node_dim1 = engine->getBindingDimensions(output_node_index-1);
	size_t output_data_length1 = output_node_dim1.d[1] * output_node_dim1.d[2]* output_node_dim1.d[3]* output_node_dim1.d[4];
	//std::cout << "output_data_length1" << output_data_length1 << std::endl;
	cudaMalloc(&(data_buffer[output_node_index-1]), output_data_length1 * sizeof(float));

	nvinfer1::Dims output_node_dim2 = engine->getBindingDimensions(output_node_index - 2);
	size_t output_data_length2 = output_node_dim2.d[1] * output_node_dim2.d[2] * output_node_dim2.d[3] * output_node_dim2.d[4];
	//std::cout << "output_data_length2" << output_data_length2 << std::endl;
	cudaMalloc(&(data_buffer[output_node_index - 2]), output_data_length2 * sizeof(float));

	nvinfer1::Dims output_node_dim3 = engine->getBindingDimensions(output_node_index - 3);
	size_t output_data_length3 = output_node_dim3.d[1] * output_node_dim3.d[2] * output_node_dim3.d[3] * output_node_dim3.d[4];
	//std::cout << "output_data_length3" << output_data_length3 << std::endl;
	cudaMalloc(&(data_buffer[output_node_index - 3]), output_data_length3 * sizeof(float));

    cudaStreamCreate(&stream);
}

void TensorRT_detect::Pre_processing()
{
    int c1 = input_node_dim.d[1];
    int row = input_node_dim.d[2];
    int col = input_node_dim.d[3];
    int temp = row * col;
    for (int c = 0; c < c1; c++)
		{
			for (int i = 0; i < row; i++)
			{
				for (int j = 0; j < col; j++)
				{
					float pix = dstimg.ptr<uchar>(i)[j * 3 + 2 - c];//
					input_data[c * temp + i * row + size_t(j)] = pix / 255.0;//
				}
			}
		}
}

void TensorRT_detect::detect()
{
    //将数据拷贝至显卡
    cudaError_t err3 = cudaMemcpyAsync(data_buffer[input_node_index], input_data.data(), input_data_length * sizeof(float), cudaMemcpyHostToDevice, stream);
    if (err3 != cudaSuccess) {
    std::cout << "Failed to transfer input data to GPU1: " << cudaGetErrorString(err3) << std::endl;
        return;
    }
    //进行推理
    context->enqueueV2(data_buffer, stream, nullptr);
    //将数据拷贝至主机
    cudaError_t err4 = cudaMemcpyAsync(result_array, data_buffer[output_node_index], output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream);
    if (err4 != cudaSuccess) {
    std::cout << "Failed to transfer input data to HOST: " << cudaGetErrorString(err4) << std::endl;
        return;
    }
}

void TensorRT_detect::Post_processing()
{
    std::vector<float> output(result_array, result_array + output_data_length);
	std::vector<cv::Rect> boxes;
	std::vector<float> confs;
	std::vector<int> classIds;
    //cout<<"输出矩阵的大小:"<<output.size()<<endl;
	int numClasses = (int)output_node_dim.d[2] - 5;
	float confThreshold = 0.5;
	for (auto it = output.begin(); it != output.begin() + output_data_length; it += output_node_dim.d[2])
	{
		float clsConf = *(it + 4);//object scores
		if (clsConf > confThreshold)
		{
			int centerX = (int)(*it);
			int centerY = (int)(*(it + 1));
			int width = (int)(*(it + 2));
			int height = (int)(*(it + 3));
			int x1 = centerX - width / 2;
			int y1 = centerY - height / 2;
			boxes.emplace_back(cv::Rect(x1, y1, width, height));

			// first 5 element are x y w h and obj confidence
			int bestClassId = -1;
			float bestConf = 0.0;

			for (int i = 5; i < numClasses + 5; i++)
			{
				if ((*(it + i)) > bestConf)
				{
					bestConf = it[i];
					bestClassId = i - 5;
				}
			}

			//confs.emplace_back(bestConf * clsConf);
			confs.emplace_back(clsConf);
			classIds.emplace_back(bestClassId);
		}
	}
    //std::cout<<"11111111"<<std::endl;
	float iouThreshold = 0.5;
	std::vector<int> indices;
	// Perform non maximum suppression to eliminate redundant overlapping boxes with
	// lower confidences����ֵ����
	cv::dnn::NMSBoxes(boxes, confs, confThreshold, iouThreshold, indices);
	RNG rng((unsigned)time(NULL));
	for (size_t i = 0; i < indices.size(); ++i)
	{
		int index = indices[i];
		int colorR = rng.uniform(0, 255);
		int colorG = rng.uniform(0, 255);
		int colorB = rng.uniform(0, 255);

		//
		float scores = round(confs[index] * 100) / 100;
		std::ostringstream oss;
		oss << scores;
		rectangle(dstimg, Point(boxes[index].tl().x, boxes[index].tl().y), Point(boxes[index].br().x, boxes[index].br().y), Scalar(colorR, colorG, colorB), 1.5);
		putText(dstimg, class_names[classIds[index]] + " " + oss.str(), Point(boxes[index].tl().x, boxes[index].tl().y - 5), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(colorR, colorG, colorB), 2);
	}
    
}

// Runs the detector on the test image 20 times and prints the CPU time spent in
// pre-processing and in a whole iteration.
// Returns 0 on success and 1 when setup or processing throws.
int main()
{
    try
    {
        // Paths are relative to the directory the program is run from.
        TensorRT_detect TD("best.engine", "test.png", "images", "output", "best.txt");
        TD.initialize();
        TD.Create_Buffer();
        TD.Padding_Resize();
        for(int i = 0; i < 20; i++)
        {
            clock_t start_time = clock();
            TD.Pre_processing();
            clock_t end_time = clock();
            double exec_time = static_cast<double>(end_time - start_time) / CLOCKS_PER_SEC;
            std::cout << "Precessing Execution time: " << exec_time<< " seconds" << std::endl;
            TD.detect();
            TD.Post_processing();
            clock_t end_time1 = clock();
            double exec_time1 = static_cast<double>(end_time1 - start_time) / CLOCKS_PER_SEC;
            std::cout << "Execution time: " << exec_time1<< " seconds" << std::endl;
        }
    }
    catch (const std::exception& e)
    {
        // OpenCV errors and allocation failures derive from std::exception; report
        // them instead of letting the process terminate without a message.
        std::cerr << "Fatal error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}

（3）CMakeLists.txt文件

# Minimum CMake version
cmake_minimum_required(VERSION 3.10)

# Project name
project(Tensor_Detect)

# Optimisation level (debug symbols are kept)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g")

# Target: executable
add_executable(Tensor_Detect "Tensor.cpp")
#add_library(Tensor_Detect SHARED "Tensor_dir/Tensor.cpp" "Tensor_dir/Tensor.h")

# Directory with the project's own headers
target_include_directories(Tensor_Detect PUBLIC "Tensor")


#SET(HELLO_SO "/home/ywy/Tensor/libTensor_Detect.so")

# add OpenCV
find_package(OpenCV REQUIRED)
message("OpenCV_INCLUDE_DIRS:${OpenCV_INCLUDE_DIRS}")
INCLUDE_DIRECTORIES(${OpenCV_INCLUDE_DIRS})
message("OpenCV_LIBRARIES:${OpenCV_LIBRARIES}")

# add TensorRT8.6.1
# The installation directory is a cache variable so it can be overridden with
# -DTENSORRT_ROOT=/path/to/TensorRT; the default is the original location.
set(TENSORRT_ROOT "/home/ywy/TensorRT/TensorRT-8.6.1.6" CACHE PATH "TensorRT installation directory")
target_include_directories(Tensor_Detect PRIVATE "${TENSORRT_ROOT}/include")
target_include_directories(Tensor_Detect PRIVATE "${TENSORRT_ROOT}/samples/common")
set(TENSORRT_LIB_PATH "${TENSORRT_ROOT}/lib")
file(GLOB LIBS "${TENSORRT_LIB_PATH}/*.so")
if(NOT LIBS)
    # An empty glob would otherwise only show up later as unresolved symbols.
    message(FATAL_ERROR "No TensorRT libraries found in ${TENSORRT_LIB_PATH}")
endif()

# Shared library variant
#add_library(Tensor_Detect SHARED "Tensor_dir/Tensor.cpp" "Tensor_dir/Tensor.h")

# add CUDA 11.8
find_package(CUDA 11.8 REQUIRED)
message("CUDA_LIBRARIES:${CUDA_LIBRARIES}")
message("CUDA_INCLUDE_DIRS:${CUDA_INCLUDE_DIRS}")
target_include_directories(Tensor_Detect PRIVATE ${CUDA_INCLUDE_DIRS})

# link
target_link_libraries(Tensor_Detect ${LIBS} ${CUDA_LIBRARIES} ${OpenCV_LIBS})

二、遇到的相关问题

( 1 ) 数据前处理时间太长，三个for循环，神奇的地方是将缩放图片放在主函数for循环里面时间就是20ms左右，但是放在for循环外侧，就是4ms左右，比较费解

（2）在安装cuda驱动的时候总是会报错，换一种run的方式，除此以外，如果已经安装了Nvidia的驱动，安装的时候就不要勾选了

（3）TensorRT的模型文件不能跨平台使用，而且与cuda版本和cudnn的版本、TensorRT的版本严格对应，在不同的环境下生成的engine文件可能会有问题

Rainbow Sea

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Linux下yolov5s+TensorRT C++检测的实现

( 1 ) 数据前处理时间太长，三个for循环，神奇的地方是将缩放图片放在主函数for循环里面时间就是20ms左右，但是放在for循环外侧，就是4ms左右，比较费解。（3）TensorRT的模型文件不能跨平台使用，而且与cuda版本和cudnn的版本、TensorRT的版本严格对应，在不同的环境下生成的engine文件可能会有问题。（2）在安装cuda驱动的时候总是会报错，换一种run的方式，除此以外，如果已经安装了Nvidia的驱动，安装的时候就不要勾选了。（3）CMakeLists.txt文件。
复制链接

扫一扫