YOLOv8-seg 模型 TensorRT 推理解析

最新推荐文章于 2025-07-03 08:32:34 发布

大道@至简

最新推荐文章于 2025-07-03 08:32:34 发布

阅读量543

点赞数 5

CC 4.0 BY-SA版权

文章标签： YOLO 算法目标检测分割计算机视觉

本文链接：https://blog.csdn.net/long630576366/article/details/143847651

YOLOv8 实例分割模型有两个输出：output0 用于实例边界框（bbox）检测，并附带每个实例的掩码系数；output1 是用于分割的掩码原型（proto）。将两者相结合，即可提取出每个实例的分割结果。

#include "logging.h"
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include <NvInferRuntime.h>
#include <cuda_runtime.h> // cuda include
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
// Pull the TensorRT API names (IRuntime, ICudaEngine, IExecutionContext, ...) into scope.
using namespace nvinfer1; 
// Logger instance passed to createInferRuntime(); the Logger type is presumably
// declared in "logging.h" (not visible in this file).
static Logger gLogger;

// CHECK(call): evaluates a CUDA runtime call exactly once and, if it does not
// return cudaSuccess, prints the file, line, numeric error code and the
// cudaGetErrorString() text, then terminates the process with exit(1).
// The argument is parenthesized so any expression can be passed safely, and the
// definition ends without a trailing line continuation so the line that follows
// the macro is never spliced into it.
# define CHECK(call)\
do\
{\
const cudaError_t error_code=(call);\
if (error_code!=cudaSuccess)\
{\
printf("CUDA Error:\n");\
printf(" FILE: %s\n",__FILE__);\
printf(" LINE: %d\n",__LINE__);\
printf("Error code:%d\n",error_code);\
printf("Error text:%s\n",cudaGetErrorString(error_code));\
exit(1);\
}\
}while(0)


// clip(x): clamps x to the closed range [0, 255]; values inside the range are
// returned unchanged. Every use of the argument is parenthesized so that
// low-precedence expressions (e.g. a ?: expression) are clamped as a whole.
// NOTE: x is evaluated more than once, so do not pass expressions with side effects.
#define clip(x) ((x) < 0 ? 0.0 : ((x) > 255.0 ? 255.0 : (x)))

class YOLO_SEG
{
private:
    char *_trtModelStream{nullptr};
    IRuntime* _runtime = nullptr;
    ICudaEngine* _engine=nullptr;
    IExecutionContext* _context=nullptr;
    void *_inferbuffers[3];
    cudaStream_t _stream;
public:
    int _max_batchsize = 1;
    int _input_h = 960;
    int _input_w = 1920;
    int _inputSize = 3 * _input_h * _input_w;
    int _outputSize0 = 1; 
    int _outputSize1 = 1;
    int _inputIndex;
    int _outputIndex0;
    int _outputIndex1 ;
private:
 void get_input_ouput_size(){
    _inputIndex = _engine->getBindingIndex("images");
    _outputIndex0 = _engine->getBindingIndex("output0");
    _outputIndex1 =  _engine->getBindingIndex("output1");
    assert(_inputIndex == 0);
    assert(_outputIndex0 == 2);
    assert(_outputIndex1 == 1);
    auto out_dims1 = _engine->getBindingDimensions(_outputIndex0);
    // BCHW
    for(int j = 1; j < out_dims1.nbDims; j++) {  
        std::cout << "j = " << j << " size = " << out_dims1.d[j] << std::endl;
        _outputSize0 *= out_dims1.d[j];
    }
    // BCN
    auto out_dims2 = _engine->getBindingDimensions(_outputIndex1);
    for(int j = 1; j < out_dims2.nbDims; j++) {  
        std::cout << "j = " << j << " size = " << out_dims2.d[j] << std::endl;
        _outputSize1 *= out_dims2.d[j];
    }

    //std::cout << " _outputSize2 = " << _outputSize2 <<  std::endl;
}
public:
    YOLO_SEG(/* args */){};
    ~YOLO_SEG(){
        if (nullptr != _trtModelStream){
            delete [] _trtModelStream;
        }
    };
    // 文件读取模型，并反序列化成engine
    void load_trtmodel(std::string trt_model_path){
        std::ifstream file(trt_model_path, std::ios::binary);
        size_t size{0};
        if (file.good()) {
                file.seekg(0, file.end);
                size = file.tellg();
                file.seekg(0, file.beg);
                _trtModelStream = new char[size];
                assert(_trtModelStream);
                file.read(_trtModelStream, size);
                file.close();
        }
    _runtime = createInferRuntime(gLogger);
    assert(_runtime != nullptr);
    _engine = _runtime->deserializeCudaEngine(_trtModelStream, size);
    assert(_engine != nullptr); 
    _context = _engine->createExecutionContext();
    assert(_context != nullptr);
    }

    //分配处理相关内存
    void initbuff(){
        get_input_ouput_size();
        //fix _max_batchsize
        _context->setBindingDimensions(0, nvinfer1::Dims4(_max_batchsize, 3, _input_h, _input_w));	
        assert(_engine->getNbBindings() == 3);
        //_context->setBindingDimensions(1, nvinfer1::Dims4(_max_batchsize, 3, _input_h * 3, _input_w * 3));		
       

        std::cout << " outputIndex0 =  " << _outputIndex0 << " outputIndex1 = " << _outputIndex1 << std::endl;
        //const int outputIndex1 = 1;
        //const int outputIndex2 = 2;
       
        CHECK(cudaMalloc((void**)&_inferbuffers[_inputIndex],  _max_batchsize * _inputSize * sizeof(float)));  //trt输入内存申请
        CHECK(cudaMalloc((void**)&_inferbuffers[_outputIndex0], _max_batchsize * _outputSize0 * sizeof(float)));           //trt输出内存申请
        CHECK(cudaMalloc((void**)&_inferbuffers[_outputIndex1], _max_batchsize * _outputSize1 * sizeof(float)));           //trt输出内存申请
        CHECK(cudaStreamCreate(&_stream));
    }
    void releasebuff(){
        CHECK(cudaFree(_inferbuffers[_inputIndex]));
        CHECK(cudaFree(_inferbuffers[_outputIndex0]));
        CHECK(cudaFree(_inferbuffers[_outputIndex1]));
        _context->destroy();
        _engine->destroy();
        _runtime->destroy();
    }
    // 推理
    void infer_trtmodel(const int infer_batch, const float* input_data, float *outputbuff0, float *outputbuff1){
        //图像数据填充_inferbuffers[0],GPU CUDA处理
        cudaMemcpy(_inferbuffers[0], input_data, infer_batch * _inputSize * sizeof(float), cudaMemcpyHostToDevice);
        
        printf("infer model \n");
        _context->enqueueV2((void **)_inferbuffers, _stream, nullptr);
        cudaStreamSynchronize(_stream);
        printf("post process \n");
        //
        //_inferbuffers[1]模型输出后处理,可以GPU处理，否则拷贝到cpu处理
        cudaMemcpy(outputbuff0, _inferbuffers[_outputIndex0], infer_batch * _outputSize0 * sizeof(float), cudaMemcpyDeviceToHost);
        cudaMemcpy(outputbuff1, _inferbuffers[_outputIndex1], infer_batch * _outputSize1 * sizeof(float), cudaMemcpyDeviceToHost);
    }
};


// One detected instance as handed to DrawPred().
struct OutputSeg {
	int id;             // class id of the detection
	float confidence;   // confidence score of the detection
	cv::Rect box;       // bounding rectangle
	cv::Mat boxMask;       // mask covering only the box region (saves memory and time)
};
// Draws every detection onto img in place: a rectangle and an "<id> <confidence>"
// label per instance, plus the instance masks blended over the picture.
//   img     image to annotate; box coordinates are interpreted in its pixel grid
//   result  detections to draw; boxMask is expected to have the same size as box
// The palette index wraps around, so a class id beyond the palette size reuses a
// colour instead of reading past the end of the vector. A mask is painted only
// when its box lies fully inside img and the mask size matches the box, because
// OpenCV throws otherwise.
void DrawPred(cv::Mat& img, std:: vector<OutputSeg> result) {
	const std::vector<cv::Scalar> color = {cv::Scalar(0, 0, 255)};
	const cv::Rect imgRect(0, 0, img.cols, img.rows);
	cv::Mat mask = img.clone();
	for (size_t i = 0; i < result.size(); i++) {
		const OutputSeg& det = result[i];
		const size_t paletteIdx = static_cast<size_t>(det.id < 0 ? 0 : det.id) % color.size();
		const cv::Scalar& drawColor = color[paletteIdx];

		cv::rectangle(img, det.box, drawColor, 2, 8);

		const bool boxInside = det.box.area() > 0 && (det.box & imgRect) == det.box;
		const bool maskFits = det.boxMask.empty() ||
			(det.boxMask.cols == det.box.width && det.boxMask.rows == det.box.height);
		if (boxInside && maskFits) {
			mask(det.box).setTo(drawColor, det.boxMask);
		}

		std::string label = std::to_string(det.id) + " " + std::to_string(det.confidence);
		cv::putText(img, label, cv::Point(det.box.x, det.box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, drawColor, 1);
	}
	// Blend the painted copy over the image (70% image, 30% mask, +1 offset as before).
	cv::addWeighted(img, 0.7, mask, 0.3, 1, img);
}


int main(int argc, char** argv){
    YOLO_SEG *yoloseg = new YOLO_SEG();
    //srcnn ->load_trtmodel("../best.trt");
    //const std::string trt_path = argv[1];
    const std::string trt_path = "../best_seg.trt";
    //const std::string img_path = argv[2];
    const std::string img_path = "../input_seg.jpg";
   
    yoloseg ->load_trtmodel(trt_path);
    yoloseg ->initbuff();

    int BatchSize = yoloseg->_max_batchsize;
    int channel = 3;
    int imgH = yoloseg->_input_h;
    int imgW = yoloseg->_input_w; 
    float* input_last = new float[BatchSize * channel * imgH * imgW];
    //bgr2rgb + resize + norm + hwc2chw
    cv::Mat img;
    for(int i =0; i< BatchSize; i++){
        //std::string img_path = "../../imgs/face" + std::to_string(i) + ".png";
        img = cv::imread(img_path);
        int img_width = img.cols;
	    int img_height = img.rows;
        // cv::imshow("img", img);
        // cv::waitKey(0);
        //std::cout << " img_height " << img_height << " imgH " << imgH << std::endl;
        //std::cout << "size = " << (BatchSize * channel * imgH * imgW) << std::endl; 

        //crop + hwc->chw + bgr->rgb + norm 
        for (int h = img_height - imgH; h < img_height; ++h){
            for (int w = 0; w < imgW; ++w){   
                for (int c = 0; c < channel; ++c){
                    int hid = h - (img_height - imgH);
                    //input_last[0] = static_cast<float>(img.at<cv::Vec3b>(h, w)[2-c]) / 255.0;
                    input_last[i * channel * imgH * imgW + c * imgH * imgW + hid * imgW + w] = static_cast<float>(img.at<cv::Vec3b>(h, w)[2-c]) / 255.0;
                }
            }
        }
    }

    int outputSize0 = yoloseg->_outputSize0;
    int outputSize1 = yoloseg->_outputSize1;
    float *outputbuff0 = new float[BatchSize * outputSize0];
    float *outputbuff1 = new float[BatchSize * outputSize1];
    std::cout << " infer model " << std::endl;
    yoloseg ->infer_trtmodel(BatchSize, input_last, outputbuff0, outputbuff1);

    std::vector<int> classIds;//结果id数组
	std::vector<float> confidences;//结果每个id对应置信度数组
	std::vector<cv::Rect> boxes;//每个id矩形框
	std::vector<cv::Mat> picked_proposals;  //后续计算mask
    // 处理box

    static const float CONF_THRESHOLD = 0.1;
    static const float NMS_THRESHOLD = 0.5;
    static const float MASK_THRESHOLD = 0.5;
    static const int _segWidth = imgW / 4;
    static const int _segHeight = imgH / 4;
    static const int _segChannels = 32;
    static const int CLASSES = 1;
    static const int Num_box = 37800;
    int net_length = CLASSES + 4 + _segChannels;

    // 处理box
	cv::Mat out1 = cv::Mat(net_length, Num_box, CV_32F, outputbuff0);
	auto start = std::chrono::system_clock::now();
	for (int i = 0; i < Num_box; i++) {
		//输出是1*net_length*Num_box;所以每个box的属性是每隔Num_box取一个值，共net_length个值
		cv::Mat scores = out1(cv::Rect(i, 4, 1, CLASSES)).clone();
		cv::Point classIdPoint;
		double max_class_socre;
		minMaxLoc(scores, 0, &max_class_socre, 0, &classIdPoint);
		max_class_socre = (float)max_class_socre;
		if (max_class_socre >= CONF_THRESHOLD) {
			cv::Mat temp_proto = out1(cv::Rect(i, 4 + CLASSES, 1, _segChannels)).clone();
			picked_proposals.push_back(temp_proto.t());
            float x = out1.at<float>(0, i) ;  //cx
			float y = out1.at<float>(1, i);  //cy
			float w = out1.at<float>(2, i);  //w
			float h = out1.at<float>(3, i);  //h
			int left = MAX((x - 0.5 * w), 0);
			int top = MAX((y - 0.5 * h), 0);
			int width = (int)w;
			int height = (int)h;
			if (width <= 0 || height <= 0) { continue; }

			classIds.push_back(classIdPoint.y);
			confidences.push_back(max_class_socre);
			boxes.push_back(cv::Rect(left, top, width, height));
		}
	}
	//执行非最大抑制以消除具有较低置信度的冗余重叠框（NMS）
	std::vector<int> nms_result;
	cv::dnn::NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD, nms_result);
	std::vector<cv::Mat> temp_mask_proposals;
	std::vector<OutputSeg> output;
    cv::Rect holeImgRect(0, 0, imgW, imgH);
	for (int i = 0; i < nms_result.size(); ++i) {
		int idx = nms_result[i];
		OutputSeg result;
		result.id = classIds[idx];
		result.confidence = confidences[idx];
		result.box = boxes[idx]&holeImgRect;
		output.push_back(result);
		temp_mask_proposals.push_back(picked_proposals[idx]);
	}
    std::cout << " process mask " << std::endl;
 	// 处理mask
	cv::Mat maskProposals;
	for (int i = 0; i < temp_mask_proposals.size(); ++i)
		maskProposals.push_back(temp_mask_proposals[i]);

	cv::Mat protos = cv::Mat(_segChannels, _segWidth * _segHeight, CV_32F, outputbuff1);
	cv::Mat matmulRes = (maskProposals * protos).t();// A*B是以数学运算中矩阵相乘的方式实现的，要求A的列数等于B的行数时
	cv::Mat masks = matmulRes.reshape(output.size(), { _segHeight, _segWidth});

	std::vector<cv::Mat> maskChannels;
	cv::split(masks, maskChannels);
    for (int i = 0; i < output.size(); ++i) {
		cv::Mat dest, mask;
		cv::exp(-maskChannels[i], dest);//sigmoid
		dest = 1.0 / (1.0 + dest);
        std::cout << " cv::resize " << std::endl;
		cv::resize(dest, mask, cv::Size(imgW, imgH), cv::INTER_NEAREST);
		//crop----截取box中的mask作为该box对应的mask
		cv::Rect temp_rect = output[i].box;
		mask = mask(temp_rect) > MASK_THRESHOLD;
		output[i].boxMask = mask;
	}
	auto end = std::chrono::system_clock::now();
	std::cout << "后处理时间：" << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

	DrawPred(img, output);
	cv::imshow("output.jpg", img);
	char c = cv::waitKey(0);

    
    delete []input_last;
    input_last =nullptr;
    delete []outputbuff0;
    outputbuff0 =nullptr;
    delete []outputbuff1;
    outputbuff1 =nullptr;
    yoloseg ->releasebuff();
    return 0;
}

CMakeLists.txt

# Build script for the inference_seg demo (TensorRT + CUDA + OpenCV).
cmake_minimum_required(VERSION 3.10)
project(trt_detect)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Debug stays the default build type, but -DCMAKE_BUILD_TYPE=Release now works.
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Debug)
endif()

# TensorRT is not discoverable through find_package; override with
# -DTENSORRT_ROOT=/path/to/TensorRT when it is installed elsewhere.
set(TENSORRT_ROOT "/home/a/TensorRT-8.5.1.7" CACHE PATH "TensorRT installation prefix")

find_package(CUDA REQUIRED)
# OpenCV package
find_package(OpenCV REQUIRED)

# OpenCV include directories
include_directories(${OpenCV_INCLUDE_DIRS})
# CUDA toolkit located by find_package(CUDA)
include_directories(${CUDA_INCLUDE_DIRS})
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
# TensorRT
include_directories(${TENSORRT_ROOT}/include)
link_directories(${TENSORRT_ROOT}/lib)

cuda_add_executable(inference_seg inference_seg.cpp)
# Compiler flags belong in compile options, not add_definitions (which is for -D macros).
target_compile_options(inference_seg PRIVATE -w -O2)
# Plain signature on purpose: cuda_add_executable links with the plain signature
# and CMake forbids mixing it with the PRIVATE/PUBLIC keyword form.
target_link_libraries(inference_seg nvinfer nvonnxparser cudart ${OpenCV_LIBS})