OpenCV----YOLOv5目标检测模型推理 (兼容YOLACT)

12 篇文章 0 订阅
11 篇文章 0 订阅

题目要求:在上一篇OpenCV----YOLACT实例分割模型推理博客中介绍了YOLACT检测和分割任务,本次兼容YOLOv5, 构建基于面向对象设计的目标检测模型框架。
yolov5 github: YOLOv5 source code
yolov5与yolov4的battle:yolov5 vs yolov4。从结果看,v5结果比v4稍差,但灵活性更高,部署更加友好。

分析:
1)opencv的DNN模块集成了很多深度学习模型,包括人脸检测、图像分类、分割、目标检测等,集成了Pytorch、tensorflow、paddlepaddle等模型框架(参看代码库OpenCV/dnn)
2)深度学习模型推理的一般步骤:加载模型,包括配置文件和权重文件;输入图像预处理,转换成模型可接受的数据类型和尺寸;模型预测结果后处理,对于目标检测和实例分割,主要是NMS(非极大值抑制)后处理方法;

  • 结果展示:
    注:图为yolov5m模型测试结果,存在遮挡漏检的情况,但推理速度比较快
main.exe -h
#######
Usage: main.exe [params] image confThreshold nmsThresshold model_name 

        -?, -h, --help, --usage (value:true)
                opecv based deep learining demo

        image (value:inference/horses.jpg)
                Image to process
        confThreshold (value:0.5)
                confidence threshold, default 0.5
        nmsThresshold (value:0.5)
                nms threshold, default 0.5
        model_name (value:yolov5)
                dnn model, default yolov5
parse wrong, please check command or type help
 main.exe inference/horses.jpg 0.5 0.5 yolov5

CMakeLists.txt:

# Build in Release mode so OpenCV DNN inference runs at full speed.
SET(CMAKE_BUILD_TYPE "Release")
# # Define project name
# PROJECT(CppDnn)

# OpenCV headers and prebuilt (MSVC vc15 x64) import libraries.
# NOTE(review): "..." placeholders must be replaced with the real OpenCV path.
include_directories(".../opencv/build/include" ".../opencv/build/include/opencv2")
link_directories(".../opencv/build/x64/vc15/lib")

# Main demo executable plus one static library per model / config unit.
add_executable (main main.cpp)
add_library(yolact yolact.cpp)
add_library(yolov5 yolov5.cpp)
add_library(config config.cpp)
# opencv_world460 is the single monolithic OpenCV 4.6.0 library.
target_link_libraries(main yolact yolov5 config opencv_world460)

# unit test
# add_executable(yolov5 yolov5.cpp)
# target_link_libraries(yolov5 config opencv_world460)
  • 代码示例:

1:检测模型配置文件头文件 config.hpp

// config.hpp
// Shared detection-model configuration: class labels and drawing colors.
#pragma once  // guard against double inclusion (the .cpp files #include each other)

// COCO label strings ("background" + 80 object classes); defined in config.cpp.
extern const char* class_names[];
// Per-class BGR drawing colors, parallel to class_names; defined in config.cpp.
extern const unsigned char colors[81][3];

2: 检测模型配置实现 config.cpp

// config.cpp
// Definitions for the shared detection-model configuration declared in config.hpp.
#pragma once
#include<string>
#include"config.hpp"

// coco 81 classes (background included); model class id i maps to class_names[i + 1].
extern const char* class_names[] = { "background",
										"person", "bicycle", "car", "motorcycle", "airplane", "bus",
										"train", "truck", "boat", "traffic light", "fire hydrant",
										"stop sign", "parking meter", "bench", "bird", "cat", "dog",
										"horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
										"backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
										"skis", "snowboard", "sports ball", "kite", "baseball bat",
										"baseball glove", "skateboard", "surfboard", "tennis racket",
										"bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
										"banana", "apple", "sandwich", "orange", "broccoli", "carrot",
										"hot dog", "pizza", "donut", "cake", "chair", "couch",
										"potted plant", "bed", "dining table", "toilet", "tv", "laptop",
										"mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
										"toaster", "sink", "refrigerator", "book", "clock", "vase",
										"scissors", "teddy bear", "hair drier", "toothbrush"
};

// anchor color setting: one BGR triple per entry in class_names
extern const unsigned char colors[81][3] = {{56, 0, 255}, {226, 255, 0}, {0, 94, 255},
	{0, 37, 255}, {0, 255, 94}, {255, 226, 0}, {0, 18, 255}, {255, 151, 0},
	{170, 0, 255}, {0, 255, 56}, {255, 0, 75}, {0, 75, 255}, {0, 255, 169},
	{255, 0, 207}, {75, 255, 0}, {207, 0, 255}, {37, 0, 255}, {0, 207, 255},
	{94, 0, 255}, {0, 255, 113}, {255, 18, 0}, {255, 0, 56}, {18, 0, 255},
	{0, 255, 226}, {170, 255, 0}, {255, 0, 245}, {151, 255, 0}, {132, 255, 0},
	{75, 0, 255}, {151, 0, 255}, {0, 151, 255}, {132, 0, 255}, {0, 255, 245},
	{255, 132, 0}, {226, 0, 255}, {255, 37, 0}, {207, 255, 0},
	{0, 255, 207}, {94, 255, 0}, {0, 226, 255},
	{56, 255, 0}, {255, 94, 0}, {255, 113, 0},{0, 132, 255}, {255, 0, 132},
	{255, 170, 0}, {255, 0, 188}, {113, 255, 0}, {245, 0, 255}, {113, 0, 255},
	{255, 188, 0}, {0, 113, 255}, {255, 0, 0}, {0, 56, 255}, {255, 0, 113},
	{0, 255, 188}, {255, 0, 94}, {255, 0, 18}, {18, 255, 0}, {0, 255, 132},
	{0, 188, 255}, {0, 245, 255}, {0, 169, 255},{37, 255, 0},
	{255, 0, 151}, {188, 0, 255}, {0, 255, 37}, {0, 255, 0},
	{255, 0, 170}, {255, 0, 37}, {255, 75, 0}, {0, 0, 255}, {255, 207, 0},
	{255, 0, 226}, {255, 245, 0}, {188, 255, 0}, {0, 255, 18}, {0, 255, 75},
	{0, 255, 151}, {255, 56, 0}, {245, 255, 0}
};

// network config, filled in from the command line in main.cpp.
// NOTE: the original `extern struct net_config{...};` applied a storage-class
// specifier to a bare type definition, which is ill-formed; `extern` removed.
struct net_config{
    float confThreshold;     // confidence threshold for keeping detections
    float nmsThreshold;      // IoU threshold used during NMS
    std::string model_name;  // "yolov5" or "yolact"
	int img_size;            // network input resolution
	std::string model_path;  // path to the ONNX weight file
};

3: yolov5推理模型

// yolov5.cpp
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

#include "config.cpp"

using namespace cv;
using namespace dnn;
using namespace std;

// YOLOv5 detector backed by the OpenCV DNN module.
class yolov5
{
public:
	// Construct from explicit thresholds and an ONNX weight path.
    yolov5(float confThreshold, float nmsThreshold, string model_path = "model/yolov5m.onnx", const int keep_top_k = 200);
    // Construct from a parsed command-line configuration.
    yolov5(net_config& config);
    // Run inference on `frame` and draw the surviving boxes in place.
    void detect(Mat& frame);
private:
    // Anchor (w, h) pairs, three per detection head.
    const float anchors[3][6] = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, {30.0, 61.0, 62.0, 45.0, 59.0, 119.0},{116.0, 90.0, 156.0, 198.0, 373.0, 326.0}};
    // Output stride of each of the three detection heads.
    const float stride[3] = { 8.0, 16.0, 32.0 };
    const int inpWidth = 640;   // network input width
    const int inpHeight = 640;  // network input height
    float confThreshold = 0.5;  // class-score threshold
    float nmsThreshold = 0.5;   // NMS IoU threshold
    float objThreshold = 0.5;   // objectness threshold

    Net net;

    // Draw one labelled bounding box onto `frame`.
    void drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid);

	// Apply the logistic sigmoid in place to the first `length` floats of `out`,
	// turning raw network logits into probabilities.
    void sigmoid(Mat* out, int length){
        float* vals = (float*)(out->data);
        for (int idx = 0; idx < length; ++idx)
        {
            vals[idx] = 1.0 / (1 + expf(-vals[idx]));
        }
    }
};

// Detail constructor: set thresholds and load the ONNX network.
// `keep_top_k` is accepted for interface compatibility but is currently unused
// (NMSBoxes is called without a top_k limit in detect()).
yolov5::yolov5(float confThreshold, float nmsThreshold, string model_path, const int keep_top_k)
    : confThreshold(confThreshold),
      nmsThreshold(nmsThreshold),
      net(readNet(model_path))
{
    (void)keep_top_k;  // silence unused-parameter warnings
}

// General constructor: copy thresholds from the parsed command-line
// configuration and load the network from config.model_path.
yolov5::yolov5(net_config& config)
    : confThreshold(config.confThreshold),
      nmsThreshold(config.nmsThreshold),
      net(readNet(config.model_path))
{
}

// Draw the predicted bounding box on `frame` together with a
// "<class name>:<confidence>" label above its top-left corner.
void yolov5::drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid)
{
    // Bounding box in red.
    rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);

    // Compose the label; classid+1 skips the "background" entry in class_names.
    string label = string(class_names[classid+1]) + ":" + format("%.2f", conf);

    // Clamp so the text stays inside the image when the box touches the top edge.
    int baseLine;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    top = max(top, labelSize.height);
    //rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
    putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
	// **** for video detection **** //
    // static const string kWinName = "yolov5 Object Detection in OpenCV";
	// namedWindow(kWinName, WINDOW_NORMAL);
	// imshow(kWinName, frame);
	// waitKey(10);
}

// Run a full YOLOv5 forward pass on `frame` and draw the surviving
// detections in place: resize to 640x640 via blobFromImage, forward through
// the three detection heads, decode box proposals, then NMS and drawing.
void yolov5::detect(Mat& frame)
{
    Mat blob;
	// 1/255 pixel scaling, BGR->RGB swap (swapRB=true), no crop.
	blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
	this->net.setInput(blob);
	vector<Mat> outs;
	this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
	
	// generate proposals
	vector<int> classIds;
	vector<float> confidences;
	vector<Rect> boxes;
	// Scale factors mapping network-input coordinates back to the original image.
	float ratioh = (float)frame.rows / this->inpHeight;
    float ratiow = (float)frame.cols / this->inpWidth;

	// nout = 80 class scores + 4 box coordinates + 1 objectness score.
	int n = 0, q = 0, i = 0, j = 0, nout = 80 + 5, c = 0;
	for (n = 0; n < 3; n++)	// one iteration per detection head (stride 8/16/32)
	{
		int num_grid_x = (int)(this->inpWidth / this->stride[n]);
		int num_grid_y = (int)(this->inpHeight / this->stride[n]);
		int area = num_grid_x * num_grid_y;
		// Raw logits -> probabilities for all 3 anchors of this head.
		this->sigmoid(&outs[n], 3 * nout * area);
		for (q = 0; q < 3; q++)	// anchors within this head
		{
			const float anchor_w = this->anchors[n][q * 2];
			const float anchor_h = this->anchors[n][q * 2 + 1];
			// Indexing below assumes layout [anchor][channel][grid_y][grid_x].
			float* pdata = (float*)outs[n].data + q * nout * area;  
			for (i = 0; i < num_grid_y; i++)
			{
				for (j = 0; j < num_grid_x; j++)
				{
					// Channel 4 holds the objectness score of this grid cell.
					float box_score = pdata[4 * area + i * num_grid_x + j];
					if (box_score > this->objThreshold)
					{
						float max_class_socre = 0, class_socre = 0;
						int max_class_id = 0;
						for (c = 0; c < 80; c++) // get max socre
						{
							class_socre = pdata[(c + 5) * area + i * num_grid_x + j];
							if (class_socre > max_class_socre)
							{
								max_class_socre = class_socre;
								max_class_id = c;
							}
						}
						
						if (max_class_socre > this->confThreshold)
						{
							// YOLOv5 box decoding: sigmoid outputs rescaled by
							// 2x-0.5 for the center and (2x)^2 for the size.
							float cx = (pdata[i * num_grid_x + j] * 2.f - 0.5f + j) * this->stride[n];  ///cx
							float cy = (pdata[area + i * num_grid_x + j] * 2.f - 0.5f + i) * this->stride[n];   ///cy
							float w = powf(pdata[2 * area + i * num_grid_x + j] * 2.f, 2.f) * anchor_w;   ///w
							float h = powf(pdata[3 * area + i * num_grid_x + j] * 2.f, 2.f) * anchor_h;  ///h
							
							// Center/size -> top-left corner, mapped back to
							// original image coordinates.
							int left = (cx - 0.5*w)*ratiow;
							int top = (cy - 0.5*h)*ratioh;

							classIds.push_back(max_class_id);
							confidences.push_back(max_class_socre);
							boxes.push_back(Rect(left, top, (int)(w*ratiow), (int)(h*ratioh)));
						}	
					}	
				}
			}
		}
	}
    // nms to eliminate redundant overlapping boxes with lower confidences
    vector<int> indices;
    NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        int idx = indices[i];
        Rect box = boxes[idx];
        
        
        this->drawPred(confidences[idx], box.x, box.y,
            box.x + box.width, box.y + box.height, frame, classIds[idx]);
    }
}

// unit test
// int main()
// {
//     yolov5 net(0.5, 0.5, "model/yolov5m.onnx");
//     string imgpath = "inference/horses.jpg";
//     Mat srcimg = imread(imgpath);
//     net.detect(srcimg);

//     cout << "detect ! " << endl;
//     static const string kWinName = "Deep learning object detection in OpenCV";
//     namedWindow(kWinName, WINDOW_NORMAL);
//     imshow(kWinName, srcimg);
//     waitKey(0);
//     destroyAllWindows();
// }

4: 整体代码结构


#define _CRT_SECURE_NO_WARNINGS
#include <fstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

#include "config.cpp"
#include "yolact.cpp"
#include "yolov5.cpp"

using namespace cv;
using namespace dnn;
using namespace std;

// Parse the command line described by `keys` into the input image and the
// network configuration. Returns false (after printing a message) on a help
// request, a parse error, or an unreadable image path.
bool parseParam(int argc, char** argv, const char* keys, Mat& img, net_config& config){
	CommandLineParser parser(argc, argv, keys);
	if(parser.has("help")){
		parser.printMessage();
		return false;
	}
	if(!parser.check()){
		parser.printErrors();
		return false;
	}
	// Positional arguments: image path, then thresholds, then model name.
	img = imread(parser.get<String>(0));
	if(img.empty()){
		cout << "wrong image path ! please check again." << endl;
		return false;
	}
	config.confThreshold = parser.get<float>(1);
	config.nmsThreshold = parser.get<float>(2);
	config.model_name = parser.get<string>(3);
	return true;
}

// Entry point: parse arguments, dispatch to the selected model (yolact or
// yolov5), run detection on the input image and display the result.
// FIX: the original had an extra closing brace before `else`, which made the
// function ill-formed; it has been removed.
int main(int argc, char** argv)
{	
	// Command-line schema: optional help flag plus four positional arguments.
	const char* keys  = {
        "{help h usage ? | | opecv based deep learining demo}"
        "{@image | inference/horses.jpg | Image to process}"
        "{@confThreshold | 0.5 | confidence threshold, default 0.5}"
        "{@nmsThresshold | 0.5 | nms threshold, default 0.5}"
        "{@model_name | yolov5 | dnn model, default yolov5}"
		};

	net_config config;
	Mat srcimg;
	if(!parseParam(argc, argv, keys, srcimg, config)){
		cout << "parse wrong, please check command or type help" << endl;
        return 0;
    }

	if(config.model_name == "yolact"){
		config.model_path = "model/yolact_base_54_800000.onnx";
		yolact model(config);
		model.detect(srcimg);
		static const string kWinName = "Deep learning object detection in OpenCV";
		namedWindow(kWinName, WINDOW_NORMAL);
		imshow(kWinName, srcimg);
		waitKey(0);
		destroyAllWindows();
	}else if(config.model_name == "yolov5"){
		config.model_path = "model/yolov5m.onnx";
		yolov5 model(config);
		model.detect(srcimg);
		static const string kWinName = "Deep learning object detection in OpenCV";
		namedWindow(kWinName, WINDOW_NORMAL);
		imshow(kWinName, srcimg);
		waitKey(0);
		destroyAllWindows();
		// load outer video device
		// VideoCapture capture(1);
		// Mat frame;
		// while(true){
		// 	capture >> frame;
		// 	resize(frame, frame, Size(640, 640), INTER_LINEAR);
		// 	flip(frame, frame, 1);
		// 	Mat m = frame;
		// 	model.detect(m);
		// 	if(waitKey(1) == 'q'){
		// 		break;
		// 	}
		// }
	}else{
		cout << "model not defined" << endl;
	}
	return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值