目录
1. 注意事项
- pt->onnx 一定要选择静态尺寸,dnn只支持静态尺寸,否则会报错。
parser.add_argument('--dynamic', default=True, help='ONNX/TF/TensorRT: dynamic axes')
-
opencv version: 3.4.15
2. C++代码
#include <iostream>
#include<opencv2//opencv.hpp>
#include<math.h>
#include<time.h>
using namespace std;
using namespace cv;
using namespace dnn;
// reference https://github.com/UNeedCryDear/yolov5-opencv-dnn-cpp/blob/main/yolo.cpp
std::vector<std::string> extensions{ "jpg", "bmp", "png", "jpeg" };
static const string kWinName = "YOLOV5 object detection in OpenCV";
static vector<string> loadLabels(const string& filename)
{
vector<string> labels;
ifstream ifs(filename.c_str());
string line;
while (getline(ifs, line))
{
labels.push_back(line);
}
return labels;
}
void GetImgFilenames(std::string path, std::vector<std::string>& imgFilenames)
{
// imgFilenames.clear();
if (path.find(".") != std::string::npos)
{
imgFilenames.push_back(path);
}
else
{
std::string fpath = path.append("*.*");
std::vector<cv::String> allfiles; //cv::String
cv::glob(fpath, allfiles);
for (int i = 0; i < allfiles.size(); i++)
{
size_t iPos = allfiles[i].rfind('.');
std::string fileEx = allfiles[i].substr(iPos + 1, allfiles[i].length());
if (std::find(extensions.begin(), extensions.end(), fileEx) != extensions.end())
{
imgFilenames.push_back(allfiles[i]);
}
}
}
}
std::vector<cv::Scalar> colormap(int numClass)
{
std::vector<cv::Scalar> colors;
for (int i = 0; i < numClass; i++) {
int b = rand() % 256;
int g = rand() % 256;
int r = rand() % 256;
colors.push_back(Scalar(b, g, r));
}
return colors;
}
#define isP6 false //是否使用P6模型
struct OutputDet {
int cls; // 类别id
float confidence; //置信度
cv::Rect bndbox; //矩形框
};
class YoloDet {
public:
bool modelInit(cv::dnn::Net& net, std::string& netPath, std::string classFile, bool isCuda);
bool Detect(cv::Mat srcImg, cv::dnn::Net& net, std::vector<OutputDet>& output);
void Visvalize(cv::Mat img, std::vector<OutputDet> predOuts);
void letterbox(cv::Mat srcImg, cv::Mat& pendImg);
private:
#if(defined isP6 && isP6==true)
const float netAnchors[4][6] = { { 19,27, 44,40, 38,94 },{ 96,68, 86,152, 180,137 },{ 140,301, 303,264, 238,542 },{ 436,615, 739,380, 925,792 } };
const int inpWidth = 1280;
const int inpHeight = 1280;
const int strideSize = 4; //stride size
#else
const float netAnchors[3][6] = { { 10,13, 16,30, 33,23 },{ 30,61, 62,45, 59,119 },{ 116,90, 156,198, 373,326 } };
const int inpWidth = 640;
const int inpHeight = 640;
const int strideSize = 3; //stride size
#endif // isP6
const float netStride[4] = { 8, 16.0,32,64 };
float objThreshold = 0.25;
float confThreshold = 0.25;
float nmsThreshold = 0.45;
float nmsScoreThreshold = objThreshold * confThreshold;
bool keep_ratio = true;
cv::Vec4d param;
vector<string> classNames;
int numClasses;
std::vector<cv::Scalar> colors;
// std::vector<std::string> classNames = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
// "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
// "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
// "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
// "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
// "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
// "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
// "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
// "hair drier", "toothbrush" };
};
bool YoloDet::modelInit(Net& net, string& netPath, std::string classFile, bool isCuda = false) {
try {
net = readNet(netPath);
}
catch (const std::exception&) {
return false;
}
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
ifstream ifs(classFile);
string line;
while (getline(ifs, line)) this->classNames.push_back(line);
if (classNames.empty()) {
cout << "Failed to load labels: " << classFile << endl;
}
this->numClasses = classNames.size();
this->colors = colormap(numClasses);
return true;
}
void YoloDet::letterbox(cv::Mat srcImg, cv::Mat& pendImg)
{
// int sh = srcImg.rows, sw = srcImg.cols;
// int nh = this->inpHeight;
// int nw = this->inpWidth;
// //float ratioh = 1, ratiow = 1;
// int left = 0, top = 0;
// //Mat pendImg;
// if (this->keep_ratio && sh != sw) {
// float hw_scale = (float)sh / sw;
// if (hw_scale > 1) {
// nh = this->inpHeight;
// nw = int(this->inpWidth / hw_scale);
// cv::resize(srcImg, pendImg, Size(nw, nh), INTER_AREA);
// left = int((this->inpWidth - nw) * 0.5);
// cv::copyMakeBorder(pendImg, pendImg, 0, 0, left, this->inpWidth - nw - left, BORDER_CONSTANT, 114);
// }
// else {
// nh = (int)this->inpHeight * hw_scale;
// nw = this->inpWidth;
// cv::resize(srcImg, pendImg, Size(nw, nh), INTER_AREA);
// top = (int)(this->inpHeight - nh) * 0.5;
// cv::copyMakeBorder(pendImg, pendImg, top, this->inpHeight - nh - top, 0, 0, BORDER_CONSTANT, 114);
// }
// }
// else {
// cv::resize(srcImg, pendImg, Size(nw, nh), INTER_AREA);
// }
// float ratioh = (float)sh / nh, ratiow = (float)sw / nw;
// cv::Vec4d p(ratioh, ratiow, top, left);
// this-> param = p;
int col = srcImg.cols;
int row = srcImg.rows;
pendImg = srcImg.clone();
int maxSize = col>=row?col:row;
// int maxSize = MAX(col, row);
if (maxSize > 1.2 * col || maxSize > 1.2 * row) {
pendImg = cv::Mat::zeros(maxSize, maxSize, CV_8UC3);
srcImg.copyTo(pendImg(Rect(0, 0, col, row)));
}
float ratioh = (float)pendImg.rows / inpHeight;
float ratiow = (float)pendImg.cols / inpWidth;
cv::Vec4d p(ratioh, ratiow, 0, 0);
this-> param = p;
}
bool YoloDet::Detect(cv::Mat srcImg, cv::dnn::Net& net, vector<OutputDet>& output) {
cv::Mat netInputImg;
this->letterbox(srcImg.clone(), netInputImg);
cv::Mat blob;
cv::dnn::blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0, 0, 0), true, false);
//blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(104, 117, 123), true, false);
//blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(114, 114,114), true, false);
net.setInput(blob);
std::vector<cv::Mat> netOutputImg;
net.forward(netOutputImg, net.getUnconnectedOutLayersNames());
#if CV_VERSION_MAJOR==4&&CV_VERSION_MINOR==6
std::sort(netOutputImg.begin(), netOutputImg.end(), [](Mat& A, Mat& B) {return A.size[1] > B.size[1]; });//opencv 4.6
#endif
int netWidth = numClasses + 5;
float* rawOutput = (float*)netOutputImg[0].data;
std::vector<int> clsIDs;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
//vector<BoxInfo> boxes;
for (int n = 0; n < this-> strideSize; n++)
{
const float stride = pow(2, n + 3);
int grid_x = (int)ceil((this->inpWidth / stride));
int grid_y = (int)ceil((this->inpHeight / stride));
for (int anchor = 0; anchor < 3; anchor++)
{ //anchors
for (int i = 0; i < grid_y; ++i)
{
for (int j = 0; j < grid_x; ++j)
{
float boxScore = rawOutput[4]; ;//bndbox-confidence
if (boxScore >= objThreshold)
{
cv::Mat confs(1, classNames.size(), CV_32FC1, rawOutput + 5);
Point clsID;
double maxConf;
minMaxLoc(confs, 0, &maxConf, 0, &clsID);
maxConf = (float)maxConf;
if (maxConf >= confThreshold)
{
float x = rawOutput[0]; //x
float y = rawOutput[1]; //y
float w = rawOutput[2]; //w
float h = rawOutput[3]; //h
// float left = (x - param[3] - 0.5 * w) * param[1];
// float top = (y - param[2] - 0.5 * h) * param[0];
int left = (x - 0.5 * w) * param[1];
int top = (y - 0.5 * h) * param[0];
w = w * param[1];
h = h * param[0];
clsIDs.push_back(clsID.x);
confidences.push_back(maxConf * boxScore);
boxes.push_back(cv::Rect(int(left), int(top), int(w), int(h)));
}
}
rawOutput += netWidth;
}
}
}
}
cv::Mat img = srcImg.clone();
vector<int> resultNMS;
NMSBoxes(boxes, confidences, nmsScoreThreshold, nmsThreshold, resultNMS);
Rect refineRect(0, 0, img.cols, img.rows);
// std::vector<OutputDet> tempOutput;
for (int i = 0; i < resultNMS.size(); ++i) {
int ix = resultNMS[i];
OutputDet predOut;
predOut.cls = clsIDs[ix];
predOut.confidence = confidences[ix];
predOut.bndbox = boxes[ix] & refineRect;
output.push_back(predOut);
}
if (output.size())
return true;
else
return false;
}
void YoloDet::Visvalize(cv::Mat img, vector<OutputDet> predOuts) {
for (int i = 0; i < predOuts.size(); i++) {
int left, top;
left = predOuts[i].bndbox.x;
top = predOuts[i].bndbox.y;
rectangle(img, predOuts[i].bndbox, colors[predOuts[i].cls], 2, 8);
string label = classNames[predOuts[i].cls] + ":" + to_string(predOuts[i].confidence);
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
putText(img, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 1, colors[predOuts[i].cls], 2);
}
// cv::namedWindow(kWinName, WINDOW_NORMAL);
cv::imshow(kWinName, img);
cv::waitKey(0);
cv::destroyAllWindows();
}
// 不支持动态尺寸
int runner(std::string modelPath, std::string imgPath, std::string classFile){
YoloDet detector;
cv::dnn::Net net;
if (detector.modelInit(net, modelPath, classFile, true)) {
cout << "model loaded successfully!" << endl;
}
else {
cout << "model loaded failed!" << endl;
return -1;
}
std::vector<std::string> imgFiles;
GetImgFilenames(imgPath, imgFiles);
cout << imgFiles.size() << endl;
for (int i = 0; i < imgFiles.size(); i++)
{
vector<OutputDet> predOuts;
Mat img = imread(imgFiles[i]);
if (detector.Detect(img, net, predOuts)) {
detector.Visvalize(img, predOuts);
}
else {
cout << "Detect Failed!" << endl;
}
}
return 0;
}
int main() {
std::string imgPath = "images/";
std::string modelPath = "yolov5s.onnx";
std::string classFile = "class.names";
runner(modelPath, imgPath, classFile);
return 0;
}
3. 运行结果
![]() | ![]() | ![]() |