Object Detection with OpenCV and a YOLOv3-tiny Model
YOLOv3-tiny is the lightweight version of YOLOv3 aimed at mobile deployment; it is an object detection network fast enough to run in real time on a CPU.
First, load the network. If you want the names of the output layers printed, set the bool variable debug to true.
//load model
Net cnn_net = readNetFromDarknet(yolo_tiny_cfg, yolo_tiny_model);
vector<String> outNames = cnn_net.getUnconnectedOutLayersNames();
if (debug)
for (size_t i = 0; i < outNames.size(); i++)
{
cout << "output layer name : " << outNames[i].c_str() << endl;
}
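Optionally, you can also pin inference to the CPU explicitly (a minimal sketch using the standard OpenCV DNN backend/target API; the original sample relies on the defaults):
//optional: force the OpenCV backend and the CPU target
cnn_net.setPreferableBackend(DNN_BACKEND_OPENCV);
cnn_net.setPreferableTarget(DNN_TARGET_CPU);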
Next, read the class names from the class-name file:
//read classes names
vector<string> classNamesVec;
ifstream classNamesFile("./object_detection_classes_yolov3.txt");
if (classNamesFile.is_open())
{
string className = "";
while (getline(classNamesFile, className))
classNamesVec.push_back(className);
}
Because YOLOv3 merges the outputs of several layers, you must explicitly declare which layers are the output layers when invoking the network in OpenCV. For this, OpenCV provides the API
vector<String> cv::dnn::Net::getUnconnectedOutLayersNames() const
which returns the names of all unconnected layers, i.e., the output layers.
When running inference, the output layer names must be passed in explicitly as a parameter. The relevant API is:
void cv::dnn::Net::forward(
OutputArrayOfArrays outputBlobs,
const std::vector< String > & outBlobNames
)
Here outputBlobs receives the output blobs of the forward pass, and outBlobNames lists the names of the output layers.
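For example, with the output names obtained above, a forward pass collects one output blob per output layer:
//run inference on the declared output layers
vector<Mat> outs;
cnn_net.forward(outs, outNames);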
In each row of a YOLO output blob, the first four values are [center_x, center_y, width, height] (normalized to the input size), the fifth is the objectness score, and the remaining values are the per-class scores (for the 80 COCO classes this gives 4 + 1 + 80 = 85 values per row). Taking the class with the highest score yields the detected object category.
The complete code is as follows:
#include <fstream>
#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
using namespace std;
using namespace cv;
using namespace cv::dnn;
//model path
String yolo_tiny_model = "./yolov3-tiny.weights";
String yolo_tiny_cfg = "./yolov3-tiny.cfg";
const bool debug = true;
int main(int argc, char** argv)
{
//load model
Net cnn_net = readNetFromDarknet(yolo_tiny_cfg, yolo_tiny_model);
vector<String> outNames = cnn_net.getUnconnectedOutLayersNames();
if (debug)
for (size_t i = 0; i < outNames.size(); i++)
{
cout << "output layer name : " << outNames[i].c_str() << endl;
}
//read classes names
vector<string> classNamesVec;
ifstream classNamesFile("./object_detection_classes_yolov3.txt");
if (classNamesFile.is_open())
{
string className = "";
while (getline(classNamesFile, className))
classNamesVec.push_back(className);
}
//load image
Mat input_image = imread("./test.jpg");
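// preprocess: scale pixels to [0,1], resize to the 416x416 network input, swap BGR to RGB (swapRB=true), no crop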
Mat inputBlob = blobFromImage(input_image, 1 / 255.F, Size(416, 416), Scalar(), true, false);
cnn_net.setInput(inputBlob);
// detection
vector<Mat> outs;
cnn_net.forward(outs, outNames);
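// getPerfProfile returns the total inference time in ticks; convert it to milliseconds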
vector<double> layersTimings;
double freq = getTickFrequency() / 1000;
double time = cnn_net.getPerfProfile(layersTimings) / freq;
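// optional: overlay the measured inference time on the result image (not in the original flow)
putText(input_image, format("inference time : %.2f ms", time), Point(5, 20), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));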
vector<Rect> boxes;
vector<int> classIds;
vector<float> confidences;
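// each output row: [center_x, center_y, width, height, objectness, class scores...]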
for (size_t i = 0; i<outs.size(); ++i)
{
float* data = (float*)outs[i].data;
for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
{
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if (confidence > 0.5)
{
int centerX = (int)(data[0] * input_image.cols);
int centerY = (int)(data[1] * input_image.rows);
int width = (int)(data[2] * input_image.cols);
int height = (int)(data[3] * input_image.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left, top, width, height));
}
}
}
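// non-maximum suppression: keep boxes scoring above 0.5, suppress overlapping boxes with IoU above 0.2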
vector<int> indices;
NMSBoxes(boxes, confidences, 0.5, 0.2, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
Rect box = boxes[idx];
String className = classNamesVec[classIds[idx]];
putText(input_image, className.c_str(), box.tl(), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2, 8);
rectangle(input_image, box, Scalar(0, 0, 255), 2, 8, 0);
}
imshow("detections", input_image);
waitKey(0);
return 0;
}
The code in this article requires OpenCV 4.0 or later.