代码解析
1.加载模型
// load image from file
cv::Mat img = cv::imread("./images/img1.png");
// load class names from file
string yoloBasePath = "./dat/yolo/";
string yoloClassesFile = yoloBasePath + "coco.names";
string yoloModelConfiguration = yoloBasePath + "yolov3.cfg";
string yoloModelWeights = yoloBasePath + "yolov3.weights";
vector<string> classes;
ifstream ifs(yoloClassesFile.c_str());
string line;
while (getline(ifs, line)) classes.push_back(line);
// load neural network
cv::dnn::Net net = cv::dnn::readNetFromDarknet(yoloModelConfiguration, yoloModelWeights);
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
加载网络后,DNN后端将设置为DNN_BACKEND_OPENCV。如果OpenCV是使用英特尔的推理引擎构建的,则应改用DNN_BACKEND_INFERENCE_ENGINE。DNN_TARGET_OPENCL是(Intel)GPU可用时的选择方法,代码中默认是CPU。
2.加载并处理图片
// generate 4D blob from input image
cv::Mat blob;
double scalefactor = 1/255.0;
cv::Size size = cv::Size(416, 416);
cv::Scalar mean = cv::Scalar(0,0,0);
bool swapRB = false;
bool crop = false;
cv::dnn::blobFromImage(img, blob, scalefactor, size, mean, swapRB, crop);
当数据流经网络时,YOLO将信息存储为“斑点”,进行通信和操纵:斑点是许多框架(包括Caffe)的标准阵列和统一内存接口。Blob是对正在处理和传递的实际数据的包装,并且还提供CPU和GPU之间的同步功能。从数学上讲,斑点是以C连续方式存储的N维数组。用于批处理图像数据的常规Blob尺寸为N×通道C×高H×宽W。在此命名法中,N为数据的批大小。批处理可实现更好的通信和设备处理吞吐量。对于256幅图像的训练批次,N将为256。参数C表示特征尺寸,例如对于RGB图像C =3。在OpenCV中,Blob以NCHW尺寸顺序存储为4维cv :: Mat数组。有关Blob的更多详细信息,可以在这里找到:
关于 cv::dnn::blobFromImage函数,参考博客
3.前向传播
下一步,我们必须将刚创建的Blob作为输入传递给网络。然后,我们运行OpenCV的正向功能以通过网络执行单个正向传递。为此,我们需要确定网络的最后一层,并为该功能提供相关的内部名称。这可以通过使用OpenCV函数“ getUnconnectedOutLayers”来完成,该函数给出所有未连接输出层的名称,这些输出层实际上是网络的最后一层。
// Get names of output layers
vector<cv::String> names;
vector<int> outLayers = net.getUnconnectedOutLayers(); // get indices of output layers, i.e. layers with unconnected outputs
vector<cv::String> layersNames = net.getLayerNames(); // get names of all layers in the network
names.resize(outLayers.size());
for (size_t i = 0; i < outLayers.size(); ++i) // Get the names of the output layers in names
{
names[i] = layersNames[outLayers[i] - 1];
}
// invoke forward propagation through network
vector<cv::Mat> netOutput;
net.setInput(blob);
net.forward(netOutput, names);
前向传播的结果是一个物体类别向量,每个类元素的前四个元素分别代表边界框中心的X,Y坐标,高度和宽度。第五个元素是置信度。
以下代码显示了如何扫描网络结果以及如何将具有足够高置信度得分的边界框组装到向量中。该函数cv::minMaxLoc使用在整个数组中搜索的极值来找到最小和最大元素值及其位置
// Scan through all bounding boxes and keep only the ones with high confidence
float confThreshold = 0.20;
vector<int> classIds;
vector<float> confidences;
vector<cv::Rect> boxes;
for (size_t i = 0; i < netOutput.size(); ++i)
{
float* data = (float*)netOutput[i].data;
for (int j = 0; j < netOutput[i].rows; ++j, data += netOutput[i].cols)
{
cv::Mat scores = netOutput[i].row(j).colRange(5, netOutput[i].cols);
cv::Point classId;
double confidence;
// Get the value and location of the maximum score
cv::minMaxLoc(scores, 0, &confidence, 0, &classId);
if (confidence > confThreshold)
{
cv::Rect box; int cx, cy;
cx = (int)(data[0] * img.cols);
cy = (int)(data[1] * img.rows);
box.width = (int)(data[2] * img.cols);
box.height = (int)(data[3] * img.rows);
box.x = cx - box.width/2; // left
box.y = cy - box.height/2; // top
boxes.push_back(box);
classIds.push_back(classId.x);
confidences.push_back((float)confidence);
}
}
}
4.网络后端处理
使用非极大值抑制删除多余边框,主要使用库函数NMSBoxes
// perform non-maxima suppression
float nmsThreshold = 0.4; // Non-maximum suppression threshold
vector<int> indices;
cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
完整代码
#include <iostream>
#include <numeric>
#include <fstream>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn.hpp>
#include "dataStructures.h"
using namespace std;
void detectObjects2()
{
// load image from file
cv::Mat img = cv::imread("../images/bottle.jpg");
// load class names from file
string yoloBasePath = "../dat/yolo/";
string yoloClassesFile = yoloBasePath + "coco.names";
string yoloModelConfiguration = yoloBasePath + "yolov3.cfg";
string yoloModelWeights = yoloBasePath + "yolov3.weights";
vector<string> classes;
ifstream ifs(yoloClassesFile.c_str());
string line;
while (getline(ifs, line)) classes.push_back(line);
// load neural network
cv::dnn::Net net = cv::dnn::readNetFromDarknet(yoloModelConfiguration, yoloModelWeights);
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);
// generate 4D blob from input image
cv::Mat blob;
double scalefactor = 1/255.0;
cv::Size size = cv::Size(416, 416);
cv::Scalar mean = cv::Scalar(0,0,0);
bool swapRB = false;
bool crop = false;
cv::dnn::blobFromImage(img, blob, scalefactor, size, mean, swapRB, crop);
// Get names of output layers
vector<cv::String> names;
vector<int> outLayers = net.getUnconnectedOutLayers(); // get indices of output layers, i.e. layers with unconnected outputs
vector<cv::String> layersNames = net.getLayerNames(); // get names of all layers in the network
names.resize(outLayers.size());
for (size_t i = 0; i < outLayers.size(); ++i) // Get the names of the output layers in names
{
names[i] = layersNames[outLayers[i] - 1];
}
// invoke forward propagation through network
vector<cv::Mat> netOutput;
net.setInput(blob);
net.forward(netOutput, names);
// Scan through all bounding boxes and keep only the ones with high confidence
float confThreshold = 0.20;
vector<int> classIds;
vector<float> confidences;
vector<cv::Rect> boxes;
for (size_t i = 0; i < netOutput.size(); ++i)
{
float* data = (float*)netOutput[i].data;
for (int j = 0; j < netOutput[i].rows; ++j, data += netOutput[i].cols)
{
cv::Mat scores = netOutput[i].row(j).colRange(5, netOutput[i].cols);
cv::Point classId;
double confidence;
// Get the value and location of the maximum score
cv::minMaxLoc(scores, 0, &confidence, 0, &classId);
if (confidence > confThreshold)
{
cv::Rect box; int cx, cy;
cx = (int)(data[0] * img.cols);
cy = (int)(data[1] * img.rows);
box.width = (int)(data[2] * img.cols);
box.height = (int)(data[3] * img.rows);
box.x = cx - box.width/2; // left
box.y = cy - box.height/2; // top
boxes.push_back(box);
classIds.push_back(classId.x);
confidences.push_back((float)confidence);
}
}
}
// perform non-maxima suppression
float nmsThreshold = 0.4; // Non-maximum suppression threshold
vector<int> indices;
cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
std::vector<BoundingBox> bBoxes;
for (auto it = indices.begin(); it != indices.end(); ++it)
{
BoundingBox bBox;
bBox.roi = boxes[*it];
bBox.classID = classIds[*it];
bBox.confidence = confidences[*it];
bBox.boxID = (int)bBoxes.size(); // zero-based unique identifier for this bounding box
bBoxes.push_back(bBox);
}
// show results
cv::Mat visImg = img.clone();
for (auto it = bBoxes.begin(); it != bBoxes.end(); ++it)
{
// Draw rectangle displaying the bounding box
int top, left, width, height;
top = (*it).roi.y;
left = (*it).roi.x;
width = (*it).roi.width;
height = (*it).roi.height;
cv::rectangle(visImg, cv::Point(left, top), cv::Point(left + width, top + height), cv::Scalar(0, 255, 0), 2);
string label = cv::format("%.2f", (*it).confidence);
label = classes[((*it).classID)] + ":" + label;
// Display label at the top of the bounding box
int baseLine;
cv::Size labelSize = getTextSize(label, cv::FONT_ITALIC, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
rectangle(visImg, cv::Point(left, top - round(1.5 * labelSize.height)), cv::Point(left + round(1.5 * labelSize.width), top + baseLine), cv::Scalar(255, 255, 255), cv::FILLED);
cv::putText(visImg, label, cv::Point(left, top), cv::FONT_ITALIC, 0.75, cv::Scalar(0, 0, 0), 1);
}
string windowName = "Object classification";
cv::namedWindow( windowName, 1 );
cv::imshow( windowName, visImg );
cv::waitKey(0); // wait for key to be pressed
}
int main()
{
detectObjects2();
}
注意,OPENCV版本大于3.4.2才有相关的YOLO算法。
代码数据下载:
https://github.com/jhzhang19/sensor_fusion
权重文件自己下载放到路径dat/yolo下
下载地址