opencv c++ 加载darknet 网络模型之后,完成目标检测预测并框选目标
前言
网络上千篇一律都是一样的代码,我的也不例外,在此加了部分注释,网络加载在另一篇文章中
// Shared state between darknetDetection() and postprocess().
// NOTE(review): these are file-scope mutable globals, so concurrent calls to
// darknetDetection() would race on them — confirm single-threaded use.
float confThreshold, nmsThreshold; // confidence filter threshold / non-maximum-suppression threshold
std::vector<std::string> classes;  // class labels loaded from the .names file (assigned in darknetDetection; not read within this chunk)
/// <summary>
/// Load the class-name list that corresponds to the weight file.
/// </summary>
/// <param name="filename">Path of the class-names (.names) file, one label per line.</param>
/// <returns>Vector of class names; empty if the file could not be opened.</returns>
std::vector<std::string> objects_names_from_file(const std::string& filename) {
    std::ifstream file(filename);
    std::vector<std::string> file_lines;
    // Missing file is treated as "no labels" rather than an error.
    if (!file.is_open()) return file_lines;
    for (std::string line; getline(file, line);) file_lines.push_back(line);
    std::cout << "object names loaded \n";
    return file_lines;
}
/// <summary>
/// Run one detection pass of the loaded network over an image.
/// </summary>
/// <param name="img">Input image; annotated in place with the inference time (and resized for im_info networks).</param>
/// <param name="Insize">Network input size; width == height == Insize.</param>
/// <param name="net">Loaded dnn network (cv::dnn::Net is internally reference counted, so by-value is cheap).</param>
/// <param name="class_labels">Class label list; stored into the file-scope `classes` global.</param>
/// <param name="confidenceThreshold">Detections with confidence below this are discarded.</param>
/// <param name="reNetVal">Output: parsed detections appended by postprocess().</param>
/// <returns></returns>
void darknetDetection(Mat& img, int Insize, cv::dnn::Net net, std::vector<std::string> class_labels, float confidenceThreshold, vector<darkNetRet>& reNetVal)
{
    // Publish the thresholds/labels through the file-scope globals read by postprocess().
    nmsThreshold = 0.1;
    confThreshold = confidenceThreshold;
    classes = class_labels;
    std::vector<String> outNames = net.getUnconnectedOutLayersNames();
    // Build the input blob: scale pixels by 1/255, resize to Insize x Insize,
    // no mean subtraction, swap BGR->RGB (swapRB=true), no crop.
    Mat blob;
    blobFromImage(img, blob, 1 / 255.0, Size(Insize, Insize), Scalar(0, 0, 0), true, false);
    net.setInput(blob);
    if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
    {
        // These architectures need the image resized to the network size and an
        // extra "im_info" input of [height, width, scale].
        resize(img, img, Size(Insize, Insize));
        Mat imInfo = (Mat_<float>(1, 3) << Insize, Insize, 1.6f);
        net.setInput(imInfo, "im_info");
    }
    std::vector<Mat> outs;
    net.forward(outs, outNames);
    // NOTE(review): postprocess is defined later in this file; a forward
    // declaration must exist elsewhere for this to compile — confirm.
    postprocess(img, outs, net, reNetVal);
    // Overlay the inference time reported by the dnn profiler.
    std::vector<double> layersTimes;
    double freq = getTickFrequency() / 1000;
    double t = net.getPerfProfile(layersTimes) / freq;
    std::string label = format("Inference time: %.2f ms", t);
    putText(img, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
}
/// <summary>
/// Parse raw network outputs into detections: filter by confidence, run
/// non-maximum suppression, and append the surviving boxes to reNetVal.
/// Supports three output layouts: im_info (Faster-RCNN/R-FCN), DetectionOutput
/// (SSD-style), and Region (YOLO/darknet).
/// </summary>
/// <param name="frame">Input image; its dimensions are used to scale normalized coordinates.</param>
/// <param name="outs">Raw output blobs from net.forward().</param>
/// <param name="net">The network, used only to query the output layer type.</param>
/// <param name="reNetVal">Output: one darkNetRet (rect, class id, confidence) per kept detection.</param>
void postprocess(Mat frame, const std::vector<Mat>& outs, Net& net, vector<darkNetRet>& reNetVal)
{
// Cached on first call; assumes the same network is used for every call
// through this function (static — NOTE(review): confirm only one model is ever loaded).
static std::vector<int> outLayers = net.getUnconnectedOutLayers();
static std::string outLayerType = net.getLayer(outLayers[0])->type;
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<Rect> boxes;
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
{
// Network produces output blob with a shape 1x1xNx7 where N is a number of
// detections and an every detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom]
// Coordinates here are absolute pixels (unlike the DetectionOutput branch below).
CV_Assert(outs.size() == 1);
float* data = (float*)outs[0].data;
for (size_t i = 0; i < outs[0].total(); i += 7)
{
float confidence = data[i + 2];
if (confidence > confThreshold)
{
int left = (int)data[i + 3];
int top = (int)data[i + 4];
int right = (int)data[i + 5];
int bottom = (int)data[i + 6];
int width = right - left + 1;
int height = bottom - top + 1;
classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
boxes.push_back(Rect(left, top, width, height));
confidences.push_back(confidence);
}
}
}
else if (outLayerType == "DetectionOutput")
{
// Network produces output blob with a shape 1x1xNx7 where N is a number of
// detections and an every detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom]
// Coordinates are normalized to [0,1] and scaled by the frame size here.
CV_Assert(outs.size() == 1);
float* data = (float*)outs[0].data;
for (size_t i = 0; i < outs[0].total(); i += 7)
{
float confidence = data[i + 2];
if (confidence > confThreshold)
{
int left = (int)(data[i + 3] * frame.cols);
int top = (int)(data[i + 4] * frame.rows);
int right = (int)(data[i + 5] * frame.cols);
int bottom = (int)(data[i + 6] * frame.rows);
int width = right - left + 1;
int height = bottom - top + 1;
classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
boxes.push_back(Rect(left, top, width, height));
confidences.push_back(confidence);
}
}
}
else if (outLayerType == "Region")
{
// YOLO/darknet path: one blob per output layer.
for (size_t i = 0; i < outs.size(); ++i)
{
// Network produces output blob with a shape NxC where N is a number of
// detected objects and C is a number of classes + 4 where the first 4
// numbers are [center_x, center_y, width, height]
// (columns 0-3: box, column 4: objectness, columns 5+: per-class scores).
float* data = (float*)outs[i].data;
for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
{
// Best class = max over the per-class score columns of this row.
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
// NOTE(review): this branch uses >= while the other two use > — an
// inconsistency at exactly confThreshold; confirm which is intended.
if (confidence >= confThreshold)
{
// Convert normalized center/size to a pixel-space top-left rect.
int centerX = (int)(data[0] * frame.cols);
int centerY = (int)(data[1] * frame.rows);
int width = (int)(data[2] * frame.cols);
int height = (int)(data[3] * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left, top, width, height));
}
}
}
}
else
CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
// Non-maximum suppression: drop overlapping boxes, keep the highest-confidence ones.
std::vector<int> indices;
NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
Rect box = boxes[idx];
darkNetRet ret;
ret.rect = box;
ret.id = classIds[idx];
ret.confidences = confidences[idx];
reNetVal.push_back(ret);
}
}
/// <summary>
/// Draw a detection box on the image and print its caption above it.
/// </summary>
/// <param name="label">Caption text (e.g. class name / confidence).</param>
/// <param name="leftTop">Top-left corner of the box.</param>
/// <param name="RightBottom">Bottom-right corner of the box.</param>
/// <param name="frame">Image to draw on (modified in place; Mat shares pixel data, so by-value still draws on the caller's image).</param>
/// <returns></returns>
void drawPred( string label, Point leftTop, Point RightBottom, Mat frame)
{
    rectangle(frame, leftTop, RightBottom, Scalar(0, 255, 0));
    // Measure and draw with the SAME font scale; the original measured at 0.5
    // but drew at 0.6, so the clamp below used a too-small text height.
    const double fontScale = 0.6;
    int baseLine;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, fontScale, 1, &baseLine);
    // Keep the caption inside the image when the box touches the top edge.
    leftTop.y = max(leftTop.y, labelSize.height);
    /* rectangle(frame, Point(leftTop.x, leftTop.y - labelSize.height),
    Point(leftTop.x + labelSize.width, leftTop.y + baseLine), Scalar::all(255), FILLED);*/
    putText(frame, label, leftTop, FONT_HERSHEY_SIMPLEX, fontScale, Scalar::all(255));
}