opencv DNN模块之使用SSD(caffe)模型实现对象检测_调用caffe模型进行图片识别定位-CSDN博客

本文链接：https://blog.csdn.net/z961968549/article/details/104244836

上一节介绍了使用googlenet caffe模型实现对象分类，只能告诉图片中有什么，不能确定其位置，这一节使用Mobilenet SSD模型实现对象的检测，基于Pascal VOC数据集训练，能识别20个类别(刨除背景层)

原理

自行百度，本人不擅长

每种网络模型对应的输入数据

可以查看每种模型的模型二进制文件名称，网络描述文件名称，缩放尺度，中心化处理参数，数据样本尺寸，描述label文件名称，rgb通道顺序以及典型应用场景等信息，以及下载链接
链接地址:https://github.com/opencv/opencv/blob/master/samples/dnn/models.yml

################################################################################
# Object detection models.
################################################################################
..........
# OpenCV's face detection network
opencv_fd:
  model: "opencv_face_detector.caffemodel"
  

# Caffe implementation of SSD model from https://github.com/chuanqi305/MobileNet-SSD
ssd_caffe:
  model: "MobileNetSSD_deploy.caffemodel"
  config: "MobileNetSSD_deploy.prototxt"
  mean: [127.5, 127.5, 127.5]
  scale: 0.007843
  width: 300
  height: 300
  rgb: false
  classes: "object_detection_classes_pascal_voc.txt"
  sample: "object_detection"

# TensorFlow implementation of SSD model from 
.......

网络输入输出

输入图像 [Nx3x300x300]，通道顺序：BGR，均值：127.5，缩放系数：2/255（即 1/127.5 ≈ 0.007843）
输出结果[1x1xNx7]一张图一个通道 N行(检测出的N种可能性) 7列，七个维度浮点数代表如下：
[image_id,label,conf,x_min,y_min,x_max,y_max]
[当前图片索引，识别对应标签，置信度，对应矩形框比例位置]

代码演示

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <math.h>

using namespace std;
using namespace cv;
using namespace cv::dnn;

// Location of the test image: directory prefix and file name are concatenated
// by main() to form the full path.
#define PIC_NAME "horse.jpg"
#define PIC_PATH "/work/opencv_pic/"

// Spatial size the input image is resized to before it is fed to the network.
const size_t height(300);
const size_t width(300);

// Network description (prototxt) and trained weights (caffemodel).
string protxt("/work/opencv_dnn/ssd/MobileNetSSD_deploy.prototxt");
string bin_model("/work/opencv_dnn/ssd/MobileNetSSD_deploy.caffemodel");

// Class labels, indexed by the label id reported in each detection row.
// Index 0 is the background class, followed by 20 object classes.
string objNames[] = {
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor"
};

int main(void)
{
    string pic = string(PIC_PATH)+string(PIC_NAME);
    Mat src;
    src = imread(pic);
    if(src.empty())
    {
        printf("pic read err\n");
        return -1;
    }

    namedWindow("input image",WINDOW_AUTOSIZE);
    //创建并载入神经网络
    Net net = readNetFromCaffe(protxt,bin_model);
    if(net.empty())
    {
        printf("read caffe model data err\n");
        return -1;
    }


    //设置计算后台
    net.setPreferableBackend(DNN_BACKEND_OPENCV);
    net.setPreferableTarget(DNN_TARGET_CPU);

    //获取各层信息
    vector<string> layers_names = net.getLayerNames();

    for(size_t i=0;i<layers_names.size();i++)
    {
        int id = net.getLayerId(layers_names[i]);
        auto layer = net.getLayer(id);
        printf("layer id:%d,type:%s,name:%s\n",id,layer->type.c_str(),layer->name.c_str());
    }

    //图片格式转换
    Mat blobimage = blobFromImage(src, 0.007843, Size(width, height), Scalar(127.5, 127.5, 127.5), false, false);

    //网络输入数据
    net.setInput(blobimage,"data");

    //获取识别数据
    Mat detection = net.forward("detection_out");


    //图像reshap 输出图像1x1xNx7  中的N参数代表多少种可能 7列参数 类型为浮点型
    Mat detectionMat(detection.size[2],detection.size[3],CV_32F,detection.ptr<float>());
    RNG rng(123785);
    float confidence_threshold = 0.2;   //过滤阈值  有N种可能 尽量过滤不需要的
    for(int i=0;i<detectionMat.rows;i++)
    {
        float confidence = detectionMat.at<float>(i,2);   //获取可能性
        if(confidence>confidence_threshold)
        {
            size_t objindex = (size_t)(detectionMat.at<float>(i,1));   //获取label索引
            cout << "objindex" << objindex <<endl;

            //获取矩形框的坐标  获取到的是相对图片的比例值  这里应该放缩获取真实值
            //左上坐标
            float tl_x = detectionMat.at<float>(i,3)*src.cols;
            float tl_y = detectionMat.at<float>(i,4)*src.rows;

            //右下坐标
            float br_x = detectionMat.at<float>(i,5)*src.cols;
            float br_y = detectionMat.at<float>(i,6)*src.rows;
            //绘制矩形框
            Rect object_box((int)(tl_x),(int)(tl_y),(int)(br_x-tl_x),(int)(br_y-tl_y));
            rectangle(src,object_box, Scalar(rng.uniform(0,255),rng.uniform(0,255), rng.uniform(0,255)),2,8,0);

            //显示识别对象名称
            putText(src,objNames[objindex].c_str(),Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(rng.uniform(0,255),rng.uniform(0,255), rng.uniform(0,255)), 2);
        }
    }
    imshow("input image",src);

    waitKey(0);
    destroyAllWindows();
    return 0;
}

效果

在这里插入图片描述