上一节介绍了使用 GoogLeNet Caffe 模型实现对象分类,它只能告诉我们图片中有什么,不能确定其位置。这一节使用 MobileNet SSD 模型实现对象检测,该模型基于 Pascal VOC 数据集训练,能识别 20 个类别(不含背景类)。
原理
自行百度,本人不擅长
每种网络模型对应的输入数据
在下面的链接中,可以查看每种模型的模型二进制文件名称、网络描述文件名称、缩放尺度、中心化处理参数(均值)、数据样本尺寸、label 描述文件名称、RGB 通道顺序、典型应用场景以及下载链接等信息。
链接地址:https://github.com/opencv/opencv/blob/master/samples/dnn/models.yml
################################################################################
# Object detection models.
################################################################################
..........
# OpenCV's face detection network
opencv_fd:
model: "opencv_face_detector.caffemodel"
# Caffe implementation of SSD model from https://github.com/chuanqi305/MobileNet-SSD
ssd_caffe:
model: "MobileNetSSD_deploy.caffemodel"
config: "MobileNetSSD_deploy.prototxt"
mean: [127.5, 127.5, 127.5]
scale: 0.007843
width: 300
height: 300
rgb: false
classes: "object_detection_classes_pascal_voc.txt"
sample: "object_detection"
# TensorFlow implementation of SSD model from
.......
网络输入输出
- 输入图像
[Nx3x300x300]
通道顺序:BGR,均值:127.5,缩放:2/255(≈0.007843)
- 输出结果
[1x1xNx7]
一张图、一个通道、N 行(检测出的 N 个候选目标)、7 列,每行的 7 个浮点数含义如下:
[image_id, label, conf, x_min, y_min, x_max, y_max]
即:[当前图片索引, 类别标签索引, 置信度, 矩形框左上角与右下角坐标(相对图片宽高的比例值)]
代码演示
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <math.h>
using namespace std;
using namespace cv;
using namespace cv::dnn;
// Test image location: directory prefix and file name, concatenated in main().
#define PIC_PATH "/work/opencv_pic/"
#define PIC_NAME "horse.jpg"

// Spatial size (in pixels) passed to blobFromImage when building the input blob.
const size_t width{300};
const size_t height{300};

// MobileNet-SSD Caffe files: trained weights and network description.
string bin_model{"/work/opencv_dnn/ssd/MobileNetSSD_deploy.caffemodel"};
string protxt{"/work/opencv_dnn/ssd/MobileNetSSD_deploy.prototxt"};

// Class names, looked up by the label value of each detection row.
// Entry 0 is the background class, followed by 20 object classes.
string objNames[]{
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
};
int main(void)
{
string pic = string(PIC_PATH)+string(PIC_NAME);
Mat src;
src = imread(pic);
if(src.empty())
{
printf("pic read err\n");
return -1;
}
namedWindow("input image",WINDOW_AUTOSIZE);
//创建并载入神经网络
Net net = readNetFromCaffe(protxt,bin_model);
if(net.empty())
{
printf("read caffe model data err\n");
return -1;
}
//设置计算后台
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
//获取各层信息
vector<string> layers_names = net.getLayerNames();
for(size_t i=0;i<layers_names.size();i++)
{
int id = net.getLayerId(layers_names[i]);
auto layer = net.getLayer(id);
printf("layer id:%d,type:%s,name:%s\n",id,layer->type.c_str(),layer->name.c_str());
}
//图片格式转换
Mat blobimage = blobFromImage(src, 0.007843, Size(width, height), Scalar(127.5, 127.5, 127.5), false, false);
//网络输入数据
net.setInput(blobimage,"data");
//获取识别数据
Mat detection = net.forward("detection_out");
//图像reshap 输出图像1x1xNx7 中的N参数代表多少种可能 7列参数 类型为浮点型
Mat detectionMat(detection.size[2],detection.size[3],CV_32F,detection.ptr<float>());
RNG rng(123785);
float confidence_threshold = 0.2; //过滤阈值 有N种可能 尽量过滤不需要的
for(int i=0;i<detectionMat.rows;i++)
{
float confidence = detectionMat.at<float>(i,2); //获取可能性
if(confidence>confidence_threshold)
{
size_t objindex = (size_t)(detectionMat.at<float>(i,1)); //获取label索引
cout << "objindex" << objindex <<endl;
//获取矩形框的坐标 获取到的是相对图片的比例值 这里应该放缩获取真实值
//左上坐标
float tl_x = detectionMat.at<float>(i,3)*src.cols;
float tl_y = detectionMat.at<float>(i,4)*src.rows;
//右下坐标
float br_x = detectionMat.at<float>(i,5)*src.cols;
float br_y = detectionMat.at<float>(i,6)*src.rows;
//绘制矩形框
Rect object_box((int)(tl_x),(int)(tl_y),(int)(br_x-tl_x),(int)(br_y-tl_y));
rectangle(src,object_box, Scalar(rng.uniform(0,255),rng.uniform(0,255), rng.uniform(0,255)),2,8,0);
//显示识别对象名称
putText(src,objNames[objindex].c_str(),Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(rng.uniform(0,255),rng.uniform(0,255), rng.uniform(0,255)), 2);
}
}
imshow("input image",src);
waitKey(0);
destroyAllWindows();
return 0;
}