上一节介绍了使用 Caffe 的 SSD 模型来识别物体,能检测 20 个类别;本节使用 TensorFlow 的 Faster R-CNN 模型,它基于 COCO 数据集,能检测 80 个类别,两者的代码很相似。
原理
本人不太懂,自行百度
每种网络模型对应的输入数据
可以查看每种模型的模型二进制文件名称,网络描述文件名称,均值化(减均值)处理参数,数据样本尺寸,描述label文件名称,rgb通道顺序以及典型应用场景等信息
链接地址:https://github.com/opencv/opencv/blob/master/samples/dnn/models.yml
################################################################################
# Object detection models.
################################################################################
。。。。
# TensorFlow implementation of Faster-RCNN model from https://github.com/tensorflow/models/tree/master/research/object_detection
faster_rcnn_tf:
  model: "faster_rcnn_inception_v2_coco_2018_01_28.pb"
  config: "faster_rcnn_inception_v2_coco_2018_01_28.pbtxt"
  mean: [0, 0, 0]
  scale: 1.0
  width: 800
  height: 600
  rgb: true
  sample: "object_detection"
。。。。
模型文件下载地址:https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API
网络输入输出
- 输入图像 [1x3x300x300]通道顺序:RGB、均值为0、放缩:1.0
- 输出数据 [1x1xNx7] 输出1通道 N行 7列数据,N代表N种检测出的可能性,表述如下:
[image_id, label, conf, x_min, y_min, x_max, y_max]
[当前图片索引,识别对应标签,置信度,对应矩形框比例位置]
代码
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
using namespace std;
using namespace cv;
using namespace cv::dnn;
// Directory and file name of the test image; dnn_fast_rcnn() concatenates them
// to build the path passed to imread().
#define PIC_PATH "/work/opencv_pic/"
#define PIC_NAME "horse.jpg"
// Spatial size the input image is resized to when the network blob is built.
// NOTE(review): the models.yml entry quoted in this file lists 800x600 for
// faster_rcnn_tf, while 300x300 is used here - confirm which one is intended.
const size_t width = 300;
const size_t height = 300;
// Frozen TensorFlow graph (binary weights) passed to readNetFromTensorflow().
string bin_model = "/work/opencv_dnn/faster_rcnn/frozen_inference_graph.pb";
// Text graph description passed to readNetFromTensorflow().
// NOTE(review): this names a resnet50 config, whereas the notes in this file
// refer to inception_v2 - verify it matches the frozen graph above.
string protxt = "/work/opencv_dnn/faster_rcnn/faster_rcnn_resnet50_coco_2018_01_28.pbtxt";
// Label-map file opened by readLabelMaps() to translate class ids into names.
string label_map = "/work/opencv_dnn/faster_rcnn/mscoco_label_map.pbtxt";
// Forward declaration; the definition follows dnn_fast_rcnn().
std::map<int, string> readLabelMaps(void);
int dnn_fast_rcnn(void)
{
string pic = string(PIC_PATH)+string(PIC_NAME);
Mat src;
src = imread(pic);
if(src.empty())
{
printf("pic read err\n");
return -1;
}
namedWindow("input image",WINDOW_AUTOSIZE);
//创建并载入神经网络
//Net net = readNetFromCaffe(protxt,bin_model);
Net net = readNetFromTensorflow(bin_model,protxt);
if(net.empty())
{
printf("read caffe model data err\n");
return -1;
}
//设置计算后台
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
//获取各层信息
vector<string> layers_names = net.getLayerNames();
for(size_t i=0;i<layers_names.size();i++)
{
int id = net.getLayerId(layers_names[i]);
auto layer = net.getLayer(id);
printf("layer id:%d,type:%s,name:%s\n",id,layer->type.c_str(),layer->name.c_str());
}
//图片格式转换
Mat blobimage = blobFromImage(src,1, Size(width, height), Scalar(127.5, 127.5, 127.5), true, false);
//网络输入数据
net.setInput(blobimage);
//获取识别数据
Mat detection = net.forward();
//获取名称索引
map<int, string> names = readLabelMaps();
//图像reshap 输出图像1x1xNx7 中的N参数代表多少种可能 7列参数 类型为浮点型
Mat detectionMat(detection.size[2],detection.size[3],CV_32F,detection.ptr<float>());
RNG rng(123785);
float confidence_threshold = 0.5; //过滤阈值 有N种可能 尽量过滤不需要的
for(int i=0;i<detectionMat.rows;i++)
{
float confidence = detectionMat.at<float>(i,2); //获取可能性
if(confidence>confidence_threshold)
{
size_t objindex = (size_t)(detectionMat.at<float>(i,1))+1; //获取label索引
cout << "objindex:" << objindex <<endl;
//获取矩形框的坐标 获取到的是相对图片的比例值 这里应该放缩获取真实值
//左上坐标
float tl_x = detectionMat.at<float>(i,3)*src.cols;
float tl_y = detectionMat.at<float>(i,4)*src.rows;
//右下坐标
float br_x = detectionMat.at<float>(i,5)*src.cols;
float br_y = detectionMat.at<float>(i,6)*src.rows;
//绘制矩形框
Rect object_box((int)(tl_x),(int)(tl_y),(int)(br_x-tl_x),(int)(br_y-tl_y));
rectangle(src,object_box, Scalar(rng.uniform(0,255),rng.uniform(0,255), rng.uniform(0,255)),2,8,0);
map<int, string>::iterator it = names.find(objindex);
//显示识别对象名称
putText(src,(it->second).c_str(),Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(rng.uniform(0,255),rng.uniform(0,255), rng.uniform(0,255)), 2);
}
}
imshow("input image",src);
waitKey(0);
destroyAllWindows();
return 0;
}
std::map<int, string> readLabelMaps()
{
std::map<int, string> labelNames;
std::ifstream fp(label_map);
if (!fp.is_open())
{
printf("could not open file...\n");
exit(-1);
}
string one_line;
string display_name;
while (!fp.eof())
{
std::getline(fp, one_line);
std::size_t found = one_line.find("id:");
if (found != std::string::npos) {
int index = found;
string id = one_line.substr(index + 4, one_line.length() - index);
std::getline(fp, display_name);
std::size_t found = display_name.find("display_name:");
index = found + 15;
string name = display_name.substr(index, display_name.length() - index);
name = name.replace(name.length() - 1, name.length(), "");
// printf("id : %d, name: %s \n", stoi(id.c_str()), name.c_str());
labelNames[stoi(id)] = name;
}
}
fp.close();
return labelNames;
}