最近学习了SSD,想自己实现SSD后面的检测过程(虽然SSD已经提供了代码,但还是想自己练练手)。
SSD如何由先验框(prior box)解码得到预测的bounding box:
// Decoding of one predicted box (index i) from its prior box.
// prior_data_mean holds the prior box coordinates (the "mean" part),
// prior_data_std holds the prior box variances, and loc_pre holds the predictions.
// Each prior uses four consecutive values; indices 0/2 are used as the x extent
// and 1/3 as the y extent.
prior_w = prior_data_mean[i*4+2] - prior_data_mean[0+i*4];// prior box width
prior_h = prior_data_mean[i*4+3] - prior_data_mean[1+i*4];// prior box height
prior_x = (prior_data_mean[i*4+2] + prior_data_mean[0+i*4])/2;// x coordinate of the prior box center
prior_y = (prior_data_mean[i*4+1] + prior_data_mean[3+i*4])/2;// y coordinate of the prior box center
// Center: the predicted offset is scaled by the variance and by the prior size,
// then added to the prior center.
bb_x = prior_data_std[i*4+0]*loc_pre[i*4+0]*prior_w + prior_x;
bb_y = prior_data_std[i*4+1]*loc_pre[i*4+1]*prior_h + prior_y;
// Size: the variance-scaled prediction is exponentiated and multiplied by the prior size.
bb_w = exp(prior_data_std[i*4+2]*loc_pre[i*4+2])*prior_w;
bb_h = exp(prior_data_std[i*4+3]*loc_pre[i*4+3])*prior_h;
自己添加的层MyDetect:功能是对输入的prior bounding box、location predict、confidence predict进行处理,输出top[0]的维度是1×1×num×6(num表示输出目标框的个数,会在层进行forward的时候调整;最后一维的6分别表示一个框的xmin、ymin、xmax、ymax、class、confidence)。
#ifndef CAFFE_MY_DETECT_LAYER_HPP_
#define CAFFE_MY_DETECT_LAYER_HPP_
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include <vector>
#include <string>
namespace caffe{
/**
 * @brief Detection output layer registered under the type name "MyDetect".
 *
 * Takes exactly three bottom blobs and produces exactly one top blob.
 * The implementation reads bottom[0] as location predictions, bottom[1] as
 * per-class confidences and bottom[2] as prior boxes (coordinates followed by
 * variances). The top blob is reshaped during the forward pass to
 * 1 x 1 x num x 6, one row per detection.
 *
 * Only a CPU forward pass is declared; Backward_cpu is declared but the layer
 * is meant for inference.
 */
template <typename Dtype>
class MyDetectLayer : public Layer<Dtype>{
public:
    // prior_num is zero-initialized so that it never holds an indeterminate
    // value before LayerSetUp() assigns it.
    explicit MyDetectLayer(const LayerParameter& param)
        :Layer<Dtype>(param), prior_num(0){}

    /// Validates the bottom blobs and derives the number of prior boxes.
    virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
        const vector<Blob<Dtype>*>& top);
    /// Gives the top blob its shape before the forward pass.
    virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
        const vector<Blob<Dtype>*>& top);

    virtual inline const char* type() const { return "MyDetect";}
    virtual inline int ExactNumBottomBlobs() const { return 3; }
    virtual inline int ExactNumTopBlobs() const { return 1; }

protected:
    /// Produces the detections from the three bottom blobs.
    virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
        const vector<Blob<Dtype>*>& top);
    /// Declared to satisfy the Layer interface.
    virtual void Backward_cpu(const vector<Blob<Dtype>*>& bottom,
        const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& top);

    // Number of prior boxes; assigned in LayerSetUp().
    int prior_num;
};
}
#endif
#include "caffe/layers/mydetect.hpp"
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>
namespace caffe{
// Length of the overlap between the 1-D intervals [x1_min, x1_max] and
// [x2_min, x2_max]. Returns 0 when the intervals are disjoint or only touch.
// The result is not clamped: which expression is returned depends only on
// the comparisons below.
template<typename Dtype>
Dtype lap(Dtype x1_min,Dtype x1_max,Dtype x2_min,Dtype x2_max){
    if(x1_min < x2_min){
        // Interval 1 starts first: it must reach strictly past x2_min to overlap.
        if(!(x1_max > x2_min)){
            return 0;
        }
        // Overlap begins at x2_min and ends at whichever interval ends first.
        return x1_max < x2_max ? x1_max - x2_min : x2_max - x2_min;
    }
    // Interval 2 starts first (or both start together).
    if(!(x1_min < x2_max)){
        return 0;
    }
    // Overlap begins at x1_min and ends at whichever interval ends first.
    return x1_max < x2_max ? x1_max - x1_min : x2_max - x1_min;
}
// Jaccard overlap (intersection over union) of two boxes.
// Each vector is indexed 0..3; elements 0/2 are used as the min/max along one
// axis and elements 1/3 as the min/max along the other.
// Returns 0 when the overlap along either axis is below 1e-8.
template<typename Dtype>
Dtype jaccard(vector<Dtype> x1, vector<Dtype>x2){
    const Dtype overlap_a = lap<Dtype>(x1[0],x1[2],x2[0],x2[2]);
    const Dtype overlap_b = lap<Dtype>(x1[1],x1[3],x2[1],x2[3]);
    if(overlap_a < Dtype(0.00000001) || overlap_b < Dtype(0.00000001)){
        return Dtype(0);
    }
    const Dtype intersection = overlap_a*overlap_b;
    const Dtype area1 = (x1[2]-x1[0])*(x1[3]-x1[1]);
    const Dtype area2 = (x2[2]-x2[0])*(x2[3]-x2[1]);
    // union = area1 + area2 - intersection
    return intersection/(area1+area2-intersection);
}
// One-time setup: checks the blob counts, derives the number of prior boxes
// from bottom[2] and verifies that the other two bottoms agree with it.
template<typename Dtype>
void MyDetectLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top){
CHECK_EQ(bottom.size(), 3);
CHECK_EQ(top.size(), 1);
// bottom[2]'s height is divided by 4 because each prior box uses four values.
prior_num = bottom[2]->height()/4;
// bottom[0] must carry 4 location values per prior.
CHECK_EQ(prior_num*4, bottom[0]->channels())
<< "Number of priors must match number of location predictions.";
// bottom[1] must carry 21 confidence values per prior; the class count is
// hard-coded here.
CHECK_EQ(prior_num*21, bottom[1]->channels())
<< "Number of priors must match number of confidence predictions.";
}
// Gives the top blob the shape 1 x 1 x 1 x 6. Forward_cpu reshapes it again
// once the number of detections is known.
template<typename Dtype>
void MyDetectLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top){
    // Start from four axes of size 1, then widen the last axis to 6.
    vector<int> out_shape(4, 1);
    out_shape[3] = 6;
    top[0]->Reshape(out_shape);
}
template<typename Dtype>
void MyDetectLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top){
std::vector<std::pair<std::pair<int,int>,Dtype> > idx_class_conf;
const Dtype* conf_data = bottom[1]->cpu_data();
const Dtype* prior_data_mean = bottom[2]->cpu_data();
const Dtype* prior_data_std = bottom[2]->cpu_data()+prior_num*4;
const Dtype* loc_pre = bottom[0]->cpu_data();
for(int prior_idx = 0; prior_idx < prior_num; ++prior_idx){
int idx = prior_idx*21;
Dtype max = 0;
int max_idx = -1;
for(int class_idx = 1; class_idx < 21; ++class_idx){//class_idx = 0 is background
if(conf_data[idx+class_idx] > max){
max = conf_data[idx+class_idx];
max_idx = class_idx;
}
}
if(max > 0.5){//threshold
idx_class_conf.push_back(make_pair(make_pair(prior_idx,max_idx),conf_data[idx+max_idx]));
}
}
vector<vector<Dtype> > bboxes;
for(int i = 0; i < prior_num; ++i){
vector<Dtype> temp;
Dtype prior_x,prior_y,prior_w,prior_h;
prior_w = prior_data_mean[i*4+2] - prior_data_mean[0+i*4];
prior_h = prior_data_mean[i*4+3] - prior_data_mean[1+i*4];
prior_x = (prior_data_mean[i*4+2] + prior_data_mean[0+i*4])/2;
prior_y = (prior_data_mean[i*4+1] + prior_data_mean[3+i*4])/2;
Dtype bb_x,bb_y,bb_w,bb_h;
bb_x = prior_data_std[i*4+0]*loc_pre[i*4+0]*prior_w + prior_x;
bb_y = prior_data_std[i*4+1]*loc_pre[i*4+1]*prior_h + prior_y;
bb_w = exp(prior_data_std[i*4+2]*loc_pre[i*4+2])*prior_w;
bb_h = exp(prior_data_std[i*4+3]*loc_pre[i*4+3])*prior_h;
temp.push_back(bb_x-bb_w/2);
temp.push_back(bb_y-bb_h/2);
temp.push_back(bb_x+bb_w/2);
temp.push_back(bb_y+bb_h/2);
temp[0] = temp[0] < 0 ? 0:temp[0];
temp[1] = temp[1] < 0 ? 0:temp[1];
temp[2] = temp[2] < 0 ? 0:temp[2];
temp[3] = temp[3] < 0 ? 0:temp[3];
temp[0] = temp[0] > 1 ? 1:temp[0];
temp[1] = temp[1] > 1 ? 1:temp[1];
temp[2] = temp[2] > 1 ? 1:temp[2];
temp[3] = temp[3] > 1 ? 1:temp[3];
bboxes.push_back(temp);
}
for(int i = 0; i < idx_class_conf.size(); ++i){
for(int j = idx_class_conf.size()-1; j > i; --j){
if(idx_class_conf[i].first.second == idx_class_conf[j].first.second){
//如果iou大于0.5,并且confidence小的删除,如果是第i个小,并且要break,不用进行后面的比较,--i,否这不用。
if(jaccard(bboxes[idx_class_conf[i].first.first],bboxes[idx_class_conf[j].first.first]) > 0.5){
if(idx_class_conf[i].second < idx_class_conf[j].second){
idx_class_conf.erase(idx_class_conf.begin()+i);
--i;
break;
}else{
idx_class_conf.erase(idx_class_conf.begin()+j);
}
}
}
}
}
vector<int> top_shape;
top_shape.push_back(1);
top_shape.push_back(1);
top_shape.push_back(idx_class_conf.size());
top_shape.push_back(6);
top[0]->Reshape(top_shape);
Dtype* top_data = top[0]->mutable_cpu_data();
int top_idx = 0;
for(int i = 0 ; i < idx_class_conf.size(); ++i){
vector<Dtype> bbox = bboxes[idx_class_conf[i].first.first];
top_data[top_idx++] = bbox[0];
top_data[top_idx++] = bbox[1];
top_data[top_idx++] = bbox[2];
top_data[top_idx++] = bbox[3];
top_data[top_idx++] = idx_class_conf[i].first.second;
top_data[top_idx++] = idx_class_conf[i].second;
}
}
// The layer provides no backward pass; the body consists solely of Caffe's
// NOT_IMPLEMENTED macro.
template<typename Dtype>
void MyDetectLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& top){
    NOT_IMPLEMENTED;
}
// Explicit instantiations of the free helper templates for float and double.
template float jaccard(vector<float> x1, vector<float> x2);
template double jaccard(vector<double> x1, vector<double> x2);
template double lap(double x1_min, double x1_max, double x2_min, double x2_max);
template float lap(float x1_min, float x1_max, float x2_min, float x2_max);
// No STUB_GPU_FORWARD here: that macro defines
// MyDetectLayer<Dtype>::Forward_gpu, which the class does not declare, so it
// would not compile in CPU_ONLY builds. Without an override, Layer's default
// Forward_gpu is used.
INSTANTIATE_CLASS(MyDetectLayer);
REGISTER_LAYER_CLASS(MyDetect);
}
检测与画框程序
#include <string>
#include <vector>
#include "boost/algorithm/string.hpp"
#include "google/protobuf/text_format.h"
#include <opencv2/opencv.hpp>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/common.hpp"
#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
#include "caffe/util/io.hpp"
#include <stdio.h>
#include <malloc.h>
#include <fstream>
#include <boost/progress.hpp>
#include <boost/math/special_functions/next.hpp>
#include <boost/random.hpp>
#include <limits>
#include "caffe/common.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"
//#include "caffe/util/math_functions.hpp"
using caffe::Blob;
using caffe::Caffe;
using caffe::Datum;
using caffe::Net;
using caffe::Layer;
using std::string;
namespace db = caffe::db;
void loaddata(boost::shared_ptr<Net<float> >& net, std::string image_path);
//int lap(int x1_min,int x1_max,int x2_min,int x2_max);
int main(int argc, char** argv){
char *labelname[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
Caffe::set_mode(Caffe::GPU);
boost::shared_ptr<Net<float> > net(new Net<float>(argv[1], caffe::TEST));
net->CopyTrainedLayersFromBinaryProto(argv[2]);
loaddata(net,std::string(argv[3]));
net->Forward();
Blob<float>* output_layer = net->output_blobs()[0];
const float* begin = output_layer->cpu_data();
const float* end = begin + output_layer->channels()*output_layer->height()*output_layer->width();
std::vector<float> result(begin, end);
cv::Mat image = cv::imread(argv[3]);
for(int i = 0; i < output_layer->height(); ++i){
cv::Point point1(result[i*6+0]*image.cols, result[i*6+1]*image.rows);
cv::Point point2(result[i*6+2]*image.cols, result[i*6+3]*image.rows);
cv::rectangle(image, cv::Rect(point1,point2),cv::Scalar(0,result[i*6+4]/20.0*225,255),result[i*6+5]*50/8);
char ch[100];
sprintf(ch,"%s %.2f",labelname[int(result[i*6+4]-1)], result[i*6+5]*1.0);
std::string temp(ch);
cv::putText(image,temp,point1,CV_FONT_HERSHEY_COMPLEX,0.4,cv::Scalar(255,255,255));
}
cv::imshow("SSD",image);
cv::waitKey(0);
return 1;
}
// Fills the network's first input blob from an image file.
//
// The image is loaded as 3-channel 8-bit colour, resized to the input blob's
// width x height and written as three planes of width*height floats each.
// The plane order is the reverse of OpenCV's interleaved order (OpenCV byte 2
// goes to plane 0, byte 0 to plane 2). Pixel values are copied unscaled.
// Aborts through CHECK when the blob does not have 3 channels or the image
// cannot be read.
//
// NOTE(review): no mean subtraction is applied and plane 0 receives OpenCV's
// third byte (red for imread's BGR output) - confirm this matches how the
// model was trained.
void loaddata(boost::shared_ptr<Net<float> >& net, std::string image_path){
    Blob<float>* input_layer = net->input_blobs()[0];
    const int width = input_layer->width();
    const int height = input_layer->height();
    const int size = width*height;
    // Three planes are written below, so the blob must provide them.
    CHECK_EQ(input_layer->channels(), 3) << "Input blob must have 3 channels.";

    // Flag 1 forces 3-channel 8-bit colour; -1 would pass grayscale, alpha or
    // 16-bit images through unchanged and break the 3-byte pixel stride below.
    cv::Mat image = cv::imread(image_path, 1);
    CHECK(!image.empty()) << "Could not read image: " << image_path;

    cv::Mat image_resized;
    // cv::Size takes (width, height).
    cv::resize(image, image_resized, cv::Size(width, height));

    float* input_data = input_layer->mutable_cpu_data();
    for(int i = 0; i < height; ++i){
        const uchar* pdata = image_resized.ptr<uchar>(i);
        for(int j = 0; j < width; ++j){
            const int src = 3*j;
            const int dst = i*width+j;
            input_data[dst] = (pdata[src+2]);
            input_data[dst+size] = (pdata[src+1]);
            input_data[dst+2*size] = (pdata[src+0]);
        }
    }
}
结果展示: