SSD MyDetect记录

最新推荐文章于 2023-12-14 22:34:47 发布

小_小_杨_

最新推荐文章于 2023-12-14 22:34:47 发布

阅读量4.3k

点赞数 3

分类专栏： ssd caffe C++ 文章标签： ssd

本文链接：https://blog.csdn.net/u012235274/article/details/52230630

版权

caffe 同时被 3 个专栏收录

29 篇文章 0 订阅

订阅专栏

C++

23 篇文章 0 订阅

订阅专栏

ssd

4 篇文章 0 订阅

订阅专栏

最近学习了SSD，想自己实现SSD后面检测的过程（虽然SSD已经有提供代码，但是自己练练手）。

SSD如何由先验框（prior box）和位置预测解码得到最终的bounding box：

// prior_data_mean holds the prior-box positions, prior_data_std holds the prior-box
// variances, and loc_pre holds the predicted location values.
    prior_w = prior_data_mean[i*4+2] - prior_data_mean[0+i*4];// width of the prior box
    prior_h = prior_data_mean[i*4+3] - prior_data_mean[1+i*4];// height of the prior box
    prior_x = (prior_data_mean[i*4+2] + prior_data_mean[0+i*4])/2;// x coordinate of the prior box centre
    prior_y = (prior_data_mean[i*4+1] + prior_data_mean[3+i*4])/2;// y coordinate of the prior box centre

    // Centre: prediction scaled by the variance and the prior size, added to the prior centre.
    bb_x = prior_data_std[i*4+0]*loc_pre[i*4+0]*prior_w + prior_x;
    bb_y = prior_data_std[i*4+1]*loc_pre[i*4+1]*prior_h + prior_y;
    // Size: exponential of the variance-scaled prediction, multiplied by the prior size.
    bb_w = exp(prior_data_std[i*4+2]*loc_pre[i*4+2])*prior_w;
    bb_h = exp(prior_data_std[i*4+3]*loc_pre[i*4+3])*prior_h;

自己添加的层MyDetect（功能是对输入进来的prior bounding box、location predict、confidence predict进行处理），输出top[0]的维度是1×1×num×6（num表示输出目标框的个数，在层进行forward的时候会进行调整；最后一维的6个值分别表示一个框的xmin，ymin，xmax，ymax，class，confidence）。

#ifndef CAFFE_MY_DETECT_LAYER_HPP_
#define CAFFE_MY_DETECT_LAYER_HPP_

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include <vector>
#include <string>

namespace caffe{

/**
 * @brief Detection post-processing layer for SSD.
 *
 * Takes exactly three bottom blobs, read by Forward_cpu as:
 *   - bottom[0]: location predictions (4 values per prior),
 *   - bottom[1]: per-class confidences (21 values per prior, class 0 is background),
 *   - bottom[2]: prior boxes followed by their variances.
 * Produces one top blob of shape 1 x 1 x num x 6, where num is the number of
 * detections kept in the current forward pass and each row is
 * (xmin, ymin, xmax, ymax, class, confidence).
 *
 * The layer is forward-only; Backward_cpu is not implemented.
 */
template <typename Dtype>
class MyDetectLayer : public Layer<Dtype>{
public:
    // prior_num is initialised here so it never holds an indeterminate value
    // before LayerSetUp assigns the real count.
    explicit MyDetectLayer(const LayerParameter& param)
        :Layer<Dtype>(param), prior_num(0){}
    // Validates the bottom blobs and derives prior_num from bottom[2].
    virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
            const vector<Blob<Dtype>*>& top);
    // Gives top[0] a placeholder 1 x 1 x 1 x 6 shape; Forward_cpu resizes it.
    virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
            const vector<Blob<Dtype>*>& top);

    virtual inline const char* type() const { return "MyDetect";}
    virtual inline int ExactNumBottomBlobs() const { return 3; }
    virtual inline int ExactNumTopBlobs() const { return 1; }

protected:
    // Decodes boxes, thresholds confidences, applies NMS and fills top[0].
    virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
            const vector<Blob<Dtype>*>& top);
    // Not implemented: this layer has no gradient.
    virtual void Backward_cpu(const vector<Blob<Dtype>*>& bottom,
            const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& top);
    // Number of prior boxes, computed in LayerSetUp as bottom[2]->height()/4.
    int prior_num;
};

}

#endif


#include "caffe/layers/mydetect.hpp"

#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>

namespace caffe{

// Length of the overlap between the 1-D intervals [x1_min, x1_max] and
// [x2_min, x2_max]; 0 when they are disjoint or only touch at an endpoint.
template<typename Dtype>
Dtype lap(Dtype x1_min,Dtype x1_max,Dtype x2_min,Dtype x2_max){
    if(x1_min < x2_min){
        // Interval 1 starts first, so any overlap begins at x2_min.
        if(!(x1_max > x2_min)){
            return 0;
        }
        return x1_max < x2_max ? x1_max - x2_min : x2_max - x2_min;
    }
    // Interval 2 starts first (or both start together): overlap begins at x1_min.
    if(!(x1_min < x2_max)){
        return 0;
    }
    return x1_max < x2_max ? x1_max - x1_min : x2_max - x1_min;
}

// Jaccard overlap (intersection over union) of two boxes, each given as
// (xmin, ymin, xmax, ymax). Returns 0 when the overlap along either axis is
// below 1e-8.
template<typename Dtype>
Dtype jaccard(vector<Dtype> x1, vector<Dtype>x2){
    const Dtype min_overlap = Dtype(0.00000001);
    const Dtype overlap_x = lap<Dtype>(x1[0],x1[2],x2[0],x2[2]);
    const Dtype overlap_y = lap<Dtype>(x1[1],x1[3],x2[1],x2[3]);
    if(overlap_x < min_overlap || overlap_y < min_overlap){
        return Dtype(0);
    }
    const Dtype intersection = overlap_x*overlap_y;
    const Dtype area1 = (x1[2]-x1[0])*(x1[3]-x1[1]);
    const Dtype area2 = (x2[2]-x2[0])*(x2[3]-x2[1]);
    // union = area1 + area2 - intersection
    return intersection/(area1+area2-intersection);
}

// One-time setup: checks the number of bottom/top blobs, derives the number of
// prior boxes from bottom[2], and verifies that the location and confidence
// blobs are sized consistently with it.
template<typename Dtype>
void MyDetectLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
        const vector<Blob<Dtype>*>& top){
    CHECK_EQ(bottom.size(), 3);
    CHECK_EQ(top.size(), 1);

    // Four values are stored per prior along the height axis of bottom[2].
    prior_num = bottom[2]->height()/4;

    // bottom[0] must carry 4 location values per prior.
    CHECK_EQ(prior_num*4, bottom[0]->channels())
        << "Number of priors must match number of location predictions.";
    // bottom[1] must carry 21 confidences per prior; the class count is
    // hard-coded (Forward_cpu treats class 0 as background).
    CHECK_EQ(prior_num*21, bottom[1]->channels())
    << "Number of priors must match number of confidence predictions.";
}

// Gives top[0] a placeholder shape of 1 x 1 x 1 x 6. The third dimension is
// replaced by the actual number of detections on every Forward_cpu call.
template<typename Dtype>
void MyDetectLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
        const vector<Blob<Dtype>*>& top){
    vector<int> placeholder_shape(4, 1);
    placeholder_shape[3] = 6;  // xmin, ymin, xmax, ymax, class, confidence
    top[0]->Reshape(placeholder_shape);
}

template<typename Dtype>
void MyDetectLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
        const vector<Blob<Dtype>*>& top){
    std::vector<std::pair<std::pair<int,int>,Dtype> > idx_class_conf;
    const Dtype* conf_data = bottom[1]->cpu_data();
    const Dtype* prior_data_mean = bottom[2]->cpu_data();
    const Dtype* prior_data_std = bottom[2]->cpu_data()+prior_num*4;
    const Dtype* loc_pre = bottom[0]->cpu_data();

    for(int prior_idx = 0; prior_idx < prior_num; ++prior_idx){
        int idx = prior_idx*21;
        Dtype max = 0;
        int max_idx = -1;
        for(int class_idx = 1; class_idx < 21; ++class_idx){//class_idx = 0 is background
            if(conf_data[idx+class_idx] > max){
                max = conf_data[idx+class_idx];
                max_idx = class_idx;
            }
        }
        if(max > 0.5){//threshold
            idx_class_conf.push_back(make_pair(make_pair(prior_idx,max_idx),conf_data[idx+max_idx]));
        }
    }
    vector<vector<Dtype> > bboxes;
    for(int i = 0; i < prior_num; ++i){
        vector<Dtype> temp;
        Dtype prior_x,prior_y,prior_w,prior_h;
        prior_w = prior_data_mean[i*4+2] - prior_data_mean[0+i*4];
        prior_h = prior_data_mean[i*4+3] - prior_data_mean[1+i*4];
        prior_x = (prior_data_mean[i*4+2] + prior_data_mean[0+i*4])/2;
        prior_y = (prior_data_mean[i*4+1] + prior_data_mean[3+i*4])/2;
        Dtype bb_x,bb_y,bb_w,bb_h;
        bb_x = prior_data_std[i*4+0]*loc_pre[i*4+0]*prior_w + prior_x;
        bb_y = prior_data_std[i*4+1]*loc_pre[i*4+1]*prior_h + prior_y;
        bb_w = exp(prior_data_std[i*4+2]*loc_pre[i*4+2])*prior_w;
        bb_h = exp(prior_data_std[i*4+3]*loc_pre[i*4+3])*prior_h;
        temp.push_back(bb_x-bb_w/2);
        temp.push_back(bb_y-bb_h/2);
        temp.push_back(bb_x+bb_w/2);
        temp.push_back(bb_y+bb_h/2);
        temp[0] = temp[0] < 0 ? 0:temp[0];
        temp[1] = temp[1] < 0 ? 0:temp[1];
        temp[2] = temp[2] < 0 ? 0:temp[2];
        temp[3] = temp[3] < 0 ? 0:temp[3];

        temp[0] = temp[0] > 1 ? 1:temp[0];
        temp[1] = temp[1] > 1 ? 1:temp[1];
        temp[2] = temp[2] > 1 ? 1:temp[2];
        temp[3] = temp[3] > 1 ? 1:temp[3];
        bboxes.push_back(temp);
    }
    for(int i = 0; i < idx_class_conf.size(); ++i){
        for(int j = idx_class_conf.size()-1; j > i; --j){
            if(idx_class_conf[i].first.second == idx_class_conf[j].first.second){
                //如果iou大于0.5，并且confidence小的删除，如果是第i个小，并且要break，不用进行后面的比较，--i，否这不用。
                if(jaccard(bboxes[idx_class_conf[i].first.first],bboxes[idx_class_conf[j].first.first]) > 0.5){
                    if(idx_class_conf[i].second < idx_class_conf[j].second){
                        idx_class_conf.erase(idx_class_conf.begin()+i);
                        --i;
                        break;
                    }else{
                        idx_class_conf.erase(idx_class_conf.begin()+j);
                    }
                }
            }
        }
    }
    vector<int> top_shape;
    top_shape.push_back(1);
    top_shape.push_back(1);
    top_shape.push_back(idx_class_conf.size());
    top_shape.push_back(6);
    top[0]->Reshape(top_shape);
    Dtype* top_data = top[0]->mutable_cpu_data();
    int top_idx = 0;
    for(int i = 0 ; i < idx_class_conf.size(); ++i){
        vector<Dtype> bbox = bboxes[idx_class_conf[i].first.first];
        top_data[top_idx++] = bbox[0];
        top_data[top_idx++] = bbox[1];
        top_data[top_idx++] = bbox[2];
        top_data[top_idx++] = bbox[3];
        top_data[top_idx++] = idx_class_conf[i].first.second;
        top_data[top_idx++] = idx_class_conf[i].second;
    }
}
// Backward pass is intentionally unsupported: the body only invokes
// NOT_IMPLEMENTED, so this layer cannot be used where gradients are required.
template<typename Dtype>
void MyDetectLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& bottom,
        const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& top){
    NOT_IMPLEMENTED;
}

// Explicit instantiations of the overlap helpers for both supported Dtypes.
template float jaccard(vector<float> x1, vector<float> x2);
template double jaccard(vector<double> x1, vector<double> x2);
template double lap(double x1_min, double x1_max, double x2_min, double x2_max);
template float lap(float x1_min, float x1_max, float x2_min, float x2_max);

// No STUB_GPU_FORWARD here. That macro defines <classname><Dtype>::Forward_gpu,
// but MyDetectLayer declares no Forward_gpu override (and "MyDetect" is the
// registered type name, not a class), so the stub could not compile under
// CPU_ONLY. The layer relies on the Forward_gpu inherited from Layer, which
// NOTE(review): is expected to fall back to Forward_cpu — confirm in layer.hpp.

INSTANTIATE_CLASS(MyDetectLayer);
REGISTER_LAYER_CLASS(MyDetect);

}

检测与画框程序

#include <string>
#include <vector>

#include "boost/algorithm/string.hpp"
#include "google/protobuf/text_format.h"
#include <opencv2/opencv.hpp>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/common.hpp"
#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
#include "caffe/util/io.hpp"
#include <stdio.h>
#include <malloc.h>
#include <fstream>
#include <boost/progress.hpp>


#include <boost/math/special_functions/next.hpp>
#include <boost/random.hpp>

#include <limits>

#include "caffe/common.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"


//#include "caffe/util/math_functions.hpp"

using caffe::Blob;
using caffe::Caffe;
using caffe::Datum;
using caffe::Net;
using caffe::Layer;
using std::string;
namespace db = caffe::db;

void loaddata(boost::shared_ptr<Net<float> >& net, std::string image_path);
//int lap(int x1_min,int x1_max,int x2_min,int x2_max);
int main(int argc, char** argv){
  char *labelname[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
  Caffe::set_mode(Caffe::GPU);
  boost::shared_ptr<Net<float> > net(new Net<float>(argv[1], caffe::TEST));
  net->CopyTrainedLayersFromBinaryProto(argv[2]);
  loaddata(net,std::string(argv[3]));
  net->Forward();
  Blob<float>* output_layer = net->output_blobs()[0];
  const float* begin = output_layer->cpu_data();
  const float* end = begin + output_layer->channels()*output_layer->height()*output_layer->width();
  std::vector<float> result(begin, end);
  cv::Mat image = cv::imread(argv[3]);
  for(int i = 0; i < output_layer->height(); ++i){
      cv::Point point1(result[i*6+0]*image.cols, result[i*6+1]*image.rows);
      cv::Point point2(result[i*6+2]*image.cols, result[i*6+3]*image.rows);
      cv::rectangle(image, cv::Rect(point1,point2),cv::Scalar(0,result[i*6+4]/20.0*225,255),result[i*6+5]*50/8);
      char ch[100];
      sprintf(ch,"%s %.2f",labelname[int(result[i*6+4]-1)], result[i*6+5]*1.0);
      std::string temp(ch);
      cv::putText(image,temp,point1,CV_FONT_HERSHEY_COMPLEX,0.4,cv::Scalar(255,255,255));
  }
  cv::imshow("SSD",image);
  cv::waitKey(0);
  return 1;
}

// Loads the image at image_path, resizes it to the spatial size of the net's
// first input blob and copies it into that blob as three planar channels.
//
// The image is always decoded as 3-channel 8-bit, so grayscale or alpha images
// cannot make the per-pixel loop read out of bounds. Plane 0 receives byte 2 of
// each pixel, plane 1 byte 1 and plane 2 byte 0 (i.e. the OpenCV BGR order is
// reversed). Pixel values are copied as-is.
// NOTE(review): no mean subtraction or scaling is applied — confirm this
// matches how the model was trained.
//
// If the image cannot be read, an error is printed to stderr and the input
// blob is left untouched.
void loaddata(boost::shared_ptr<Net<float> >& net, std::string image_path){
  Blob<float>* input_layer = net->input_blobs()[0];
  const int width = input_layer->width();
  const int height = input_layer->height();
  const int size = width*height;  // elements per channel plane

  cv::Mat image = cv::imread(image_path);
  if(image.empty()){
    fprintf(stderr, "loaddata: cannot read image '%s'\n", image_path.c_str());
    return;
  }

  // cv::Size takes (width, height); the resized image then has `height` rows
  // and `width` columns, matching the loop bounds below.
  cv::Mat image_resized;
  cv::resize(image, image_resized, cv::Size(width, height));

  float* input_data = input_layer->mutable_cpu_data();
  for(int i = 0; i < height; ++i){
    const uchar* pdata = image_resized.ptr<uchar>(i);
    for(int j = 0; j < width; ++j){
      const int temp = 3*j;
      const int idx = i*width+j;
      input_data[idx] = (pdata[temp+2]);
      input_data[idx+size] = (pdata[temp+1]);
      input_data[idx+2*size] = (pdata[temp+0]);
    }
  }
  //cv::imshow("image",image_resized);
}

结果展示：
这里写图片描述

小_小_杨_

关注

3
点赞
踩
7

收藏

觉得还不错? 一键收藏
11
评论
SSD MyDetect记录

最近学习了SSD，想自己实现SSD后面检测的过程（虽然SSD已经有提供代码，但是自己练练手）。自己添加的层MyDetect（功能是对于输入进来的prior bounding box， location predict， confidence predict进行处理，输出top[0]维度是1×1×100*6（默认最多识别的框不超过100，后面的6维度是xmin，ymin，xmax，ymax，class
复制链接

扫一扫

专栏目录