目标检测特殊层:RFCN中PSROIPooling层

RFCN中PSROIPooling的代码:

     每一层表示的是一种特征,输入是7*7*2*ROI_area,输出是7*7*2大小的卷积核;把每一层的特征相加得到一个特征,总共49层可以得到7*7大小的map,之后在ROI的框内采用插值的方式,得到7*7*2的特征map。

    开始进入ROI pooling操作了,上面一层,有两个输入:rfcn_cls(1*98*38*63)是预测的结果,rois(1*5*1*1)是ROI,生成1*2*7*7的结果。下面一层是均值池化,得到1*2*1*1(cls_score),就是论文中vote的过程。

这里写图片描述

#include <cfloat>
#include <algorithm>

#include <string>
#include <utility>
#include <vector>

#include "caffe/layers/psroi_pooling_layer.hpp"
#include "caffe/util/math_functions.hpp"

using std::max;
using std::min;
using std::floor;
using std::ceil;

namespace caffe {
  /**
   * @brief Reads the PSROIPooling parameters from the layer definition.
   *
   * Copies spatial_scale, output_dim and group_size into the layer members.
   * The pooled output grid is always group_size x group_size. Both output_dim
   * and group_size are required to be strictly positive.
   *
   * @param bottom input blobs (not inspected here)
   * @param top    output blobs (not inspected here)
   */
  template <typename Dtype>
  void PSROIPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
    const PSROIPoolingParameter& param =
      this->layer_param_.psroi_pooling_param();

    spatial_scale_ = param.spatial_scale();
    LOG(INFO) << "Spatial scale: " << spatial_scale_;

    // Reject degenerate configurations before storing them.
    CHECK_GT(param.output_dim(), 0)
      << "output_dim must be > 0";
    CHECK_GT(param.group_size(), 0)
      << "group_size must be > 0";

    output_dim_ = param.output_dim();
    group_size_ = param.group_size();

    // One pooled bin per position-sensitive group cell, in both directions.
    pooled_width_ = group_size_;
    pooled_height_ = group_size_;
  }

  /**
   * @brief Validates the input shapes and sizes the output blobs.
   *
   * bottom[0] is the position-sensitive score map; its channel count must be
   * output_dim * group_size * group_size. bottom[1] holds the ROIs, five
   * values per ROI. top[0] and the internal mapping_channel_ blob are both
   * shaped (number of ROIs, output_dim, pooled_height, pooled_width).
   *
   * @param bottom bottom[0] = score maps, bottom[1] = ROIs
   * @param top    top[0] = pooled scores
   */
  template <typename Dtype>
  void PSROIPoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
    // Intentionally no LOG(INFO) here: a message on every reshape only floods
    // the log without carrying any information.
    channels_ = bottom[0]->channels();
    CHECK_EQ(channels_, output_dim_*group_size_*group_size_)
      << "input channel number does not match layer parameters";
    height_ = bottom[0]->height();
    width_ = bottom[0]->width();

    // The CPU forward pass walks the ROI blob with a fixed stride of five
    // values per ROI, so any other layout would be read misaligned or past
    // the end of the buffer. Fail loudly instead.
    const int num_rois = bottom[1]->num();
    CHECK_EQ(bottom[1]->count(), num_rois * 5)
      << "each ROI must hold exactly 5 values: (batch_index, x1, y1, x2, y2)";

    top[0]->Reshape(
      num_rois, output_dim_, pooled_height_, pooled_width_);
    mapping_channel_.Reshape(
      num_rois, output_dim_, pooled_height_, pooled_width_);
  }


  /**
   * @brief CPU position-sensitive ROI average pooling.
   *
   * For every ROI the region is divided into pooled_height x pooled_width
   * bins. Output element (n, ctop, ph, pw) is the mean of the input channel
   * c = (ctop * group_size + ph) * group_size + pw over the bin (ph, pw),
   * taken from the image selected by the ROI's batch index. Bins that end up
   * empty after clipping to the feature map produce 0.
   *
   * @param num             number of ROIs
   * @param bottom_data     score maps, laid out as (image, channel, h, w)
   * @param spatial_scale   factor applied to the rounded ROI coordinates
   * @param channels        number of channels per image in bottom_data
   * @param height          feature-map height
   * @param width           feature-map width
   * @param pooled_height   number of bins along h
   * @param pooled_width    number of bins along w
   * @param bottom_rois     num * 5 values: (batch_index, x1, y1, x2, y2)
   * @param output_dim      number of output channels per ROI
   * @param group_size      side length of the position-sensitive grid
   * @param top_data        output, num * output_dim * pooled_height *
   *                        pooled_width values in (n, ctop, ph, pw) order
   * @param mapping_channel same layout as top_data; receives the input channel
   *                        each output element was pooled from
   *
   * The batch index stored in each ROI is not range-checked here; callers must
   * supply ROIs that reference an image present in bottom_data.
   */
  template <typename Dtype>
  static void PSROIPoolingForward(
    const int num,
    const Dtype* bottom_data,
    const Dtype spatial_scale,
    const int channels,
    const int height, const int width,
    const int pooled_height, const int pooled_width,
    const Dtype* bottom_rois,
    const int output_dim,
    const int group_size,
    Dtype* top_data,
    int* mapping_channel) {
    const int plane_size = height * width;

    for (int n = 0; n < num; ++n) {
      const Dtype* roi = bottom_rois + n * 5;

      // [start, end) interval for spatial sampling
      const int roi_batch_ind = static_cast<int>(roi[0]);
      const Dtype roi_start_w =
        static_cast<Dtype>(round(roi[1])) * spatial_scale;
      const Dtype roi_start_h =
        static_cast<Dtype>(round(roi[2])) * spatial_scale;
      const Dtype roi_end_w =
        static_cast<Dtype>(round(roi[3]) + 1.) * spatial_scale;
      const Dtype roi_end_h =
        static_cast<Dtype>(round(roi[4]) + 1.) * spatial_scale;

      // Keep the ROI extent strictly positive so the bin size is never 0.
      const Dtype roi_width = std::max<Dtype>(roi_end_w - roi_start_w, 0.1);
      const Dtype roi_height = std::max<Dtype>(roi_end_h - roi_start_h, 0.1);

      // Size of one bin, in feature-map cells.
      const Dtype bin_size_h = roi_height / static_cast<Dtype>(pooled_height);
      const Dtype bin_size_w = roi_width / static_cast<Dtype>(pooled_width);

      // The bin geometry depends only on (ph, pw), so it is computed once per
      // bin and shared by all output channels.
      for (int ph = 0; ph < pooled_height; ++ph) {
        int hstart = static_cast<int>(
          std::floor(static_cast<Dtype>(ph) * bin_size_h + roi_start_h));
        int hend = static_cast<int>(
          std::ceil(static_cast<Dtype>(ph + 1) * bin_size_h + roi_start_h));
        // Clip to the feature-map boundaries.
        hstart = std::min(std::max(hstart, 0), height);
        hend = std::min(std::max(hend, 0), height);

        for (int pw = 0; pw < pooled_width; ++pw) {
          int wstart = static_cast<int>(
            std::floor(static_cast<Dtype>(pw) * bin_size_w + roi_start_w));
          int wend = static_cast<int>(
            std::ceil(static_cast<Dtype>(pw + 1) * bin_size_w + roi_start_w));
          wstart = std::min(std::max(wstart, 0), width);
          wend = std::min(std::max(wend, 0), width);

          const bool is_empty = (hend <= hstart) || (wend <= wstart);
          const Dtype bin_area =
            static_cast<Dtype>((hend - hstart) * (wend - wstart));

          for (int ctop = 0; ctop < output_dim; ++ctop) {
            // Position-sensitive channel selected by (ctop, ph, pw).
            const int c = (ctop * group_size + ph) * group_size + pw;
            // The output is in order (n, ctop, ph, pw).
            const int index =
              ((n * output_dim + ctop) * pooled_height + ph) * pooled_width
              + pw;
            const int plane_offset =
              (roi_batch_ind * channels + c) * plane_size;

            Dtype out_sum = 0;
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                out_sum += bottom_data[plane_offset + h * width + w];
              }
            }

            top_data[index] = is_empty ? Dtype(0) : out_sum / bin_area;
            mapping_channel[index] = c;
          }
        }
      }
    }
  }


  /**
   * @brief CPU forward pass: pools every ROI of bottom[1] over bottom[0].
   *
   * Resets top[0] to 0 and mapping_channel_ to -1, then delegates to
   * PSROIPoolingForward, which fills both buffers.
   *
   * @param bottom bottom[0] = score maps, bottom[1] = ROIs
   * @param top    top[0] = pooled scores
   */
  template <typename Dtype>
  void PSROIPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
    const Dtype* score_maps = bottom[0]->cpu_data();
    const Dtype* rois = bottom[1]->cpu_data();
    Dtype* pooled = top[0]->mutable_cpu_data();
    int* channel_map = mapping_channel_.mutable_cpu_data();

    // Both output buffers share the element count of top[0].
    const int top_count = top[0]->count();
    caffe_set(top_count, Dtype(0), pooled);
    caffe_set(top_count, -1, channel_map);

    PSROIPoolingForward(
      bottom[1]->num(), score_maps, spatial_scale_,
      channels_, height_, width_,
      pooled_height_, pooled_width_,
      rois, output_dim_, group_size_,
      pooled, channel_map);
  }


  /**
   * @brief CPU backward pass -- not provided by this implementation.
   *
   * The body consists solely of the NOT_IMPLEMENTED macro, so no gradient is
   * written to any bottom blob on the CPU path.
   * NOTE(review): NOT_IMPLEMENTED is presumably Caffe's fatal "not
   * implemented yet" macro -- confirm before relying on this layer for
   * CPU training.
   *
   * @param top             output blobs carrying the incoming gradient (unused)
   * @param propagate_down  per-bottom gradient flags (unused)
   * @param bottom          input blobs (unused)
   */
  template <typename Dtype>
  void PSROIPoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  NOT_IMPLEMENTED; 
  }


// NOTE(review): in CPU_ONLY builds STUB_GPU presumably supplies placeholder
// GPU forward/backward methods for the layer -- confirm against the Caffe
// macro definition.
#ifdef CPU_ONLY
  STUB_GPU(PSROIPoolingLayer);
#endif

  // Instantiate the layer template and register it under the "PSROIPooling"
  // name (presumably for float and double, and with Caffe's layer factory --
  // confirm against the macro definitions).
  INSTANTIATE_CLASS(PSROIPoolingLayer);
  REGISTER_LAYER_CLASS(PSROIPooling);

}  // namespace caffe

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值