代码阅读:R-FCN

国际惯例:https://github.com/daijifeng001/R-FCN
这个 MATLAB 版本的代码中,RPN 是已经弄好的,只训练 position-sensitive RoI pooling 那一块,所以我们也主要看这一块。

prototxt

#--------------position sensitive RoI pooling--------------
# Position-sensitive RoI pooling for the classification branch.
# Inputs: the "rfcn_cls" score maps and the "rois" boxes.
# Output: "psroipooled_cls_rois", one pooled grid per RoI and output channel.
layer {
    bottom: "rfcn_cls"
    bottom: "rois"
    top: "psroipooled_cls_rois"
    name: "psroipooled_cls_rois"
    type: "PSROIPooling"
    psroi_pooling_param {
        # 0.0625 = 1/16: factor applied to RoI coordinates to map them onto
        # the feature map.
        spatial_scale: 0.0625
        # Number of output channels per RoI (presumably 20 classes +
        # background -- confirm against the dataset).
        output_dim: 21
        # Bins per side of the position-sensitive grid; the input is indexed
        # as output_dim * group_size * group_size channels.
        group_size: 7
    }
}

# Averages the pooled classification bins of each RoI into "cls_score".
# kernel_size/stride 7 matches group_size 7 of the preceding PSROIPooling
# layer, so presumably the whole grid collapses to one value per channel.
layer {
    bottom: "psroipooled_cls_rois"
    top: "cls_score"
    name: "ave_cls_score_rois"
    type: "Pooling"
    pooling_param {
        pool: AVE
        kernel_size: 7
        stride: 7
    }
}


# Position-sensitive RoI pooling for the bounding-box regression branch.
# Inputs: the "rfcn_bbox" maps and the same "rois" boxes as the
# classification branch. Output: "psroipooled_loc_rois".
layer {
    bottom: "rfcn_bbox"
    bottom: "rois"
    top: "psroipooled_loc_rois"
    name: "psroipooled_loc_rois"
    type: "PSROIPooling"
    psroi_pooling_param {
        # 0.0625 = 1/16: factor applied to RoI coordinates to map them onto
        # the feature map.
        spatial_scale: 0.0625
        # Number of output channels per RoI (presumably 4 box offsets x 2
        # for class-agnostic background/foreground -- confirm).
        output_dim: 8
        # Bins per side of the position-sensitive grid.
        group_size: 7
    }
}

# Averages the pooled regression bins of each RoI into "bbox_pred".
# kernel_size/stride 7 matches group_size 7 of the preceding PSROIPooling
# layer, so presumably the whole grid collapses to one value per channel.
layer {
    bottom: "psroipooled_loc_rois"
    top: "bbox_pred"
    name: "ave_bbox_pred_rois"
    type: "Pooling"
    pooling_param {
        pool: AVE
        kernel_size: 7
        stride: 7
    }
}

PSROIPooling

这是作者自己加的一种pooling方法,我们来看怎么实现的吧。
caffe.proto 里添加了

这里写图片描述

这里写图片描述

头文件,这个没啥好说的

/**
 * @brief Caffe layer declaration for position-sensitive RoI pooling
 *        (layer type string "PSROIPooling").
 *
 * Takes exactly two bottom blobs and produces exactly one top blob.
 * Forward_gpu reads bottom[0] as the score maps and bottom[1] as the RoIs.
 */
template <typename Dtype>
  class PSROIPoolingLayer : public Layer<Dtype> {
  public:
    explicit PSROIPoolingLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
    virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
    virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

    // Type string used in the prototxt `type:` field.
    virtual inline const char* type() const { return "PSROIPooling"; }

    // Exactly two inputs and exactly one output.
    virtual inline int MinBottomBlobs() const { return 2; }
    virtual inline int MaxBottomBlobs() const { return 2; }
    virtual inline int MinTopBlobs() const { return 1; }
    virtual inline int MaxTopBlobs() const { return 1; }

  protected:
    virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
    virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
    virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
    virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

    // Values handed to the pooling kernel; presumably filled from
    // psroi_pooling_param in LayerSetUp (not shown here).
    Dtype spatial_scale_;  // multiplier mapping RoI coordinates to feature-map coordinates
    int output_dim_;       // number of output channels per RoI
    int group_size_;       // bins per side of the position-sensitive grid

    // Dimensions of the input score maps as passed to the kernel.
    int channels_;
    int height_;
    int width_;

    // Size of the pooled output grid per RoI and output channel.
    int pooled_height_;
    int pooled_width_;
    // For every output element, the input channel it was pooled from
    // (written by the forward kernel).
    Blob<int> mapping_channel_;
  };

Forward_gpu:

/**
 * @brief GPU forward pass: clears the outputs and launches the
 *        PSROIPoolingForward kernel over every element of top[0].
 *
 * @param bottom bottom[0] holds the score maps, bottom[1] the RoIs.
 * @param top    top[0] receives the pooled values.
 */
template <typename Dtype>
  void PSROIPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
    const Dtype* bottom_data = bottom[0]->gpu_data(); // input score-map data
    const Dtype* bottom_rois = bottom[1]->gpu_data();  // RoI data
    Dtype* top_data = top[0]->mutable_gpu_data();// output buffer
    int* mapping_channel_ptr = mapping_channel_.mutable_gpu_data(); // per-output record of the source input channel
    int count = top[0]->count();// total output elements (author's example: 256 x 21 x 7 x 7 for a 256-RoI mini-batch -- not verifiable from this snippet)
    // Reset outputs: pooled values to 0, channel mapping to -1.
    caffe_gpu_set(count, Dtype(0), top_data);
    caffe_gpu_set(count, -1, mapping_channel_ptr);
    // NOLINT_NEXT_LINE(whitespace/operators)
    PSROIPoolingForward<Dtype> << <CAFFE_GET_BLOCKS(count),
      CAFFE_CUDA_NUM_THREADS >> >(count, bottom_data, spatial_scale_,
      channels_, height_, width_, pooled_height_,
      pooled_width_, bottom_rois, output_dim_, group_size_,
      top_data, mapping_channel_ptr); // launch the pooling kernel, one logical thread per output element
    CUDA_POST_KERNEL_CHECK;
  }

PSROIPoolingForward:

/**
 * @brief Position-sensitive RoI average pooling (forward).
 *
 * Each output element (n, ctop, ph, pw) is the average of one spatial bin of
 * the RoI, read from a single input channel chosen by (ctop, ph, pw).
 *
 * @param nthreads        total number of output elements
 * @param bottom_data     input score maps, indexed as
 *                        (batch, channels, height, width)
 * @param spatial_scale   multiplier mapping RoI coordinates to feature-map
 *                        coordinates
 * @param channels        number of input channels
 * @param height          input feature-map height
 * @param width           input feature-map width
 * @param pooled_height   output bins per RoI along h
 * @param pooled_width    output bins per RoI along w
 * @param bottom_rois     RoIs, 5 values each: (batch_index, x1, y1, x2, y2)
 * @param output_dim      number of output channels per RoI
 * @param group_size      bins per side used to index the input channels
 * @param top_data        output, written at [index]
 * @param mapping_channel output, input channel used for each output element
 *
 * NOTE(review): the bin coordinates (ph, pw) are used directly as the group
 * indices, so this presumes pooled_height == pooled_width == group_size --
 * confirm that the layer setup enforces it.
 */
template <typename Dtype>
  __global__ void PSROIPoolingForward(
    const int nthreads,
    const Dtype* bottom_data,
    const Dtype spatial_scale,
    const int channels,
    const int height, const int width,
    const int pooled_height, const int pooled_width,
    const Dtype* bottom_rois,
    const int output_dim,
    const int group_size,
    Dtype* top_data,
    int* mapping_channel) {
    CUDA_KERNEL_LOOP(index, nthreads) {
      // The output is in order (n, ctop, ph, pw), i.e.
      // index = ((n * output_dim + ctop) * pooled_height + ph) * pooled_width + pw
      int pw = index % pooled_width;                                   // bin column
      int ph = (index / pooled_width) % pooled_height;                 // bin row
      int ctop = (index / pooled_width / pooled_height) % output_dim;  // output channel
      int n = index / pooled_width / pooled_height / output_dim;       // RoI index

      // Use a per-iteration local pointer instead of advancing the kernel
      // parameter: if this loop body runs more than once in a thread, an
      // in-place `bottom_rois += n * 5` would accumulate across iterations
      // and read the wrong RoI.
      const Dtype* offset_bottom_rois = bottom_rois + n * 5;
      int roi_batch_ind = offset_bottom_rois[0];

      // Map the RoI onto the feature map. The end coordinates get +1 so the
      // interval [start, end) includes the last pixel of the box.
      Dtype roi_start_w =
        static_cast<Dtype>(round(offset_bottom_rois[1])) * spatial_scale;
      Dtype roi_start_h =
        static_cast<Dtype>(round(offset_bottom_rois[2])) * spatial_scale;
      Dtype roi_end_w =
        static_cast<Dtype>(round(offset_bottom_rois[3]) + 1.) * spatial_scale;
      Dtype roi_end_h =
        static_cast<Dtype>(round(offset_bottom_rois[4]) + 1.) * spatial_scale;

      // Clamp degenerate RoIs to a minimum extent of 0.1 so the bin size is
      // never zero.
      Dtype roi_width = max(roi_end_w - roi_start_w, 0.1);  // avoid 0
      Dtype roi_height = max(roi_end_h - roi_start_h, 0.1);

      // Size of one bin, in feature-map units.
      Dtype bin_size_h = roi_height / static_cast<Dtype>(pooled_height);
      Dtype bin_size_w = roi_width / static_cast<Dtype>(pooled_width);

      // Start (floor) and end (ceil) of this bin on the feature map.
      int hstart = floor(static_cast<Dtype>(ph) * bin_size_h
                          + roi_start_h);
      int wstart = floor(static_cast<Dtype>(pw)* bin_size_w
                          + roi_start_w);
      int hend = ceil(static_cast<Dtype>(ph + 1) * bin_size_h
                        + roi_start_h);
      int wend = ceil(static_cast<Dtype>(pw + 1) * bin_size_w
                        + roi_start_w);
      // Clip to input boundaries
      hstart = min(max(hstart, 0), height);
      hend = min(max(hend, 0), height);
      wstart = min(max(wstart, 0), width);
      wend = min(max(wend, 0), width);
      bool is_empty = (hend <= hstart) || (wend <= wstart);

      // Position-sensitive channel selection: bin (gh, gw) of output channel
      // ctop reads input channel (ctop * group_size + gh) * group_size + gw.
      int gw = pw;
      int gh = ph;
      int c = (ctop*group_size + gh)*group_size + gw;

      // Local pointer to the selected (image, channel) plane; same reasoning
      // as for offset_bottom_rois -- never advance the parameter in place.
      const Dtype* offset_bottom_data =
        bottom_data + (roi_batch_ind * channels + c) * height * width;
      Dtype out_sum = 0;
      // Sum all feature-map cells inside the bin.
      for (int h = hstart; h < hend; ++h) {
        for (int w = wstart; w < wend; ++w) {
          int bottom_index = h*width + w;
          out_sum += offset_bottom_data[bottom_index];
        }
      }
      // Number of cells in the bin.
      Dtype bin_area = (hend - hstart)*(wend - wstart);
      // Average over the bin; an empty bin yields 0.
      top_data[index] = is_empty? 0. : out_sum/bin_area;
      // Record which input channel this output element was pooled from.
      mapping_channel[index] = c;
    }
  }
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值