【PSROIPooling】源码解析

// ------------------------------------------------------------------
// R-FCN
// Copyright (c) 2016 Microsoft
// Licensed under The MIT License [see r-fcn/LICENSE for details]
// Written by Yi Li
// ------------------------------------------------------------------
 
#include <cfloat>
 
#include "caffe/rfcn_layers.hpp"
#include "caffe/util/gpu_util.cuh"
 
using std::max;
using std::min;
 
namespace caffe {
 
  /**
   * Position-sensitive ROI average pooling (forward pass).
   *
   * Every output element `index` in [0, nthreads) is decoded in
   * (n, ctop, ph, pw) order: ROI n, output channel ctop, pooled row ph,
   * pooled column pw. For that element the kernel averages one rectangular
   * bin of a single input channel c = (ctop * group_size + ph) * group_size + pw
   * and stores the mean in top_data[index] and c in mapping_channel[index].
   *
   * @param nthreads        number of output elements to produce
   * @param bottom_data     input feature maps, indexed here as
   *                        ((batch * channels + c) * height + h) * width + w
   * @param spatial_scale   factor multiplied onto ROI coordinates to map them
   *                        onto the feature map (presumably 1 / total stride
   *                        of the preceding layers -- confirm with the caller)
   * @param channels        number of channels of bottom_data per image
   * @param height          feature-map height (number of rows)
   * @param width           feature-map width (number of columns)
   * @param pooled_height   number of bins per ROI along the height
   * @param pooled_width    number of bins per ROI along the width
   * @param bottom_rois     ROIs, 5 values each: [batch_ind, x1, y1, x2, y2]
   * @param output_dim      number of output channels per ROI
   * @param group_size      number of position-sensitive groups per side used
   *                        to compute the input channel
   * @param top_data        output, nthreads elements
   * @param mapping_channel output, nthreads elements; input channel that was
   *                        pooled for each output element
   *
   * NOTE: CUDA_KERNEL_LOOP is defined outside this file (in Caffe it is a
   * grid-stride loop), so one thread may run the loop body for several
   * indices. The pointer parameters must therefore never be advanced in
   * place; per-iteration local pointers are used instead.
   */
  template <typename Dtype>
  __global__ void PSROIPoolingForward(
    const int nthreads,
    const Dtype* bottom_data,
    const Dtype spatial_scale,
    const int channels,
    const int height,
    const int width,
    const int pooled_height,
    const int pooled_width,
    const Dtype* bottom_rois,
    const int output_dim,
    const int group_size,
    Dtype* top_data,
    int* mapping_channel) {
    CUDA_KERNEL_LOOP(index, nthreads) {
      // The output is in order (n, ctop, ph, pw)
      int pw = index % pooled_width;
      int ph = (index / pooled_width) % pooled_height;
      int ctop = (index / pooled_width / pooled_height) % output_dim;
      int n = index / pooled_width / pooled_height / output_dim;

      // Local pointer to the n-th ROI. The parameter itself is left untouched
      // so that a later iteration of this loop starts from the true base.
      const Dtype* offset_bottom_rois = bottom_rois + n * 5;
      int roi_batch_ind = static_cast<int>(offset_bottom_rois[0]);

      // [start, end) interval for spatial sampling, in feature-map coordinates
      Dtype roi_start_w = static_cast<Dtype>(round(offset_bottom_rois[1])) * spatial_scale;
      Dtype roi_start_h = static_cast<Dtype>(round(offset_bottom_rois[2])) * spatial_scale;
      Dtype roi_end_w = static_cast<Dtype>(round(offset_bottom_rois[3]) + 1.) * spatial_scale;
      Dtype roi_end_h = static_cast<Dtype>(round(offset_bottom_rois[4]) + 1.) * spatial_scale;

      // Clamp degenerate ROIs to a minimum extent of 0.1 so the bin size is
      // never zero or negative.
      Dtype roi_width = max(roi_end_w - roi_start_w, static_cast<Dtype>(0.1));
      Dtype roi_height = max(roi_end_h - roi_start_h, static_cast<Dtype>(0.1));

      // Size of one pooling bin on the feature map
      Dtype bin_size_h = roi_height / static_cast<Dtype>(pooled_height);
      Dtype bin_size_w = roi_width / static_cast<Dtype>(pooled_width);

      // Integer bounds of bin (ph, pw): floor for the start, ceil for the end
      int hstart = static_cast<int>(floor(static_cast<Dtype>(ph) * bin_size_h
                                          + roi_start_h));
      int wstart = static_cast<int>(floor(static_cast<Dtype>(pw) * bin_size_w
                                          + roi_start_w));
      int hend = static_cast<int>(ceil(static_cast<Dtype>(ph + 1) * bin_size_h
                                       + roi_start_h));
      int wend = static_cast<int>(ceil(static_cast<Dtype>(pw + 1) * bin_size_w
                                       + roi_start_w));

      // Clip the bin to the feature-map boundaries
      hstart = min(max(hstart, 0), height);
      hend = min(max(hend, 0), height);
      wstart = min(max(wstart, 0), width);
      wend = min(max(wend, 0), width);
      bool is_empty = (hend <= hstart) || (wend <= wstart);

      // Position-sensitive channel selection: the bin position (gh, gw)
      // together with the output channel picks exactly one input channel.
      int gw = pw;
      int gh = ph;
      int c = (ctop * group_size + gh) * group_size + gw;

      // Local pointer to plane c of image roi_batch_ind. Again the parameter
      // is not modified, for the same reason as offset_bottom_rois.
      const Dtype* offset_bottom_data =
          bottom_data + (roi_batch_ind * channels + c) * height * width;

      // Sum over the bin; the loops do not execute when the bin is empty.
      Dtype out_sum = 0;
      for (int h = hstart; h < hend; ++h) {
        for (int w = wstart; w < wend; ++w) {
          int bottom_index = h * width + w;
          out_sum += offset_bottom_data[bottom_index];
        }
      }

      // Average over the bin area; an empty bin yields 0 (and the division
      // is skipped, so bin_area == 0 is never used as a divisor).
      Dtype bin_area = (hend - hstart) * (wend - wstart);
      top_data[index] = is_empty ? static_cast<Dtype>(0) : out_sum / bin_area;
      // Record which input channel produced this output element.
      mapping_channel[index] = c;
    }
  }
 
  /**
   * GPU forward pass of the PSROIPooling layer.
   *
   * Reads the feature maps from bottom[0] and the ROIs from bottom[1],
   * resets top[0] and the mapping_channel_ buffer, then launches
   * PSROIPoolingForward with one logical work item per element of top[0].
   */
  template <typename Dtype>
  void PSROIPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
    // Device pointers: inputs first, then outputs.
    const Dtype* feature_maps = bottom[0]->gpu_data();
    const Dtype* rois = bottom[1]->gpu_data();
    Dtype* pooled_out = top[0]->mutable_gpu_data();
    int* channel_map = mapping_channel_.mutable_gpu_data();

    // Number of output elements; also passed to the kernel as nthreads.
    const int num_outputs = top[0]->count();

    // Initialise both outputs before the kernel runs: 0 for the pooled
    // values and -1 for the channel mapping.
    caffe_gpu_set(num_outputs, Dtype(0), pooled_out);
    caffe_gpu_set(num_outputs, -1, channel_map);

    // Launch the pooling kernel, passing the layer's configuration members.
    // NOLINT_NEXT_LINE(whitespace/operators)
    PSROIPoolingForward<Dtype><<<CAFFE_GET_BLOCKS(num_outputs), CAFFE_CUDA_NUM_THREADS>>>(
      num_outputs, feature_maps, spatial_scale_, channels_, height_, width_,
      pooled_height_, pooled_width_, rois, output_dim_, group_size_,
      pooled_out, channel_map);
    CUDA_POST_KERNEL_CHECK;
  }
}

 

参考:https://blog.csdn.net/wfei101/article/details/79598143

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值