RoiAlign源码解析

最新推荐文章于 2023-07-14 15:09:26 发布

zhuikefeng

最新推荐文章于 2023-07-14 15:09:26 发布

阅读量2.4k

点赞数

分类专栏：深度学习、源码解析　文章标签：深度学习、机器学习、roiAlign

本文链接：https://blog.csdn.net/zhuikefeng/article/details/121933479

版权

深度学习同时被 2 个专栏收录

10 篇文章 0 订阅

订阅专栏

源码解析

7 篇文章 0 订阅

订阅专栏

/**
 * RoIAlign forward kernel.
 *
 * Every loop iteration produces one element of the pooled output: it averages
 * bilinearly interpolated samples taken on a regular grid inside the bin that
 * the element corresponds to.
 *
 * Parameters (shapes as described by the original author's notes):
 *   nthreads       - number of output elements, i.e.
 *                    num_rois * channels * pooled_height * pooled_width.
 *   bottom_data    - input feature map, laid out as (n, c, h, w).
 *   spatial_scale  - feature-map size divided by source-image size
 *                    (e.g. 1/16); used to map ROI coordinates onto the
 *                    feature map.
 *   channels       - channel count of the feature map (same for the output).
 *   height         - feature-map height.
 *   width          - feature-map width.
 *   pooled_height  - output height after RoIAlign.
 *   pooled_width   - output width after RoIAlign.
 *   sampling_ratio - samples per bin along each axis; the original notes say
 *                    the paper's default is 2 (2 * 2 = 4 samples per bin).
 *                    When <= 0 the count is derived from the bin size.
 *   bottom_rois    - ROIs, laid out as (num_rois, 5); each row is
 *                    [batch_index, x1, y1, x2, y2] in source-image
 *                    coordinates.
 *   top_data       - output, laid out as
 *                    (num_rois, channels, pooled_height, pooled_width).
 */
template <typename T>
__global__ void RoIAlignForward(
    const int nthreads,
    const T* bottom_data,
    const T spatial_scale,
    const int channels,
    const int height,
    const int width,
    const int pooled_height,
    const int pooled_width,
    const int sampling_ratio,
    const T* bottom_rois,
    T* top_data) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Peel the flat output index apart, innermost dimension first:
    // (n, c, ph, pw) index into (num_rois, channels, pooled_height, pooled_width).
    int remaining = index;
    const int pw = remaining % pooled_width;
    remaining /= pooled_width;
    const int ph = remaining % pooled_height;
    remaining /= pooled_height;
    const int c = remaining % channels;
    const int n = remaining / channels;

    // Row n of bottom_rois: [batch_index, x1, y1, x2, y2].
    const T* roi = &bottom_rois[n * 5];
    const int roi_batch_ind = static_cast<int>(roi[0]);

    // Map the ROI corners from image space onto the feature map. The scaled
    // coordinates are deliberately NOT passed through round(): keeping them
    // fractional is what avoids the quantization error.
    const T roi_start_w = roi[1] * spatial_scale;
    const T roi_start_h = roi[2] * spatial_scale;
    const T roi_end_w = roi[3] * spatial_scale;
    const T roi_end_h = roi[4] * spatial_scale;

    // Malformed ROIs are forced to be at least 1x1 on the feature map.
    const T roi_width = max(roi_end_w - roi_start_w, static_cast<T>(1.));
    const T roi_height = max(roi_end_h - roi_start_h, static_cast<T>(1.));

    // The ROI is split into pooled_height x pooled_width bins of this size.
    const T bin_size_h = roi_height / static_cast<T>(pooled_height);
    const T bin_size_w = roi_width / static_cast<T>(pooled_width);

    // Start of the (batch, channel) plane this output element reads from.
    const int plane = roi_batch_ind * channels + c;
    const T* offset_bottom_data = bottom_data + plane * height * width;

    // Sampling grid per bin: the explicit sampling_ratio when positive,
    // otherwise the bin extent rounded up.
    const int roi_bin_grid_h = (sampling_ratio > 0)
        ? sampling_ratio
        : ceil(roi_height / pooled_height);
    const int roi_bin_grid_w = (sampling_ratio > 0)
        ? sampling_ratio
        : ceil(roi_width / pooled_width);

    // Number of samples averaged inside one bin.
    const T count = roi_bin_grid_h * roi_bin_grid_w;

    // Top-left corner of bin (ph, pw) on the feature map.
    const T bin_top = roi_start_h + ph * bin_size_h;
    const T bin_left = roi_start_w + pw * bin_size_w;

    T output_val = 0.;
    for (int iy = 0; iy < roi_bin_grid_h; ++iy) {
      // Sample row iy sits at the centre of its grid cell along the height.
      const T y = bin_top +
          static_cast<T>(iy + .5f) * bin_size_h /
              static_cast<T>(roi_bin_grid_h);
      for (int ix = 0; ix < roi_bin_grid_w; ++ix) {
        // Same placement along the width.
        const T x = bin_left +
            static_cast<T>(ix + .5f) * bin_size_w /
                static_cast<T>(roi_bin_grid_w);
        // (y, x) is fractional, so the value at that position is obtained
        // through bilinear interpolation.
        output_val += bilinear_interpolate(
            offset_bottom_data, height, width, y, x, index);
      }
    }

    // Average pooling over all samples of the bin.
    top_data[index] = output_val / count;
  }
}

} // namespace

zhuikefeng

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
RoiAlign源码解析

/** 参数解释* bottom_data：输入的特征图数据，shape是(n,c,h,w)* bottom_rois：输入的roi数据，shape是(num_rois,5),第一维表示共有多少个rois，第二维是[batch_index,x1,y1,x2,y2]，第一个值表示rois所在的batch_id，后四个是rois所在原图的坐标值，可以根据spatial_scale对应到特征图上* nthreads：等于roipooling后输出的size，即num_rois*channels*pool.
复制链接

扫一扫