【caffe】【特殊层】RoiPooling层

1.  RoiPooling层是对Caffe层的扩展,目的:把ROI区域归一化到相同尺寸大小,以便于后面的全连接层处理。

2. 部分网络配置(prototxt):
# conv5: 3x3 convolution over "conv4" with 256 output channels.
# pad 1 / stride 1 with a 3x3 kernel keeps the spatial size of the input.
# NOTE(review): the two param blocks presumably set the learning-rate
# multipliers for the weights (1.0) and the bias (2.0) - confirm.
layer {

	name: "conv5"

	type: "Convolution"

	bottom: "conv4"

	top: "conv5"

	param { lr_mult: 1.0 }

	param { lr_mult: 2.0 }

	convolution_param {

		num_output: 256

		kernel_size: 3

		pad: 1

		stride: 1

	}

}

# relu5: ReLU applied to "conv5"; bottom and top share the same blob name,
# so the activation overwrites its input.
layer {

	name: "relu5"

	type: "ReLU"

	bottom: "conv5"

	top: "conv5"

}



#========= RCNN ============



# roi_pool_conv5: ROI max pooling. Takes the "conv5" feature map and the
# "rois" blob and emits one fixed 6x6 (pooled_h x pooled_w) grid per ROI and
# channel. spatial_scale converts ROI coordinates to feature-map coordinates.
# NOTE(review): 1/16 presumably equals the total stride of the layers before
# conv5 - confirm against the full network definition.
layer {

  name: "roi_pool_conv5"

  type: "ROIPooling"

  bottom: "conv5"

  bottom: "rois"

  top: "roi_pool_conv5"

  roi_pooling_param {

    pooled_w: 6

    pooled_h: 6

    spatial_scale: 0.0625 # 1/16

  }

}

# fc6: fully connected (InnerProduct) layer with 4096 outputs, fed by the
# fixed-size ROI-pooled features.
# NOTE(review): the two param blocks presumably set the learning-rate
# multipliers for the weights (1.0) and the bias (2.0) - confirm.
layer {

  name: "fc6"

  type: "InnerProduct"

  bottom: "roi_pool_conv5"

  top: "fc6"

  param { lr_mult: 1.0 }

  param { lr_mult: 2.0 }

  inner_product_param {

    num_output: 4096

  }

}

# relu6: ReLU applied to "fc6"; bottom and top share the same blob name,
# so the activation overwrites its input.
layer {

  name: "relu6"

  type: "ReLU"

  bottom: "fc6"

  top: "fc6"

}

分析可见:
<1> roipooling的输入是conv5(featuremap)和rois(一系列目标框,大小不一);
<2> 输出为roi_pool_conv5,即每个目标区域在featuremap上的归一化特征,尺寸为pooled_w×pooled_h×nchannels。思想是:先将原图上的目标区域按spatial_scale映射到featuremap上的对应区域,再把该区域划分成pooled_w×pooled_h个块,对每一块做max pooling。
3.  源码解读:
// ------------------------------------------------------------------
// Fast R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Ross Girshick
// ------------------------------------------------------------------

#include <cfloat>

#include <string>
#include <utility>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
//#include "caffe/vision_layers.hpp"
#include "caffe/fast_rcnn_layers.hpp"
#include "caffe/proto/caffe.pb.h"

using std::max;
using std::min;
using std::floor;
using std::ceil;

// Fallback round() for old MSVC toolchains only (_MSC_VER < 1800, i.e. before
// Visual Studio 2013), whose C runtime does not provide the C99 round().
// The defined() test is required: on any non-MSVC compiler _MSC_VER is
// undefined and evaluates to 0 inside #if, so a bare "_MSC_VER < 1800" would
// be true and this definition would clash with the standard ::round.
#if defined(_MSC_VER) && _MSC_VER < 1800
// Rounds x to the nearest integer value, with halfway cases rounded away
// from zero (same convention as the C99 round()).
inline double round(double x) {
	return (x > 0.0) ? floor(x + 0.5) : ceil(x - 0.5);
}
#endif

namespace caffe {

	/**
	 * One-time layer configuration.
	 *
	 * Reads roi_pooling_param from the layer definition, verifies that both
	 * pooled output dimensions are strictly positive, and caches the pooled
	 * height/width and the spatial scale in the layer's members.
	 *
	 * @param bottom input blobs (not inspected here)
	 * @param top    output blobs (not inspected here)
	 */
	template <typename Dtype>
	void ROIPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
		const vector<Blob<Dtype>*>& top) {
		const ROIPoolingParameter& pool_cfg =
			this->layer_param_.roi_pooling_param();

		// Reject degenerate output grids before storing anything.
		CHECK_GT(pool_cfg.pooled_h(), 0) << "pooled_h must be > 0";
		CHECK_GT(pool_cfg.pooled_w(), 0) << "pooled_w must be > 0";

		// Size of the pooled output grid produced for every ROI.
		pooled_height_ = pool_cfg.pooled_h();
		pooled_width_ = pool_cfg.pooled_w();

		// Factor applied to ROI coordinates to map them onto the feature map.
		spatial_scale_ = pool_cfg.spatial_scale();
		LOG(INFO) << "Spatial scale: " << spatial_scale_;
	}

	/**
	 * Records the feature-map geometry and sizes the output blobs.
	 *
	 * bottom[0] supplies channels/height/width; bottom[1]->num() supplies the
	 * number of ROIs. Both top[0] and the argmax buffer max_idx_ are reshaped
	 * to (num_rois, channels_, pooled_height_, pooled_width_).
	 *
	 * @param bottom bottom[0] = feature maps, bottom[1] = ROIs
	 * @param top    top[0] receives the pooled features
	 */
	template <typename Dtype>
	void ROIPoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
		const vector<Blob<Dtype>*>& top) {
		const Blob<Dtype>* feature_blob = bottom[0];
		channels_ = feature_blob->channels();
		height_ = feature_blob->height();
		width_ = feature_blob->width();

		// One pooled grid per ROI and per channel; max_idx_ mirrors the output
		// shape so that each output element has a matching argmax slot.
		const int roi_count = bottom[1]->num();
		top[0]->Reshape(roi_count, channels_, pooled_height_, pooled_width_);
		max_idx_.Reshape(roi_count, channels_, pooled_height_, pooled_width_);
	}

	/**
	 * CPU forward pass: max-pools each ROI into a fixed
	 * pooled_height_ x pooled_width_ grid, channel by channel.
	 *
	 * Each ROI is read as five consecutive values
	 * [batch_index, x1, y1, x2, y2]. The corner coordinates are multiplied by
	 * spatial_scale_ and rounded to obtain feature-map coordinates. The
	 * resulting window is split into bins; the maximum of every bin is written
	 * to top[0] and the position of that maximum (offset inside the channel
	 * plane, h * width_ + w) is written to max_idx_. A bin that is empty after
	 * clipping to the feature map yields value 0 and index -1.
	 *
	 * @param bottom bottom[0] = feature maps, bottom[1] = ROIs
	 * @param top    top[0] receives the pooled features
	 */
	template <typename Dtype>
	void ROIPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
		const vector<Blob<Dtype>*>& top) {
		const Dtype* feature_base = bottom[0]->cpu_data();
		const Dtype* roi = bottom[1]->cpu_data();
		const int roi_count = bottom[1]->num();    // number of ROIs
		const int image_count = bottom[0]->num();  // images in the batch
		const int out_count = top[0]->count();

		// Start from "minus infinity" / "no argmax" so that the first
		// comparison inside a non-empty bin always wins.
		Dtype* out = top[0]->mutable_cpu_data();
		caffe_set(out_count, Dtype(-FLT_MAX), out);
		int* argmax = max_idx_.mutable_cpu_data();
		caffe_set(out_count, -1, argmax);

		// For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
		for (int r = 0; r < roi_count; ++r) {
			// The ROI must refer to an image that exists in the batch.
			const int image_index = roi[0];
			CHECK_GE(image_index, 0);
			CHECK_LT(image_index, image_count);

			// ROI corners mapped onto the feature map.
			const int roi_x0 = round(roi[1] * spatial_scale_);
			const int roi_y0 = round(roi[2] * spatial_scale_);
			const int roi_x1 = round(roi[3] * spatial_scale_);
			const int roi_y1 = round(roi[4] * spatial_scale_);

			// Window extent (inclusive corners), forced to at least 1x1.
			const int roi_h = max(roi_y1 - roi_y0 + 1, 1);
			const int roi_w = max(roi_x1 - roi_x0 + 1, 1);

			// Fractional size of one pooling bin.
			const Dtype bin_h = static_cast<Dtype>(roi_h)
				/ static_cast<Dtype>(pooled_height_);
			const Dtype bin_w = static_cast<Dtype>(roi_w)
				/ static_cast<Dtype>(pooled_width_);

			// First channel plane of the image this ROI belongs to.
			const Dtype* plane = feature_base + bottom[0]->offset(image_index);

			for (int c = 0; c < channels_; ++c) {
				for (int ph = 0; ph < pooled_height_; ++ph) {
					// Row range of this bin:
					//  start (included) = floor(ph * roi_h / pooled_height_)
					//  end (excluded)   = ceil((ph + 1) * roi_h / pooled_height_)
					// shifted by the ROI origin and clipped to [0, height_].
					int row_begin = static_cast<int>(
						floor(static_cast<Dtype>(ph) * bin_h));
					int row_end = static_cast<int>(
						ceil(static_cast<Dtype>(ph + 1) * bin_h));
					row_begin = min(max(row_begin + roi_y0, 0), height_);
					row_end = min(max(row_end + roi_y0, 0), height_);

					for (int pw = 0; pw < pooled_width_; ++pw) {
						// Column range, same construction, clipped to [0, width_].
						int col_begin = static_cast<int>(
							floor(static_cast<Dtype>(pw) * bin_w));
						int col_end = static_cast<int>(
							ceil(static_cast<Dtype>(pw + 1) * bin_w));
						col_begin = min(max(col_begin + roi_x0, 0), width_);
						col_end = min(max(col_end + roi_x0, 0), width_);

						const int cell = ph * pooled_width_ + pw;

						// Nothing to pool over: emit 0 / -1 and move on.
						const bool has_area =
							(row_end > row_begin) && (col_end > col_begin);
						if (!has_area) {
							out[cell] = 0;
							argmax[cell] = -1;
							continue;
						}

						// Strictly-greater comparison keeps the first maximum
						// met in row-major order.
						for (int h = row_begin; h < row_end; ++h) {
							for (int w = col_begin; w < col_end; ++w) {
								const int plane_index = h * width_ + w;
								if (plane[plane_index] > out[cell]) {
									out[cell] = plane[plane_index];
									argmax[cell] = plane_index;
								}
							}
						}
					}
				}
				// Increment all data pointers by one channel
				plane += bottom[0]->offset(0, 1);
				out += top[0]->offset(0, 1);
				argmax += max_idx_.offset(0, 1);
			}
			// Increment ROI data pointer
			roi += bottom[1]->offset(1);
		}
	}

	/**
	 * CPU backward pass - not provided by this file.
	 *
	 * The body consists solely of the NOT_IMPLEMENTED macro, so no gradient is
	 * computed here.
	 * NOTE(review): NOT_IMPLEMENTED is defined outside this file; presumably it
	 * aborts with a fatal log message, so training on the CPU path would fail
	 * at this point - confirm against the Caffe headers.
	 */
	template <typename Dtype>
	void ROIPoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
		const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
		NOT_IMPLEMENTED;
	}


// Only when CPU_ONLY is defined: expand STUB_GPU for this layer.
// NOTE(review): STUB_GPU is defined outside this file; presumably it supplies
// placeholder GPU forward/backward methods so the layer still links without
// CUDA - confirm against the Caffe headers.
#ifdef CPU_ONLY
	STUB_GPU(ROIPoolingLayer);
#endif

	// NOTE(review): both macros are defined outside this file; presumably the
	// first explicitly instantiates the template for the supported Dtypes and
	// the second registers the layer under the type name "ROIPooling" used in
	// network definitions - confirm against the Caffe headers.
	INSTANTIATE_CLASS(ROIPoolingLayer);
	REGISTER_LAYER_CLASS(ROIPooling);

}  // namespace caffe

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值