卷积层ConvolutionLayer正向传导的目标层往往是池化层PoolingLayer。池化层通过降采样来降低卷积层输出特征向量的维度,在保留主要特征的同时减少计算量,使模型不易出现过拟合。最常用的降采样方法有均值采样(取区域平均值作为降采样值)、最大值采样(取区域最大值作为降采样值)和随机采样(取区域内随机一个像素)等。
PoolingLayer类从Layer基类单一继承而来,没有派生其它子类。具体定义在pooling_layer.hpp中,
- // PoolingLayer: spatially downsamples its single input blob using MAX,
- // AVE, or STOCHASTIC pooling. Inherits directly from Layer<Dtype> and has
- // no subclasses of its own.
- template <typename Dtype>
- class PoolingLayer : public Layer<Dtype> {
- public:
- explicit PoolingLayer(const LayerParameter& param)
- : Layer<Dtype>(param) {}
- virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual inline const char* type() const { return "Pooling"; }
- // Exactly one input blob; at least one output blob.
- virtual inline int ExactNumBottomBlobs() const { return 1; }
- virtual inline int MinTopBlobs() const { return 1; }
- // MAX pooling may emit a second top blob (the argmax mask), so
- // MaxTopBlobs returns 2 in that case and 1 otherwise.
- virtual inline int MaxTopBlobs() const {
- return (this->layer_param_.pooling_param().pool() ==
- PoolingParameter_PoolMethod_MAX) ? 2 : 1;
- }
- protected:
- virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
- virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
- // Pooling window (kernel) size.
- int kernel_h_, kernel_w_;
- // Pooling stride.
- int stride_h_, stride_w_;
- // Number of padding pixels added to each image border.
- int pad_h_, pad_w_;
- // Number of input channels.
- int channels_;
- // Input image size.
- int height_, width_;
- // Output (pooled) size.
- int pooled_height_, pooled_width_;
- // True when pooling over the whole image (output becomes 1x1).
- bool global_pooling_;
- // Index of the randomly sampled input point (stochastic pooling).
- Blob<Dtype> rand_idx_;
- // Index of the maximum input point (max pooling).
- Blob<int> max_idx_;
- };
具体实现在pooling_layer.cpp中,
- // LayerSetUp: reads the layer's PoolingParameter and resolves the pooling
- // window size, padding, and stride, validating mutually exclusive settings.
- template <typename Dtype>
- void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top) {
- PoolingParameter pool_param = this->layer_param_.pooling_param();
- if (pool_param.global_pooling()) {
- // With global pooling the window covers the whole image, so an
- // explicit kernel size must not be given.
- CHECK(!(pool_param.has_kernel_size() ||
- pool_param.has_kernel_h() || pool_param.has_kernel_w()))
- << "With Global_pooling: true Filter size cannot specified";
- } else {
- // Either kernel_size (square window) or both kernel_h and kernel_w —
- // exactly one of the two forms.
- CHECK(!pool_param.has_kernel_size() !=
- !(pool_param.has_kernel_h() && pool_param.has_kernel_w()))
- << "Filter size is kernel_size OR kernel_h and kernel_w; not both";
- CHECK(pool_param.has_kernel_size() ||
- (pool_param.has_kernel_h() && pool_param.has_kernel_w()))
- << "For non-square filters both kernel_h and kernel_w are required.";
- }
- // pad and stride follow the same rule: either the scalar form or the
- // h/w pair, never a mix.
- CHECK((!pool_param.has_pad() && pool_param.has_pad_h()
- && pool_param.has_pad_w())
- || (!pool_param.has_pad_h() && !pool_param.has_pad_w()))
- << "pad is pad OR pad_h and pad_w are required.";
- CHECK((!pool_param.has_stride() && pool_param.has_stride_h()
- && pool_param.has_stride_w())
- || (!pool_param.has_stride_h() && !pool_param.has_stride_w()))
- << "Stride is stride OR stride_h and stride_w are required.";
- global_pooling_ = pool_param.global_pooling();
- // Resolve the pooling window size.
- if (global_pooling_) {
- // Global pooling: the window equals the input image size.
- kernel_h_ = bottom[0]->height();
- kernel_w_ = bottom[0]->width();
- } else {
- if (pool_param.has_kernel_size()) {
- kernel_h_ = kernel_w_ = pool_param.kernel_size();
- } else {
- kernel_h_ = pool_param.kernel_h();
- kernel_w_ = pool_param.kernel_w();
- }
- }
- CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
- CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";
- // Resolve the padding (scalar pad() is used unless pad_h/pad_w are set).
- if (!pool_param.has_pad_h()) {
- pad_h_ = pad_w_ = pool_param.pad();
- } else {
- pad_h_ = pool_param.pad_h();
- pad_w_ = pool_param.pad_w();
- }
- // Resolve the stride (scalar stride() unless stride_h/stride_w are set).
- if (!pool_param.has_stride_h()) {
- stride_h_ = stride_w_ = pool_param.stride();
- } else {
- stride_h_ = pool_param.stride_h();
- stride_w_ = pool_param.stride_w();
- }
- if (global_pooling_) {
- CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1)
- << "With Global_pooling: true; only pad = 0 and stride = 1";
- }
- if (pad_h_ != 0 || pad_w_ != 0) {
- // Padding is only supported for AVE and MAX pooling, and must be
- // strictly smaller than the window so every window sees real pixels.
- CHECK(this->layer_param_.pooling_param().pool()
- == PoolingParameter_PoolMethod_AVE
- || this->layer_param_.pooling_param().pool()
- == PoolingParameter_PoolMethod_MAX)
- << "Padding implemented only for average and max pooling.";
- CHECK_LT(pad_h_, kernel_h_);
- CHECK_LT(pad_w_, kernel_w_);
- }
- }
- // Reshape: computes the pooled output size from the input size, window,
- // padding, and stride, then sizes the top blob(s) and the index blobs.
- template <typename Dtype>
- void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top) {
- CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
- << "corresponding to (num, channels, height, width)";
- channels_ = bottom[0]->channels();
- height_ = bottom[0]->height();
- width_ = bottom[0]->width();
- if (global_pooling_) {
- // Global pooling: keep the window in sync with the (possibly
- // changed) input size.
- kernel_h_ = bottom[0]->height();
- kernel_w_ = bottom[0]->width();
- }
- // Pooled size uses ceil so that a partial window at the border still
- // produces an output element.
- pooled_height_ = static_cast<int>(ceil(static_cast<float>(
- height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1;
- pooled_width_ = static_cast<int>(ceil(static_cast<float>(
- width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1;
- if (pad_h_ || pad_w_) {
- // With padding, drop the last output element if its window would
- // start beyond the padded image; pooling must start inside it.
- if ((pooled_height_ - 1) * stride_h_ >= height_ + pad_h_) {
- --pooled_height_;
- }
- if ((pooled_width_ - 1) * stride_w_ >= width_ + pad_w_) {
- --pooled_width_;
- }
- CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_);
- CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_);
- }
- top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_,
- pooled_width_);
- if (top.size() > 1) {
- // Optional second top blob (max-pooling mask) mirrors top[0]'s shape.
- top[1]->ReshapeLike(*top[0]);
- }
- // MAX pooling without an explicit mask top still needs the argmax
- // indices internally for the backward pass.
- if (this->layer_param_.pooling_param().pool() ==
- PoolingParameter_PoolMethod_MAX && top.size() == 1) {
- max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
- pooled_width_);
- }
- // STOCHASTIC pooling records which input index was sampled.
- if (this->layer_param_.pooling_param().pool() ==
- PoolingParameter_PoolMethod_STOCHASTIC) {
- rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
- pooled_width_);
- }
- }
- // CPU forward pass.
- // TODO(Yangqing): Is there a faster way to do pooling on the CPU?
- template <typename Dtype>
- void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top) {
- const Dtype* bottom_data = bottom[0]->cpu_data();
- Dtype* top_data = top[0]->mutable_cpu_data();
- const int top_count = top[0]->count();
- // If top.size() > 1, the argmax mask is also emitted as top[1].
- const bool use_top_mask = top.size() > 1;
- int* mask = NULL; // suppress warnings about uninitialized variables
- Dtype* top_mask = NULL;
- // Dispatch on the pooling method. The switch sits outside the loops to
- // avoid a per-element branch, at the cost of some code duplication.
- switch (this->layer_param_.pooling_param().pool()) {
- // Max pooling.
- case PoolingParameter_PoolMethod_MAX:
- // Before scanning, initialize the indices to -1 ...
- if (use_top_mask) {
- top_mask = top[1]->mutable_cpu_data();
- caffe_set(top_count, Dtype(-1), top_mask);
- } else {
- mask = max_idx_.mutable_cpu_data();
- caffe_set(top_count, -1, mask);
- }
- // ... and the outputs to -FLT_MAX so any input value beats them.
- caffe_set(top_count, Dtype(-FLT_MAX), top_data);
- // Scan every pooling window for its maximum.
- for (int n = 0; n < bottom[0]->num(); ++n) {
- for (int c = 0; c < channels_; ++c) {
- for (int ph = 0; ph < pooled_height_; ++ph) {
- for (int pw = 0; pw < pooled_width_; ++pw) {
- // Window bounds, clipped to the image (padding excluded here).
- int hstart = ph * stride_h_ - pad_h_;
- int wstart = pw * stride_w_ - pad_w_;
- int hend = min(hstart + kernel_h_, height_);
- int wend = min(wstart + kernel_w_, width_);
- hstart = max(hstart, 0);
- wstart = max(wstart, 0);
- const int pool_index = ph * pooled_width_ + pw;
- for (int h = hstart; h < hend; ++h) {
- for (int w = wstart; w < wend; ++w) {
- const int index = h * width_ + w;
- if (bottom_data[index] > top_data[pool_index]) {
- top_data[pool_index] = bottom_data[index];
- if (use_top_mask) {
- top_mask[pool_index] = static_cast<Dtype>(index);
- } else {
- // Record the argmax position in max_idx_.
- mask[pool_index] = index;
- }
- }
- }
- }
- }
- }
- // Advance all pointers to the next channel's plane.
- bottom_data += bottom[0]->offset(0, 1);
- top_data += top[0]->offset(0, 1);
- if (use_top_mask) {
- top_mask += top[0]->offset(0, 1);
- } else {
- mask += top[0]->offset(0, 1);
- }
- }
- }
- break;
- // Average pooling.
- case PoolingParameter_PoolMethod_AVE:
- for (int i = 0; i < top_count; ++i) {
- top_data[i] = 0;
- }
- // Accumulate each window's sum, then divide by the window area.
- for (int n = 0; n < bottom[0]->num(); ++n) {
- for (int c = 0; c < channels_; ++c) {
- for (int ph = 0; ph < pooled_height_; ++ph) {
- for (int pw = 0; pw < pooled_width_; ++pw) {
- int hstart = ph * stride_h_ - pad_h_;
- int wstart = pw * stride_w_ - pad_w_;
- int hend = min(hstart + kernel_h_, height_ + pad_h_);
- int wend = min(wstart + kernel_w_, width_ + pad_w_);
- // pool_size is computed before clipping to the image, so padded
- // positions count toward the divisor.
- int pool_size = (hend - hstart) * (wend - wstart);
- hstart = max(hstart, 0);
- wstart = max(wstart, 0);
- hend = min(hend, height_);
- wend = min(wend, width_);
- for (int h = hstart; h < hend; ++h) {
- for (int w = wstart; w < wend; ++w) {
- top_data[ph * pooled_width_ + pw] +=
- bottom_data[h * width_ + w];
- }
- }
- top_data[ph * pooled_width_ + pw] /= pool_size;
- }
- }
- // Advance the pointers to the next channel's plane.
- bottom_data += bottom[0]->offset(0, 1);
- top_data += top[0]->offset(0, 1);
- }
- }
- break;
- // Stochastic pooling is not implemented on the CPU.
- case PoolingParameter_PoolMethod_STOCHASTIC:
- NOT_IMPLEMENTED;
- break;
- default:
- LOG(FATAL) << "Unknown pooling method.";
- }
- }
- // CPU backward pass.
- template <typename Dtype>
- void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
- const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
- if (!propagate_down[0]) {
- return;
- }
- const Dtype* top_diff = top[0]->cpu_diff();
- Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
- // As in the forward pass, the switch sits outside the loops. Gradients
- // are accumulated, so the bottom diff is zeroed first.
- caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);
- // If top.size() > 1, the argmax mask was emitted as top[1].
- const bool use_top_mask = top.size() > 1;
- const int* mask = NULL; // suppress warnings about uninitialized variables
- const Dtype* top_mask = NULL;
- switch (this->layer_param_.pooling_param().pool()) {
- // Max pooling: route each top gradient back to its argmax position.
- case PoolingParameter_PoolMethod_MAX:
- // Pick the index source recorded during the forward pass.
- if (use_top_mask) {
- top_mask = top[1]->cpu_data();
- } else {
- mask = max_idx_.cpu_data();
- }
- for (int n = 0; n < top[0]->num(); ++n) {
- for (int c = 0; c < channels_; ++c) {
- for (int ph = 0; ph < pooled_height_; ++ph) {
- for (int pw = 0; pw < pooled_width_; ++pw) {
- const int index = ph * pooled_width_ + pw;
- // Destination bottom index for this gradient, taken from the
- // sampled-index array.
- const int bottom_index =
- use_top_mask ? top_mask[index] : mask[index];
- bottom_diff[bottom_index] += top_diff[index];
- }
- }
- // Advance the pointers to the next channel's plane.
- bottom_diff += bottom[0]->offset(0, 1);
- top_diff += top[0]->offset(0, 1);
- if (use_top_mask) {
- top_mask += top[0]->offset(0, 1);
- } else {
- mask += top[0]->offset(0, 1);
- }
- }
- }
- break;
- // Average pooling: spread each top gradient evenly over its window.
- case PoolingParameter_PoolMethod_AVE:
- for (int n = 0; n < top[0]->num(); ++n) {
- for (int c = 0; c < channels_; ++c) {
- for (int ph = 0; ph < pooled_height_; ++ph) {
- for (int pw = 0; pw < pooled_width_; ++pw) {
- int hstart = ph * stride_h_ - pad_h_;
- int wstart = pw * stride_w_ - pad_w_;
- int hend = min(hstart + kernel_h_, height_ + pad_h_);
- int wend = min(wstart + kernel_w_, width_ + pad_w_);
- // Divisor matches the forward pass: window area before clipping.
- int pool_size = (hend - hstart) * (wend - wstart);
- hstart = max(hstart, 0);
- wstart = max(wstart, 0);
- hend = min(hend, height_);
- wend = min(wend, width_);
- for (int h = hstart; h < hend; ++h) {
- for (int w = wstart; w < wend; ++w) {
- // Distribute the top gradient equally over the bottom window.
- bottom_diff[h * width_ + w] +=
- top_diff[ph * pooled_width_ + pw] / pool_size;
- }
- }
- }
- }
- // Advance the pointers to the next channel's plane.
- bottom_diff += bottom[0]->offset(0, 1);
- top_diff += top[0]->offset(0, 1);
- }
- }
- break;
- // Stochastic pooling is not implemented on the CPU.
- case PoolingParameter_PoolMethod_STOCHASTIC:
- NOT_IMPLEMENTED;
- break;
- default:
- LOG(FATAL) << "Unknown pooling method.";
- }
- }
- // In CPU_ONLY builds, stub out the Forward_gpu and Backward_gpu methods
- // (the STUB_GPU macro presumably generates failing placeholders — see its
- // definition in the Caffe device-alternate header).
- #ifdef CPU_ONLY
- STUB_GPU(PoolingLayer);
- #endif