caffe 最大池化 pooling_layer

caffe 最大池化 pooling_layer

原理

首先上理论:
在这里插入图片描述

池化层是夹在连续卷积层之间的中间层,它可以非常有效地缩小矩阵的尺寸,从而减少最后全连接层中的参数。使用池化层既可以加快计算速度,也有防止过拟合的作用。池化层的前向传播也是通过一个类似过滤器的结构完成的,但池化层中的计算不是节点的加权和,而是更加简单的最大值或者平均值运算。使用最大值操作的池化层称为最大池化层(max pooling),使用平均值操作的池化层称为平均池化层(average pooling)。总的来说,池化层的作用是压缩数据和参数的量,减小过拟合。
如下图展示最大池化层的计算过程:
在这里插入图片描述

代码注释

src/caffe/layers/pooling_layer.cpp

#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layers/pooling_layer.hpp"
#include "caffe/util/math_functions.hpp"
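/*
  Members of PoolingLayer used below (reproduced here for reference):

  // Pooling window size
  int kernel_h_, kernel_w_;
  // Pooling window stride
  int stride_h_, stride_w_;
  // Number of padding pixels added on each side of the input
  int pad_h_, pad_w_;
  // Number of channels
  int channels_;
  // Input image size
  int height_, width_;
  // Output size after pooling
  int pooled_height_, pooled_width_;
  // Whether to pool over the whole image (down-sample each plane to 1x1)
  bool global_pooling_;
  // Indices of the randomly sampled points (stochastic pooling)
  Blob<Dtype> rand_idx_;
  // Indices of the maximum points (max pooling)
  Blob<int> max_idx_;
*/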
namespace caffe {

using std::min;
using std::max;

// Reads the pooling hyper-parameters (window size, padding, stride, rounding
// mode, global-pooling flag) from this layer's PoolingParameter, validates
// them with CHECKs, and stores them in the corresponding members.
//
// bottom: input blobs; only bottom[0] is read (its height/width give the
//         window size when global pooling is enabled).
// top:    output blobs; not touched here.
template <typename Dtype>
void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Bind by reference; the parameter message is only read here, so there is
  // no need to copy it.
  const PoolingParameter& pool_param = this->layer_param_.pooling_param();
  if (pool_param.global_pooling()) {
    // Global pooling derives the window from the input, so an explicit
    // kernel size is rejected.
    CHECK(!(pool_param.has_kernel_size() ||
      pool_param.has_kernel_h() || pool_param.has_kernel_w()))
      << "With Global_pooling: true Filter size cannot specified";
  } else {
    // Exactly one of {kernel_size} or {kernel_h and kernel_w} must be given.
    CHECK(!pool_param.has_kernel_size() !=
      !(pool_param.has_kernel_h() && pool_param.has_kernel_w()))
      << "Filter size is kernel_size OR kernel_h and kernel_w; not both";
    CHECK(pool_param.has_kernel_size() ||
      (pool_param.has_kernel_h() && pool_param.has_kernel_w()))
      << "For non-square filters both kernel_h and kernel_w are required.";
  }
  // Padding: either pad_h and pad_w together (without pad), or neither.
  CHECK((!pool_param.has_pad() && pool_param.has_pad_h()
      && pool_param.has_pad_w())
      || (!pool_param.has_pad_h() && !pool_param.has_pad_w()))
      << "pad is pad OR pad_h and pad_w are required.";
  // Stride: either stride_h and stride_w together (without stride), or
  // neither.
  CHECK((!pool_param.has_stride() && pool_param.has_stride_h()
      && pool_param.has_stride_w())
      || (!pool_param.has_stride_h() && !pool_param.has_stride_w()))
      << "Stride is stride OR stride_h and stride_w are required.";
  global_pooling_ = pool_param.global_pooling();
  round_mode_ = pool_param.round_mode();

  // Pooling window size.
  if (global_pooling_) {
    // With global pooling the window covers the whole input plane.
    kernel_h_ = bottom[0]->height();
    kernel_w_ = bottom[0]->width();
  } else {
    if (pool_param.has_kernel_size()) {
      // A single kernel_size means a square window.
      kernel_h_ = kernel_w_ = pool_param.kernel_size();
    } else {
      kernel_h_ = pool_param.kernel_h();
      kernel_w_ = pool_param.kernel_w();
    }
  }
  CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
  CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";
  // Padding: fall back to the single `pad` value when pad_h is not set.
  if (!pool_param.has_pad_h()) {
    pad_h_ = pad_w_ = pool_param.pad();
  } else {
    pad_h_ = pool_param.pad_h();
    pad_w_ = pool_param.pad_w();
  }
  // Stride: fall back to the single `stride` value when stride_h is not set.
  if (!pool_param.has_stride_h()) {
    stride_h_ = stride_w_ = pool_param.stride();
  } else {
    stride_h_ = pool_param.stride_h();
    stride_w_ = pool_param.stride_w();
  }
  // Reshape() divides by the strides when computing the pooled size, so a
  // zero (or negative) stride must be rejected up front.
  CHECK_GT(stride_h_, 0) << "Stride cannot be zero.";
  CHECK_GT(stride_w_, 0) << "Stride cannot be zero.";
  if (global_pooling_) {
    CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1)
      << "With Global_pooling: true; only pad = 0 and stride = 1";
  }
  if (pad_h_ != 0 || pad_w_ != 0) {
    CHECK(this->layer_param_.pooling_param().pool()
        == PoolingParameter_PoolMethod_AVE
        || this->layer_param_.pooling_param().pool()
        == PoolingParameter_PoolMethod_MAX)
        << "Padding implemented only for average and max pooling.";
    // The padding must be smaller than the window in each dimension.
    CHECK_LT(pad_h_, kernel_h_);
    CHECK_LT(pad_w_, kernel_w_);
  }
}

// Computes the pooled output size from the current input shape and the
// hyper-parameters stored by LayerSetUp, then reshapes the output blob(s) and
// the auxiliary index blobs accordingly.
//
// bottom: input blobs; bottom[0] must have exactly 4 axes.
// top:    output blobs; top[0] receives the pooled shape, and top[1] (when
//         present) is reshaped like top[0].
template <typename Dtype>
void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  Blob<Dtype>* const input = bottom[0];
  channels_ = input->channels();
  height_ = input->height();
  width_ = input->width();
  if (global_pooling_) {
    // The window always spans the whole (possibly re-sized) input plane.
    kernel_h_ = height_;
    kernel_w_ = width_;
  }

  // Number of stride steps that fit along each dimension, before rounding.
  const float steps_h =
      static_cast<float>(height_ + 2 * pad_h_ - kernel_h_) / stride_h_;
  const float steps_w =
      static_cast<float>(width_ + 2 * pad_w_ - kernel_w_) / stride_w_;
  switch (round_mode_) {
  case PoolingParameter_RoundMode_CEIL:
    pooled_height_ = static_cast<int>(ceil(steps_h)) + 1;
    pooled_width_ = static_cast<int>(ceil(steps_w)) + 1;
    break;
  case PoolingParameter_RoundMode_FLOOR:
    pooled_height_ = static_cast<int>(floor(steps_h)) + 1;
    pooled_width_ = static_cast<int>(floor(steps_w)) + 1;
    break;
  default:
    LOG(FATAL) << "Unknown rounding mode.";
  }

  const bool has_padding = (pad_h_ != 0) || (pad_w_ != 0);
  if (has_padding) {
    // With padding, the last window must start strictly inside the image
    // rather than in the padding; if it does not, drop that last window.
    const int padded_height = height_ + pad_h_;
    const int padded_width = width_ + pad_w_;
    if ((pooled_height_ - 1) * stride_h_ >= padded_height) {
      pooled_height_ -= 1;
    }
    if ((pooled_width_ - 1) * stride_w_ >= padded_width) {
      pooled_width_ -= 1;
    }
    CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_);
    CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_);
  }

  top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_,
      pooled_width_);
  if (top.size() > 1) {
    top[1]->ReshapeLike(*top[0]);
  }

  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    // Max pooling keeps the argmax indices internally only when they are not
    // exported through a second top blob.
    if (top.size() == 1) {
      max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
          pooled_width_);
    }
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    // Stochastic pooling needs room for the sampled indices.
    rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
        pooled_width_);
    break;
  default:
    break;
  }
}

// TODO(Yangqing): Is there a faster way to do pooling in the channel-first
// case?
//
// CPU forward pass. Pools every (image, channel) plane of bottom[0] into
// top[0]. For max pooling the flat in-plane index (h * width_ + w) of each
// selected input is also recorded, either in top[1] (when a second top blob
// is present) or in max_idx_.
template <typename Dtype>
void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const Dtype* in_plane = bottom[0]->cpu_data();   // read-only input
  Dtype* out_plane = top[0]->mutable_cpu_data();   // writable output
  const int out_total = top[0]->count();
  const int num_images = bottom[0]->num();
  // The argmax mask goes to top[1] when the layer has more than one top.
  const bool mask_to_top = top.size() > 1;
  int* idx_plane = NULL;  // suppress warnings about uninitialized variables
  Dtype* top_idx_plane = NULL;
  // The switch over the pooling method sits outside the loops to save time,
  // at the cost of some duplicated loop code.
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    // Seed the index buffer with -1 and the output with -FLT_MAX before
    // searching for the maxima.
    if (mask_to_top) {
      top_idx_plane = top[1]->mutable_cpu_data();
      caffe_set(out_total, Dtype(-1), top_idx_plane);
    } else {
      idx_plane = max_idx_.mutable_cpu_data();
      caffe_set(out_total, -1, idx_plane);
    }
    caffe_set(out_total, Dtype(-FLT_MAX), out_plane);
    for (int n = 0; n < num_images; ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {  // output rows
          // Row extent of the window, clipped to the image.
          const int row_origin = ph * stride_h_ - pad_h_;
          const int row_end = min(row_origin + kernel_h_, height_);
          const int row_begin = max(row_origin, 0);
          for (int pw = 0; pw < pooled_width_; ++pw) {  // output columns
            // Column extent of the window, clipped to the image.
            const int col_origin = pw * stride_w_ - pad_w_;
            const int col_end = min(col_origin + kernel_w_, width_);
            const int col_begin = max(col_origin, 0);
            const int dst = ph * pooled_width_ + pw;  // index in output plane
            for (int h = row_begin; h < row_end; ++h) {
              for (int w = col_begin; w < col_end; ++w) {
                const int src = h * width_ + w;  // index in input plane
                if (in_plane[src] > out_plane[dst]) {
                  out_plane[dst] = in_plane[src];
                  // Remember which input produced the current maximum.
                  if (mask_to_top) {
                    top_idx_plane[dst] = static_cast<Dtype>(src);
                  } else {
                    idx_plane[dst] = src;
                  }
                }
              }
            }
          }
        }
        // Advance all pointers to the next channel plane.
        in_plane += bottom[0]->offset(0, 1);
        const int out_step = top[0]->offset(0, 1);
        out_plane += out_step;
        if (mask_to_top) {
          top_idx_plane += out_step;
        } else {
          idx_plane += out_step;
        }
      }
    }
    break;
  case PoolingParameter_PoolMethod_AVE:
    // Clear the output; sums are accumulated into it below.
    for (int i = 0; i < out_total; ++i) {
      out_plane[i] = 0;
    }
    for (int n = 0; n < num_images; ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          const int row_origin = ph * stride_h_ - pad_h_;
          // Window end clipped to the padded image; used for the divisor.
          const int padded_row_end =
              min(row_origin + kernel_h_, height_ + pad_h_);
          const int row_begin = max(row_origin, 0);
          const int row_end = min(padded_row_end, height_);
          for (int pw = 0; pw < pooled_width_; ++pw) {
            const int col_origin = pw * stride_w_ - pad_w_;
            const int padded_col_end =
                min(col_origin + kernel_w_, width_ + pad_w_);
            // The divisor counts padded positions inside the window too.
            const int pool_size = (padded_row_end - row_origin) *
                (padded_col_end - col_origin);
            const int col_begin = max(col_origin, 0);
            const int col_end = min(padded_col_end, width_);
            Dtype& cell = out_plane[ph * pooled_width_ + pw];
            for (int h = row_begin; h < row_end; ++h) {
              for (int w = col_begin; w < col_end; ++w) {
                cell += in_plane[h * width_ + w];
              }
            }
            cell /= pool_size;
          }
        }
        // Advance to the next channel plane.
        in_plane += bottom[0]->offset(0, 1);
        out_plane += top[0]->offset(0, 1);
      }
    }
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    // Stochastic pooling has no CPU implementation here.
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
}
// CPU backward pass. Propagates top[0]'s diff into bottom[0]'s diff.
//
// top:            output blobs; top[0]->cpu_diff() is the incoming gradient,
//                 and top[1] (when present) holds the max-pooling argmax mask.
// propagate_down: nothing is done unless propagate_down[0] is true.
// bottom:         input blobs; bottom[0]'s diff is zeroed and then filled.
template <typename Dtype>
void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  const Dtype* top_diff = top[0]->cpu_diff();          // incoming gradient
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();  // gradient to compute
  // Different pooling methods. We explicitly do the switch outside the for
  // loop to save time, although this results in more codes.
  // Gradients are accumulated with +=, so start from zero.
  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);
  // The argmax mask is read from top[1] if the layer has more than one top.
  const bool use_top_mask = top.size() > 1;
  const int* mask = NULL;  // suppress warnings about uninitialized variables
  const Dtype* top_mask = NULL;
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    // Only the input that was selected as the maximum receives gradient.
    if (use_top_mask) {
      top_mask = top[1]->cpu_data();
    } else {
      mask = max_idx_.cpu_data();
    }
    for (int n = 0; n < top[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            const int index = ph * pooled_width_ + pw;
            // Look up the in-plane index of the input that produced this
            // output. Only the Dtype-typed top mask is converted; the int
            // mask is used as-is so that large indices are not rounded by a
            // round trip through a floating-point type.
            const int bottom_index = use_top_mask ?
                static_cast<int>(top_mask[index]) : mask[index];
            // Forward_cpu seeds the mask with -1 and only overwrites it when
            // an input compares greater than the -FLT_MAX seed, so -1 can
            // survive (e.g. for NaN or -inf inputs). Skip such entries
            // instead of writing before the start of bottom_diff.
            if (bottom_index < 0) {
              continue;
            }
            bottom_diff[bottom_index] += top_diff[index];
          }
        }
        // Advance all pointers to the next channel plane.
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
        if (use_top_mask) {
          top_mask += top[0]->offset(0, 1);
        } else {
          mask += top[0]->offset(0, 1);
        }
      }
    }
    break;
  case PoolingParameter_PoolMethod_AVE:
    // The main loop
    for (int n = 0; n < top[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            // Same window and divisor computation as in the forward pass:
            // pool_size is taken on the window clipped to the padded image,
            // before the window is clipped to the real image.
            int hstart = ph * stride_h_ - pad_h_;
            int wstart = pw * stride_w_ - pad_w_;
            int hend = min(hstart + kernel_h_, height_ + pad_h_);
            int wend = min(wstart + kernel_w_, width_ + pad_w_);
            int pool_size = (hend - hstart) * (wend - wstart);
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);
            hend = min(hend, height_);
            wend = min(wend, width_);
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                // Spread the top gradient evenly over the inputs covered by
                // the window.
                bottom_diff[h * width_ + w] +=
                  top_diff[ph * pooled_width_ + pw] / pool_size;
              }
            }
          }
        }
        // Advance to the next channel plane.
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
      }
    }
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    // Stochastic pooling has no CPU implementation here.
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
}

// In CPU_ONLY builds, expand STUB_GPU for PoolingLayer. The macro is defined
// elsewhere; presumably it provides stand-ins for Forward_gpu and
// Backward_gpu so the class still links without GPU code — confirm against
// the macro definition.
#ifdef CPU_ONLY
STUB_GPU(PoolingLayer);
#endif

// NOTE(review): INSTANTIATE_CLASS is defined elsewhere; it presumably emits
// the explicit template instantiations of PoolingLayer — confirm.
INSTANTIATE_CLASS(PoolingLayer);

}  // namespace caffe

参考资料

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

听雨听风眠

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值