Caffe source code: the pooling layer

This post walks through the Caffe source files pooling_layer.hpp and pooling_layer.cpp under /src/caffe/layers/, which implement the pooling layer of a convolutional neural network.

Overview:
When the input training images are large, training on every raw feature is computationally expensive and encourages overfitting: parameters fitted against every fine detail end up matching the training images too closely. Pooling reduces the influence of such fine details on the learned parameters. Since image statistics are largely local, Caffe inserts pooling layers between convolutional layers. Pooling is simply a form of image downsampling; the most common methods are average pooling (take the mean of a region), max pooling (take the maximum of a region), and stochastic pooling (pick a random pixel within the region).
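To make the idea concrete, here is a small standalone sketch (independent of Caffe; the 4x4 input values are made up purely for illustration) that applies 2x2 max pooling and average pooling with stride 2 to a single-channel image:

#include <algorithm>
#include <cstdio>

int main() {
  const int H = 4, W = 4, K = 2, S = 2;  // input size, pooling kernel, stride
  float img[H][W] = {{1, 3, 2, 4},
                     {5, 6, 1, 2},
                     {7, 8, 9, 1},
                     {3, 2, 4, 6}};
  for (int ph = 0; ph < H / S; ++ph) {    // loop over output rows
    for (int pw = 0; pw < W / S; ++pw) {  // loop over output columns
      float max_v = img[ph * S][pw * S], sum = 0.f;
      for (int h = ph * S; h < ph * S + K; ++h)    // scan one K x K window
        for (int w = pw * S; w < pw * S + K; ++w) {
          max_v = std::max(max_v, img[h][w]);
          sum += img[h][w];
        }
      std::printf("window(%d,%d): max = %.0f, average = %.2f\n",
                  ph, pw, max_v, sum / (K * K));
    }
  }
  return 0;
}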

How the pooling layer is actually implemented is explained in the annotated source below:

pooling_layer.hpp:

#ifndef CAFFE_POOLING_LAYER_HPP_
#define CAFFE_POOLING_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Pools the input image by taking the max, average, etc. within regions.
 *
 * TODO(dox): thorough documentation for Forward, Backward, and proto params.
 */
template <typename Dtype>
class PoolingLayer : public Layer<Dtype> {
 public:
    /* Constructor */
  explicit PoolingLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
    /* Layer setup: reads and validates the pooling parameters */
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
    /* Reshapes the top blobs according to the bottom blob and the pooling parameters */
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
    /* Returns the layer type string */
  virtual inline const char* type() const { return "Pooling"; }
    /* Exact number of bottom (input) blobs: 1 */
  virtual inline int ExactNumBottomBlobs() const { return 1; }
    /* Minimum number of top (output) blobs: 1 */
  virtual inline int MinTopBlobs() const { return 1; }
  // MAX POOL layers can output an extra top blob for the mask;
  // others can only output the pooled inputs.
  /* MAX pooling can output an additional mask blob, so it allows up to two top blobs */
  virtual inline int MaxTopBlobs() const {
    return (this->layer_param_.pooling_param().pool() ==
            PoolingParameter_PoolMethod_MAX) ? 2 : 1;
  }

 protected:
    /* Forward pass of the pooling layer on the CPU */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
    /* Forward pass of the pooling layer on the GPU */
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
    /* Backward pass of the pooling layer on the CPU */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
    /* Backward pass of the pooling layer on the GPU */
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /* Pooling kernel height and width */
  int kernel_h_, kernel_w_;
  /* Stride of the pooling window */
  int stride_h_, stride_w_;
  /* Padding added around the input image */
  int pad_h_, pad_w_;
  /* Number of channels */
  int channels_;
  /* Input image height and width */
  int height_, width_;
  /* Pooled (output) height and width */
  int pooled_height_, pooled_width_;
  /* Whether to pool over the whole image (downsamples the entire image to 1x1) */
  bool global_pooling_;
  /* Indices of the points chosen by stochastic pooling */
  Blob<Dtype> rand_idx_;
  /* Indices of the points chosen by max pooling */
  Blob<int> max_idx_;
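  // Note: when a second top blob is requested, Forward_cpu writes the argmax
  // positions there as Dtype values and max_idx_ is left unused; otherwise the
  // positions are stored in max_idx_ and read back in Backward_cpu.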
};

}  // namespace caffe

#endif  // CAFFE_POOLING_LAYER_HPP_

pooling_layer.cpp:

#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layers/pooling_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

using std::min;
using std::max;

template <typename Dtype>
void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  /* Read the pooling parameters defined for this layer in the net prototxt */
  PoolingParameter pool_param = this->layer_param_.pooling_param();
  /* Sanity-check the parameter combination */
  if (pool_param.global_pooling()) {
    CHECK(!(pool_param.has_kernel_size() ||
      pool_param.has_kernel_h() || pool_param.has_kernel_w()))
      << "With Global_pooling: true Filter size cannot specified";
  } else {
    CHECK(!pool_param.has_kernel_size() !=
      !(pool_param.has_kernel_h() && pool_param.has_kernel_w()))
      << "Filter size is kernel_size OR kernel_h and kernel_w; not both";
    CHECK(pool_param.has_kernel_size() ||
      (pool_param.has_kernel_h() && pool_param.has_kernel_w()))
      << "For non-square filters both kernel_h and kernel_w are required.";
  }
  CHECK((!pool_param.has_pad() && pool_param.has_pad_h()
      && pool_param.has_pad_w())
      || (!pool_param.has_pad_h() && !pool_param.has_pad_w()))
      << "pad is pad OR pad_h and pad_w are required.";
  CHECK((!pool_param.has_stride() && pool_param.has_stride_h()
      && pool_param.has_stride_w())
      || (!pool_param.has_stride_h() && !pool_param.has_stride_w()))
      << "Stride is stride OR stride_h and stride_w are required.";
  /* Copy the validated parameters into the member variables */
  global_pooling_ = pool_param.global_pooling();  /* whether to pool over the whole input */
  if (global_pooling_) {  // for global pooling, the kernel covers the whole input image
    kernel_h_ = bottom[0]->height();
    kernel_w_ = bottom[0]->width();
  } else {
    if (pool_param.has_kernel_size()) {
      kernel_h_ = kernel_w_ = pool_param.kernel_size();   /* square kernel: one value for both dimensions */
    } else {
      kernel_h_ = pool_param.kernel_h();
      kernel_w_ = pool_param.kernel_w();
    }
  }
  CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
  CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";
  if (!pool_param.has_pad_h()) {
    pad_h_ = pad_w_ = pool_param.pad();
  } else {
    pad_h_ = pool_param.pad_h();   // padding height and width set separately
    pad_w_ = pool_param.pad_w();
  }
  if (!pool_param.has_stride_h()) {
    stride_h_ = stride_w_ = pool_param.stride();  // stride of the pooling window
  } else {
    stride_h_ = pool_param.stride_h();
    stride_w_ = pool_param.stride_w();
  }
  /* Global pooling requires no padding and a stride of 1 */
  if (global_pooling_) {
    CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1)
      << "With Global_pooling: true; only pad = 0 and stride = 1";
  }
  if (pad_h_ != 0 || pad_w_ != 0) {
    CHECK(this->layer_param_.pooling_param().pool()
        == PoolingParameter_PoolMethod_AVE
        || this->layer_param_.pooling_param().pool()
        == PoolingParameter_PoolMethod_MAX)
        << "Padding implemented only for average and max pooling.";
    CHECK_LT(pad_h_, kernel_h_);
    CHECK_LT(pad_w_, kernel_w_);
  }
}

template <typename Dtype>
void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  /* The input blob must have exactly 4 axes: (num, channels, height, width) */
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  channels_ = bottom[0]->channels();  /* number of channels */
  height_ = bottom[0]->height();      /* input height */
  width_ = bottom[0]->width();        /* input width */
  if (global_pooling_) {
    kernel_h_ = bottom[0]->height();
    kernel_w_ = bottom[0]->width();
  }
  /* Compute the pooled (output) height and width */
  pooled_height_ = static_cast<int>(ceil(static_cast<float>(
      height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1;
  pooled_width_ = static_cast<int>(ceil(static_cast<float>(
      width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1;
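  // Worked example (the numbers here are illustrative assumptions, not taken
  // from any prototxt above): with height_ = 224, pad_h_ = 0, kernel_h_ = 3 and
  // stride_h_ = 2, pooled_height_ = ceil((224 + 0 - 3) / 2.0) + 1 = 111 + 1 = 112.
  // Because of the ceil, the last window may extend past the input; the forward
  // pass clamps it with min(), and the block below trims the last output
  // position when padding is in use.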
  /* With padding, make sure the last pooling window starts inside the image; otherwise drop the last output position */
  if (pad_h_ || pad_w_) {
    // If we have padding, ensure that the last pooling starts strictly
    // inside the image (instead of at the padding); otherwise clip the last.
    if ((pooled_height_ - 1) * stride_h_ >= height_ + pad_h_) {
      --pooled_height_;
    }
    if ((pooled_width_ - 1) * stride_w_ >= width_ + pad_w_) {
      --pooled_width_;
    }
    CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_);
    CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_);
  }
  // Reshape the output (top) blob using the pooled height and width
  top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_,
      pooled_width_);
  /* If there is a second top blob (the mask), give it the same shape as top[0] */
  if (top.size() > 1) {
    top[1]->ReshapeLike(*top[0]);
  }
  // If max pooling, we will initialize the vector index part.
  // For max pooling without an explicit mask output, allocate the blob that stores the max indices
  if (this->layer_param_.pooling_param().pool() ==
      PoolingParameter_PoolMethod_MAX && top.size() == 1) {
    max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
        pooled_width_);
  }
  // If stochastic pooling, we will initialize the random index part.
  // For stochastic pooling, allocate the blob that stores the random indices
  if (this->layer_param_.pooling_param().pool() ==
      PoolingParameter_PoolMethod_STOCHASTIC) {
    rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
      pooled_width_);
  }
}

// TODO(Yangqing): Is there a faster way to do pooling in the channel-first
// case?
// CPU forward pass
template <typename Dtype>
void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int top_count = top[0]->count();
  // We'll output the mask to top[1] if it's of size >1.
  // If top.size() > 1, the mask is additionally written to top[1]
  const bool use_top_mask = top.size() > 1;
  int* mask = NULL;  // suppress warnings about uninitialized variables
  Dtype* top_mask = NULL;
  // Different pooling methods. We explicitly do the switch outside the for
  // loop to save time, although this results in more code.
  switch (this->layer_param_.pooling_param().pool()) {  // dispatch on the pooling method: max, average, stochastic
  case PoolingParameter_PoolMethod_MAX:  // max pooling
    // Initialize
    if (use_top_mask) {
      top_mask = top[1]->mutable_cpu_data();
      caffe_set(top_count, Dtype(-1), top_mask);
    } else {
      mask = max_idx_.mutable_cpu_data();
      caffe_set(top_count, -1, mask);
    }
    caffe_set(top_count, Dtype(-FLT_MAX), top_data);
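    // every output element starts at -FLT_MAX so the first input value compared
    // against it always wins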
    // The main loop
    // loop over every output position and take the max of its window
    for (int n = 0; n < bottom[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            int hstart = ph * stride_h_ - pad_h_;
            int wstart = pw * stride_w_ - pad_w_;
            int hend = min(hstart + kernel_h_, height_);
            int wend = min(wstart + kernel_w_, width_);
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);
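            // the window [hstart, hend) x [wstart, wend) has now been clipped to
            // the valid image area, so padded positions never contribute to the max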
            const int pool_index = ph * pooled_width_ + pw;
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                const int index = h * width_ + w;
                if (bottom_data[index] > top_data[pool_index]) {
                  top_data[pool_index] = bottom_data[index];  // copy the window max to the corresponding output position
                  if (use_top_mask) {
                    // record the position of the max in top_mask
                    top_mask[pool_index] = static_cast<Dtype>(index);
                  } else {
                    // record the position of the max in max_idx_
                    mask[pool_index] = index;
                  }
                }
              }
            }
          }
        }
        // compute offset
        // advance all pointers by one channel
        bottom_data += bottom[0]->offset(0, 1);
        top_data += top[0]->offset(0, 1);
        if (use_top_mask) {
          top_mask += top[0]->offset(0, 1);
        } else {
          mask += top[0]->offset(0, 1);
        }
      }
    }
    break;
  case PoolingParameter_PoolMethod_AVE:   // average pooling
    for (int i = 0; i < top_count; ++i) {
      top_data[i] = 0;   // initialize the output to zero
    }
    // The main loop
    for (int n = 0; n < bottom[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            int hstart = ph * stride_h_ - pad_h_;
            int wstart = pw * stride_w_ - pad_w_;
            int hend = min(hstart + kernel_h_, height_ + pad_h_);
            int wend = min(wstart + kernel_w_, width_ + pad_w_);
            int pool_size = (hend - hstart) * (wend - wstart);
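            // note that pool_size is computed before the window is clipped to the
            // image, so the average divides by the full (possibly padded) window area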
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);
            hend = min(hend, height_);
            wend = min(wend, width_);
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                top_data[ph * pooled_width_ + pw] +=   // accumulate the window elements
                    bottom_data[h * width_ + w];
              }
            }
            top_data[ph * pooled_width_ + pw] /= pool_size;  // divide by the window size to get the mean
          }
        }
        // compute offset
        // 加上偏移,跳转到下一幅图像  
        bottom_data += bottom[0]->offset(0, 1);
        top_data += top[0]->offset(0, 1);
      }
    }
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:  // stochastic pooling: not implemented on the CPU
    NOT_IMPLEMENTED;
    break;
  default:     // unknown pooling method
    LOG(FATAL) << "Unknown pooling method.";
  }
}

// CPU backward pass
template <typename Dtype>
void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  // Different pooling methods. We explicitly do the switch outside the for
  // loop to save time, although this results in more code.
  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);
  // We'll output the mask to top[1] if it's of size >1.
  const bool use_top_mask = top.size() > 1;
  const int* mask = NULL;  // suppress warnings about uninitialized variables
  const Dtype* top_mask = NULL;
  // as in the forward pass, the switch is kept outside the loops for speed
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:  // max pooling
    // The main loop
    if (use_top_mask) {
      top_mask = top[1]->cpu_data();  // the mask comes from top[1], mirroring the forward pass
    } else {
      mask = max_idx_.cpu_data();
    }
    for (int n = 0; n < top[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            const int index = ph * pooled_width_ + pw;
            const int bottom_index =
                use_top_mask ? top_mask[index] : mask[index];
            // the gradient flows back only to the max position (+= handles overlapping windows)
            bottom_diff[bottom_index] += top_diff[index];
          }
        }
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
        if (use_top_mask) {
          top_mask += top[0]->offset(0, 1);
        } else {
          mask += top[0]->offset(0, 1);
        }
      }
    }
    break;
  case PoolingParameter_PoolMethod_AVE:    // average pooling
    // The main loop
    for (int n = 0; n < top[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            int hstart = ph * stride_h_ - pad_h_;
            int wstart = pw * stride_w_ - pad_w_;
            int hend = min(hstart + kernel_h_, height_ + pad_h_);
            int wend = min(wstart + kernel_w_, width_ + pad_w_);
            int pool_size = (hend - hstart) * (wend - wstart);
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);
            hend = min(hend, height_);
            wend = min(wend, width_);
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                bottom_diff[h * width_ + w] +=
                  top_diff[ph * pooled_width_ + pw] / pool_size; // spread the gradient evenly over the window
              }
            }
          }
        }
        // offset
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
      }
    }
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:  // stochastic pooling: not implemented on the CPU
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
}


#ifdef CPU_ONLY
STUB_GPU(PoolingLayer);
#endif

INSTANTIATE_CLASS(PoolingLayer);

}  // namespace caffe
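
Finally, a minimal sketch of driving the layer directly from C++ (this assumes a built and linked Caffe; the blob shape, the 0..15 test values and the 2x2/stride-2 parameters are illustrative assumptions, not part of the source above):

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layers/pooling_layer.hpp"
#include "caffe/proto/caffe.pb.h"

int main() {
  using namespace caffe;
  Caffe::set_mode(Caffe::CPU);

  // One 4x4 single-channel image as the bottom blob, filled with 0..15.
  Blob<float> bottom_blob(1, 1, 4, 4);
  float* data = bottom_blob.mutable_cpu_data();
  for (int i = 0; i < bottom_blob.count(); ++i) {
    data[i] = static_cast<float>(i);
  }

  // 2x2 MAX pooling with stride 2 -> the top blob becomes 1x1x2x2.
  LayerParameter layer_param;
  PoolingParameter* pool_param = layer_param.mutable_pooling_param();
  pool_param->set_pool(PoolingParameter_PoolMethod_MAX);
  pool_param->set_kernel_size(2);
  pool_param->set_stride(2);

  Blob<float> top_blob;
  std::vector<Blob<float>*> bottom(1, &bottom_blob);
  std::vector<Blob<float>*> top(1, &top_blob);

  PoolingLayer<float> layer(layer_param);
  layer.SetUp(bottom, top);    // runs LayerSetUp and Reshape shown above
  layer.Forward(bottom, top);  // fills top_blob with the pooled values

  return 0;
}

With the row-major input 0..15, each 2x2 block contributes its largest element, so the expected output is 5, 7, 13 and 15.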