Caffe源码中Pooling Layer文件分析

Caffe源码(caffe version commit: 09868ac , date: 2015.08.15)中有一些重要的头文件,这里介绍下include/caffe/vision_layers文件中PoolingLayer类,在最新版caffe中,PoolingLayer类被单独放在了include/caffe/layers/pooling_layer.hpp文件中,这两个文件中PoolingLayer类的内容及实现是完全一致的:

1.  include文件:

(1)、<caffe/blob.hpp>:此文件的介绍可以参考:http://blog.csdn.net/fengbingchun/article/details/59106613

(2)、<caffe/layer.hpp>:此文件的介绍可以参考:http://blog.csdn.net/fengbingchun/article/details/60871052

(3)、<caffe/proto/caffe.pb.h>:此文件的介绍可以参考:http://blog.csdn.net/fengbingchun/article/details/55267162

2.  类PoolingLayer:池化层,Layer类的子类

Pooling layer的主要作用是降维,缩小feature map,图像降采样,方法有:

(1)、均值采样:取区域平均值作为降采样值;

(2)、最大值采样:取区域最大值作为降采样值;

(3)、随机采样:取区域内随机一个像素值。

<caffe/layers/pooling_layer.hpp>文件的详细介绍如下:

#ifndef CAFFE_POOLING_LAYER_HPP_
#define CAFFE_POOLING_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {
/**
 * @brief Pools the input image by taking the max, average, etc. within regions.
 *
 * TODO(dox): thorough documentation for Forward, Backward, and proto params.
 */
// 池化层,Layer类的子类,图像降采样,有三种Pooling方法:Max、Avx、Stochastic
template <typename Dtype>
class PoolingLayer : public Layer<Dtype> {
 public:
// 显示构造函数
  explicit PoolingLayer(const LayerParameter& param) : Layer<Dtype>(param) {}
// 参数初始化,通过类PoolingParameter获取成员变量值,包括:
// global_pooling_、kernel_h_、kernel_w_、pad_h_、pad_w_、stride_h_、stride_w_
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// 调整top blobs的shape,并有可能会reshape rand_idx_或max_idx_;
// 获取成员变量值,包括:channels_、height_、width_、pooled_height_、pooled_width_
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// 获得Pooling layer的类型: Pooling
  virtual inline const char* type() const { return "Pooling"; }
// 获得Pooling layer所需的bottom blobs的个数: 1
  virtual inline int ExactNumBottomBlobs() const { return 1; }
// 获得Pooling layer所需的bottom blobs的最少个数: 1
  virtual inline int MinTopBlobs() const { return 1; }
  // MAX POOL layers can output an extra top blob for the mask;
  // others can only output the pooled inputs.
// 获得Pooling layer所需的bottom blobs的最多个数: Max为2,其它(Avg, Stochastic)为1
  virtual inline int MaxTopBlobs() const {
    return (this->layer_param_.pooling_param().pool() ==
            PoolingParameter_PoolMethod_MAX) ? 2 : 1;
  }

 protected:
// CPU实现Pooling layer的前向传播,仅有Max和Ave两种方法实现
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// GPU实现Pooling layer的前向传播,Max、Ave、Stochastic三种方法实现
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// CPU实现Pooling layer的反向传播,仅有Max和Ave两种方法实现
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
// GPU实现Pooling layer的反向传播,Max、Ave、Stochastic三种方法实现
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

// Caffe中类的成员变量名都带有后缀"_",这样就容易区分临时变量和类成员变量
  int kernel_h_, kernel_w_; // 滤波器(卷积核)大小
  int stride_h_, stride_w_; // 步长大小
  int pad_h_, pad_w_; // 图像扩充大小
  int channels_; // 图像通道数
  int height_, width_; // 图像高、宽
// 池化后图像高、宽
// pooled_height_ = (height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1
// pooled_width_ = (width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1
  int pooled_height_, pooled_width_;
  bool global_pooling_; // 是否全区域池化(将整幅图像降采样为1*1)
  Blob<Dtype> rand_idx_; // 随机采样索引,Pooling方法为STOCHASTIC时用到并会Reshape
  Blob<int> max_idx_; // 最大值采样索引,Pooling方法为MAX时用到并会Reshape
};

}  // namespace caffe

#endif  // CAFFE_POOLING_LAYER_HPP_
在caffe.proto文件中,有一个message是与pooling layer 相关的,如下:
message PoolingParameter { // Pooling层参数类
  enum PoolMethod { // 枚举类型,Pooling的方法:Max(最大值采样)、AVE(均值采样)、STOCHASTIC(随机采样)
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional PoolMethod pool = 1 [default = MAX]; // The pooling method, pooling方法
  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in height and width or as Y, X pairs.
  optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X),图像扩充大小(添加图像边界的像素大小)
  optional uint32 pad_h = 9 [default = 0]; // The padding height,图像扩充大小,Y
  optional uint32 pad_w = 10 [default = 0]; // The padding width,图像扩充大小,X
  optional uint32 kernel_size = 2; // The kernel size (square),滤波器(卷积核、滑动窗)的大小(高=宽)
  optional uint32 kernel_h = 5; // The kernel height,滤波器(卷积核、滑动窗)的高
  optional uint32 kernel_w = 6; // The kernel width,滤波器(卷积核、滑动窗)的宽
  optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X),滑动步长(高=宽),卷积核卷积时平移的步幅
  optional uint32 stride_h = 7; // The stride height,滑动步长,高
  optional uint32 stride_w = 8; // The stride width,滑动步长,宽
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 11 [default = DEFAULT]; //
  // If global_pooling then it will pool over the size of the bottom by doing
  // kernel_h = bottom->height and kernel_w = bottom->width
  optional bool global_pooling = 12 [default = false]; // 是否是全区域池化
}
pooling layer的测试代码如下:

#include "funset.hpp"
#include <string>
#include <vector>
#include "common.hpp"

int test_caffe_layer_pooling()
{
	caffe::Caffe::set_mode(caffe::Caffe::CPU); // set run caffe mode

	// set layer parameter
	caffe::LayerParameter layer_param;
	layer_param.set_phase(caffe::Phase::TRAIN);

	// cv::Mat -> caffe::Blob
	std::string image_name = "E:/GitCode/Caffe_Test/test_data/images/a.jpg";
	cv::Mat mat1 = cv::imread(image_name, 1);
	if (!mat1.data) {
		fprintf(stderr, "read image fail: %s\n", image_name.c_str());
		return -1;
	}
	mat1.convertTo(mat1, CV_32FC3);
	std::vector<cv::Mat> mat2;
	cv::split(mat1, mat2);
	std::vector<int> mat_reshape{ 1, (int)mat2.size(), mat2[0].rows, mat2[0].cols };

	caffe::Blob<float> blob;
	blob.Reshape(mat_reshape);
	size_t size = mat2[0].rows * mat2[0].cols;
	float* data = new float[mat2.size() * size];
	memcpy(data, mat2[0].data, size * sizeof(float));
	memcpy(data + size, mat2[1].data, size * sizeof(float));
	memcpy(data + 2 * size, mat2[2].data, size * sizeof(float));
	blob.set_cpu_data(data);

	for (int method = 0; method < 2; ++method) {
		// set pooling parameter
		caffe::PoolingParameter* pooling_param = layer_param.mutable_pooling_param();
		if (method == 0) pooling_param->set_pool(caffe::PoolingParameter::MAX);
		else pooling_param->set_pool(caffe::PoolingParameter::AVE);
		pooling_param->set_kernel_size(3);
		pooling_param->set_pad(2);
		pooling_param->set_stride(2);
		pooling_param->set_global_pooling(false);

		std::vector<caffe::Blob<float>*> bottom_blob{ &blob }, top_blob{ &caffe::Blob<float>()/*, &caffe::Blob<float>() */ };

		// test PoolingLayer function
		caffe::PoolingLayer<float> pooling_layer(layer_param);
		pooling_layer.SetUp(bottom_blob, top_blob);
		fprintf(stderr, "top blob info: channels: %d, height: %d, width: %d\n",
			top_blob[0]->channels(), top_blob[0]->height(), top_blob[0]->width());

		pooling_layer.Forward(bottom_blob, top_blob);

		int height = top_blob[0]->height();
		int width = top_blob[0]->width();
		const float* p = top_blob[0]->cpu_data();
		std::vector<cv::Mat> mat3{ cv::Mat(height, width, CV_32FC1, (float*)p),
			cv::Mat(height, width, CV_32FC1, (float*)(p + height * width)),
			cv::Mat(height, width, CV_32FC1, (float*)(p + height * width * 2)) };
		cv::Mat mat4;
		cv::merge(mat3, mat4);
		mat4.convertTo(mat4, CV_8UC3);
		if (method == 0) image_name = "E:/GitCode/Caffe_Test/test_data/images/forward0.jpg";
		else image_name = "E:/GitCode/Caffe_Test/test_data/images/forward1.jpg";
		cv::imwrite(image_name, mat4);

		for (int i = 0; i < bottom_blob[0]->count(); ++i)
			bottom_blob[0]->mutable_cpu_diff()[i] = bottom_blob[0]->cpu_data()[i];
		for (int i = 0; i < top_blob[0]->count(); ++i)
			top_blob[0]->mutable_cpu_diff()[i] = top_blob[0]->cpu_data()[i];

		std::vector<bool> propagate_down{ true };
		pooling_layer.Backward(top_blob, propagate_down, bottom_blob);

		height = bottom_blob[0]->height();
		width = bottom_blob[0]->width();
		p = bottom_blob[0]->cpu_diff();
		std::vector<cv::Mat> mat5{ cv::Mat(height, width, CV_32FC1, (float*)p),
			cv::Mat(height, width, CV_32FC1, (float*)(p + height * width)),
			cv::Mat(height, width, CV_32FC1, (float*)(p + height * width * 2)) };
		cv::Mat mat6;
		cv::merge(mat5, mat6);
		mat6.convertTo(mat6, CV_8UC3);
		if (method == 0) image_name = "E:/GitCode/Caffe_Test/test_data/images/backward0.jpg";
		else image_name = "E:/GitCode/Caffe_Test/test_data/images/backward1.jpg";
		cv::imwrite(image_name, mat6);
	}

	delete[] data;
	return 0;
}
执行结果如下图:


图像结果(Lena.jpg)如下:前向传播、反向传播,前两幅为Max,后两幅为Avg方法



GitHubhttps://github.com/fengbingchun/Caffe_Test

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值