caffe架构中conv和pooling层输出尺寸的计算方法
conv层计算方法
Outputsize=(int)[(Inputsize+Padding∗2−Kernelsize)/Stride]+1
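注意: 根据caffe源码,该计算是对"int"型变量进行的操作,因此输出结果也是"int"型;int型除法会舍去小数部分,相当于"floor"操作,可以理解为向下取整。另外,当dilation大于1时,公式中的Kernelsize应取膨胀后的核尺寸,即 dilation∗(Kernelsize−1)+1(见下方源码中的kernel_extent)。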
pooling层计算方法
Outputsize=Ceil[(Inputsize+Padding∗2−Kernelsize)/Stride]+1
注意:pooling层是对除法结果进行"ceil"操作,即向上取整。
caffe源码
#conv层
// ./src/caffe/layers/conv_layer.cpp
// Derives the output size along every spatial axis of the convolution and
// stores the results in output_shape_, one entry per axis.
void ConvolutionLayer<Dtype>::compute_output_shape() {
  // Per-axis hyper-parameters, read through each member's cpu_data() pointer.
  const int* kernel = this->kernel_shape_.cpu_data();
  const int* stride = this->stride_.cpu_data();
  const int* pad = this->pad_.cpu_data();
  const int* dilation = this->dilation_.cpu_data();

  this->output_shape_.clear();
  for (int axis = 0; axis < this->num_spatial_axes_; ++axis) {
    // Offset by one to skip the channel axis.
    const int in_size = this->input_shape(axis + 1);
    // Extent covered by the kernel once dilation is taken into account.
    const int dilated_kernel = dilation[axis] * (kernel[axis] - 1) + 1;
    // The padding amount is counted twice (once per side of the axis).
    const int padded_size = in_size + 2 * pad[axis];
    // Output size: plain int division, so the fractional part is discarded.
    const int out_size = (padded_size - dilated_kernel) / stride[axis] + 1;
    this->output_shape_.push_back(out_size);
  }
}
#pooling层
// ./src/caffe/layers/pooling_layer.cpp
// Excerpt of PoolingLayer::Reshape: reads the input dimensions from bottom[0]
// and computes the pooled output height and width. The rest of the function
// body is omitted from this excerpt.
//
// bottom: input blobs; only bottom[0] is read in the part shown here.
// top:    output blobs; not touched in the part shown here.
void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  /* The input blob must have exactly 4 axes: (num, channels, height, width). */
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " << "corresponding to (num, channels, height, width)";
  channels_ = bottom[0]->channels(); /* number of input channels */
  height_ = bottom[0]->height();     /* input height */
  width_ = bottom[0]->width();       /* input width */
  if (global_pooling_) {
    /* Global pooling: the kernel spans the whole input height and width. */
    kernel_h_ = bottom[0]->height();
    kernel_w_ = bottom[0]->width();
  }
  /* Pooled height/width. The numerator is cast to float so the division is
     done in floating point, and ceil() then rounds the quotient UP (the
     convolution layer uses plain int division instead). */
  pooled_height_ = static_cast<int>(ceil(static_cast<float>(height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1;
  pooled_width_ = static_cast<int>(ceil(static_cast<float>(width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1;
  // ... (remainder of Reshape omitted in this excerpt)
}