Caffe源码解读：conv_layer的前向传播与反向传播

最新推荐文章于 2021-08-11 14:12:00 发布

faithenXX

最新推荐文章于 2021-08-11 14:12:00 发布

阅读量714

点赞数

分类专栏： caffe

本文链接：https://blog.csdn.net/zyf19930610/article/details/71429885

版权

caffe 专栏收录该内容

14 篇文章 0 订阅

订阅专栏

正向传播原理请见： http://blog.csdn.net/xg123321123/article/details/53319080

误差反向传播原理请见：https://zhuanlan.zhihu.com/p/22860936

下面直接上 conv_layer.cpp 的代码（其中 forward_cpu_gemm 属于 BaseConvolutionLayer，定义在 base_conv_layer.cpp 中）：

// Forward pass (CPU).
//
// For every bottom/top blob pair, runs the convolution on each of the num_
// samples and, when bias_term_ is set, adds the bias to that sample's output.
//
// bottom: input blobs.
// top:    output blobs; top[i] receives the result computed from bottom[i].
template <typename Dtype>
void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // blobs_[0] stores the weights; blobs_[1] stores the bias.
  const Dtype* weight = this->blobs_[0]->cpu_data();
  // top is indexed with the same index as bottom, so the two vectors are
  // expected to hold the same number of blobs.
  for (int blob_idx = 0; blob_idx < bottom.size(); ++blob_idx) {
    // Base pointers of the input and output data for this blob pair.
    const Dtype* in_base = bottom[blob_idx]->cpu_data();
    Dtype* out_base = top[blob_idx]->mutable_cpu_data();
    // Walk over the samples of the batch (the n-th image, per the original
    // author's note).
    for (int sample = 0; sample < this->num_; ++sample) {
      const Dtype* sample_in = in_base + sample * this->bottom_dim_;
      Dtype* sample_out = out_base + sample * this->top_dim_;
      // Convolution, implemented as a matrix product.
      this->forward_cpu_gemm(sample_in, weight, sample_out);
      if (!this->bias_term_) {
        continue;
      }
      // Add the bias on top of the convolution result.
      const Dtype* bias = this->blobs_[1]->cpu_data();
      this->forward_cpu_bias(sample_out, bias);
    }
  }
}
// Backward pass (CPU).
//
// top:            blobs whose diff holds the gradient arriving from the layer
//                 above.
// propagate_down: propagate_down[i] tells whether a gradient must be written
//                 into bottom[i]'s diff.
// bottom:         blobs whose data is the forward input and whose diff
//                 receives the gradient passed on to the layer below.
template <typename Dtype>
void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
  for (int blob_idx = 0; blob_idx < top.size(); ++blob_idx) {
    // Gradient handed down from the layer above.
    const Dtype* top_diff = top[blob_idx]->cpu_diff();
    const Dtype* bottom_data = bottom[blob_idx]->cpu_data();
    // Gradient to hand on to the layer below.
    Dtype* bottom_diff = bottom[blob_idx]->mutable_cpu_diff();

    // Bias gradient, if necessary. Per the original author's note, each bias
    // receives the sum of the top diffs over the output map it belongs to.
    if (this->bias_term_ && this->param_propagate_down_[1]) {
      Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();
      for (int sample = 0; sample < this->num_; ++sample) {
        this->backward_cpu_bias(bias_diff, top_diff + sample * this->top_dim_);
      }
    }

    // Nothing more to do when neither the weight gradient nor the bottom
    // gradient is requested for this blob.
    if (!this->param_propagate_down_[0] && !propagate_down[blob_idx]) {
      continue;
    }

    for (int sample = 0; sample < this->num_; ++sample) {
      const Dtype* sample_top_diff = top_diff + sample * this->top_dim_;
      // Gradient w.r.t. the weights: computed from this sample's bottom_data
      // and top_diff. Note that the diffs are accumulated into weight_diff.
      if (this->param_propagate_down_[0]) {
        this->weight_cpu_gemm(bottom_data + sample * this->bottom_dim_,
            sample_top_diff, weight_diff);
      }
      // Gradient w.r.t. the bottom data, if necessary: computed from this
      // sample's top_diff and the weights, and stored in this sample's slice
      // of bottom_diff for the layer below.
      if (propagate_down[blob_idx]) {
        this->backward_cpu_gemm(sample_top_diff, weight,
            bottom_diff + sample * this->bottom_dim_);
      }
    }
  }
}

// Convolution of a single input sample, implemented as a matrix multiply.
//
// input:       data of one input sample.
// weights:     convolution weights.
// output:      destination of the convolution result for this sample.
// skip_im2col: when true, the im2col step is skipped and col_buffer_ is used
//              as it is (presumably filled by an earlier call -- verify
//              against callers).
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_cpu_gemm(const Dtype* input,
    const Dtype* weights, Dtype* output, bool skip_im2col) {
  // For a 1x1 convolution the input is multiplied directly; otherwise the
  // column buffer is the right-hand operand.
  const bool needs_col_buffer = !is_1x1_;
  if (needs_col_buffer && !skip_im2col) {
    // Per the original author's note: im2col turns every kernel-sized patch
    // visited by the sliding window into one column, giving a buffer with
    // kernel_dim_ rows and (output height * output width) columns.
    conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());
  }
  const Dtype* col_buff = needs_col_buffer ? col_buffer_.cpu_data() : input;

  // One matrix product per group, done with caffe_cpu_gemm (which, per the
  // original note, calls the CBLAS matrix multiply):
  //   output[output_offset_ * g] =
  //       weights[weight_offset_ * g] x col_buff[col_offset_ * g]
  // with the operands passed as
  //   weights : (conv_out_channels_ / group_) x kernel_dim_
  //   col_buff: kernel_dim_ x conv_out_spatial_dim_
  //   output  : (conv_out_channels_ / group_) x conv_out_spatial_dim_
  // The original note gives kernel_dim_ as
  // input channels per group * kernel height * kernel width.
  for (int group = 0; group < group_; ++group) {
    const Dtype* group_weights = weights + weight_offset_ * group;
    const Dtype* group_cols = col_buff + col_offset_ * group;
    Dtype* group_output = output + output_offset_ * group;
    // The scalar passed before the output is 0, so the output is overwritten
    // rather than accumulated into.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
        conv_out_channels_ / group_, conv_out_spatial_dim_, kernel_dim_,
        static_cast<Dtype>(1.), group_weights, group_cols,
        static_cast<Dtype>(0.), group_output);
  }
}