Caffe source code: inner_product_layer.cpp

@brief: Backpropagation through Caffe's fully connected layers, ip2 -> ip1 (the ReLU layer is not covered)


Taking the backward pass of LeNet's fully connected layers as an example, ip2 -> ip1: there are 500 input neurons, 10 output neurons, and a batch size of 64, so the weight matrix to update is 10x500 (stored as N_ x K_).
For each output neuron, $y_i = \sum_j w_{ji} x_j + \text{bias\_multiplier} \cdot b_i$, so $\frac{\partial y_i}{\partial w_{ji}} = x_j$. The $b_i$ are the bias terms (10 in total), and $\frac{\partial y_i}{\partial b_i} = \text{bias\_multiplier} = 1$ (Caffe sets the bias multiplier to 1).
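Summing the per-sample derivative over the batch makes the contraction in the gemm below explicit; with $m$ indexing the 64 samples and $E$ the loss:

$$\frac{\partial E}{\partial w_{ji}} = \sum_{m=1}^{64} \frac{\partial E}{\partial y_i^{(m)}}\, x_j^{(m)}$$

which is exactly a $10 \times 64$ matrix (top_diff transposed) multiplied by a $64 \times 500$ matrix (bottom_data).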
Core code for the gradient with respect to the weight matrix:

// weight_diff = 1.0 * top_diff^T * bottom_data + 1.0 * weight_diff
// top_diff is M_ x N_ (used transposed here, i.e. N_ x M_), bottom_data is M_ x K_,
// and this->blobs_[0]->mutable_cpu_diff() is N_ x K_
// gemm computes C = alpha * op(A) * op(B) + beta * C
  caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
      N_, K_, M_,
      (Dtype)1., top_diff, bottom_data,
      (Dtype)1., this->blobs_[0]->mutable_cpu_diff());

top_diff holds the incoming gradients with respect to the $y_i$, $i \in [1, 10]$; as stored it is $64 \times 10$, and the gemm uses it transposed ($10 \times 64$). bottom_data holds the inputs $x_d$, $d \in [1, 500]$ ($64 \times 500$). this->blobs_[0]->mutable_cpu_diff() is the weight-gradient matrix to accumulate, shaped like $W$ ($10 \times 500$).
In matrix form, the computation and accumulation are:

this->blobs_[0]->mutable_cpu_diff() = 1.0 * top_diff^T * bottom_data + 1.0 * this->blobs_[0]->mutable_cpu_diff()
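Written as naive loops, the same accumulation looks like this (a minimal illustrative sketch, not Caffe code; the buffers are assumed to be raw row-major arrays, and fc_weight_grad is a hypothetical helper):

// weight_diff[i][j] += sum over the batch of top_diff[m][i] * bottom_data[m][j]
// M = batch size (64), N = output neurons (10), K = input neurons (500)
void fc_weight_grad(const float* top_diff, const float* bottom_data,
                    float* weight_diff, int M, int N, int K) {
  for (int i = 0; i < N; ++i)
    for (int j = 0; j < K; ++j)
      for (int m = 0; m < M; ++m)
        weight_diff[i * K + j] += top_diff[m * N + i] * bottom_data[m * K + j];
}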

Core code for the gradient with respect to the bias:

if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias:
    // this->blobs_[1]->mutable_cpu_diff() (N_) = alpha * top_diff^T (N_ x M_)
    //     * bias_multiplier_.cpu_data() (M_) + beta * this->blobs_[1]->mutable_cpu_diff() (N_)
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)1.,
        this->blobs_[1]->mutable_cpu_diff());
  }  // The bias is learned; the bias multiplier is set to 1, one multiplier per sample.
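Because bias_multiplier_ is an all-ones vector of length M_, this gemv simply sums the output gradients over the batch:

$$\frac{\partial E}{\partial b_i} = \sum_{m=1}^{64} \frac{\partial E}{\partial y_i^{(m)}}$$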


Core code for the gradient with respect to the input:
By the chain rule, the input $x_j$ contributes to every output $y_i$, so the incoming gradients of all the $y_i$ must be propagated back onto $x_j$.
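Per sample, this reads:

$$\frac{\partial E}{\partial x_j} = \sum_{i=1}^{10} \frac{\partial E}{\partial y_i}\, w_{ji}$$

i.e. in matrix form bottom_diff ($64 \times 500$) = top_diff ($64 \times 10$) $\cdot$ $W$ ($10 \times 500$), which is the non-transposed branch of the gemm below.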

 if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data
    if (transpose_) {
      // bottom_diff (M_ x K_) = top_diff (M_ x N_) * W^T, weights stored as K_ x N_
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    } else {
      // bottom_diff (M_ x K_) = top_diff (M_ x N_) * W, weights stored as N_ x K_
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    }
  }
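The non-transposed branch as naive loops (again an illustrative sketch over raw row-major buffers, not Caffe code; fc_bottom_grad is a hypothetical helper):

// bottom_diff[m][j] = sum over outputs of top_diff[m][i] * weight[i][j]
void fc_bottom_grad(const float* top_diff, const float* weight,
                    float* bottom_diff, int M, int N, int K) {
  for (int m = 0; m < M; ++m)
    for (int j = 0; j < K; ++j) {
      float acc = 0.f;
      for (int i = 0; i < N; ++i)
        acc += top_diff[m * N + i] * weight[i * K + j];
      bottom_diff[m * K + j] = acc;  // beta = 0: overwrite, don't accumulate
    }
}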

The full annotated source of inner_product_layer.cpp follows.

#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/inner_product_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void InnerProductLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int num_output = this->layer_param_.inner_product_param().num_output();  // number of inner product outputs
  bias_term_ = this->layer_param_.inner_product_param().bias_term();  // defaults to true
  transpose_ = this->layer_param_.inner_product_param().transpose();  // defaults to false
  N_ = num_output;  // number of output neurons of the fully connected layer
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  // Dimensions starting from "axis" are "flattened" into a single
  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
  // and axis == 1, N inner products with dimension CHW are performed.
  K_ = bottom[0]->count(axis);  // FC layer after a conv layer: the dimensions from "axis" onward are
                                // flattened into one vector, e.g. ip1 with input 64x50x4x4 gives K_ = 50*4*4 = 800;
                                // FC layer after an FC layer: K_ is the number of input neurons
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }
    // Initialize the weights
    vector<int> weight_shape(2);
    if (transpose_) {
      weight_shape[0] = K_;
      weight_shape[1] = N_;
    } else {
      weight_shape[0] = N_;
      weight_shape[1] = K_;
    }
    this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
    // fill the weights
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.inner_product_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());  // initialize the FC layer's weight matrix
    // debug info
    /*Blob<Dtype>* blobip = this->blobs_[0].get();
    Dtype* pointerip = blobip->mutable_cpu_data();
    for (int i = 0; i < blobip->count(); i++)
        std::cout << "The num at " << i << " is " << *(pointerip + i);*/
    // If necessary, initialize and fill the bias term
    if (bias_term_) {
      vector<int> bias_shape(1, N_);
      this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.inner_product_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());  // initialize the FC layer's bias
      // debug info
      //Blob<Dtype>* blobip = this->blobs_[1].get();
      //Dtype* pointerip = blobip->mutable_cpu_data();
      //for (int i = 0; i < blobip->count(); i++)
      //std::cout << "The num at " << i << " is " << *(pointerip + i);
    }
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Figure out the dimensions
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  const int new_K = bottom[0]->count(axis);
  CHECK_EQ(K_, new_K)
      << "Input size incompatible with inner product parameters.";
  // The first "axis" dimensions are independent inner products; the total
  // number of these is M_, the product over these dimensions.
  M_ = bottom[0]->count(0, axis);  // when the FC layer follows a conv layer, this is the number of
                                   // input feature-map stacks (3-D tensors), i.e. the number of samples;
                                   // for FC -> FC it is likewise the number of samples
  // The top shape will be the bottom shape with the flattened axes dropped,
  // and replaced by a single axis with dimension num_output (N_).
  vector<int> top_shape = bottom[0]->shape();
  top_shape.resize(axis + 1);
  top_shape[axis] = N_;
  top[0]->Reshape(top_shape);
  // Set up the bias multiplier   
  if (bias_term_) {
    vector<int> bias_shape(1, M_);
    bias_multiplier_.Reshape(bias_shape);
    caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());  // set every bias multiplier entry to 1
  }
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  caffe_cpu_gemm<Dtype>(CblasNoTrans, transpose_ ? CblasNoTrans : CblasTrans,
      M_, N_, K_, (Dtype)1.,
      bottom_data, weight, (Dtype)0., top_data);  // compute the output neuron values
  if (bias_term_) {  // add the bias to the output neuron values
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
        bias_multiplier_.cpu_data(),
        this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
  }
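  // Note: when transpose_ is false, the two gemm calls above are equivalent to
  // the naive loops below (an illustrative sketch only, not part of Caffe):
  //   for (int m = 0; m < M_; ++m)
  //     for (int i = 0; i < N_; ++i) {
  //       Dtype acc = bias_term_ ? this->blobs_[1]->cpu_data()[i] : Dtype(0);
  //       for (int j = 0; j < K_; ++j)
  //         acc += bottom_data[m * K_ + j] * weight[i * K_ + j];
  //       top_data[m * N_ + i] = acc;
  //     }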
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();  // pointer to the top (output) gradients
    const Dtype* bottom_data = bottom[0]->cpu_data();  // pointer to the input neuron values
    // Gradient with respect to weight
    if (transpose_) {
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          K_, N_, M_,
          (Dtype)1., bottom_data, top_diff,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());
    } else {
        // this->blobs_[0]->mutable_cpu_diff() = 1.0 * top_diff^T * bottom_data
        //     + 1.0 * this->blobs_[0]->mutable_cpu_diff()
        // top_diff is M_ x N_ (used transposed as N_ x M_), bottom_data is M_ x K_,
        // and the weight diff is N_ x K_; gemm computes C = alpha * op(A) * op(B) + beta * C
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,  // compute the weight gradient and accumulate;
          N_, K_, M_,                                  // math_functions orders the sizes as M, N, K
          (Dtype)1., top_diff, bottom_data,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());  // the weight-gradient blob
    }
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)1.,
        this->blobs_[1]->mutable_cpu_diff());
        // this->blobs_[1]->mutable_cpu_diff() (N_) = alpha * top_diff^T (N_ x M_)
        //     * bias_multiplier_.cpu_data() (M_) + beta * this->blobs_[1]->mutable_cpu_diff() (N_)
  }  // The bias is learned; the bias multiplier is set to 1, one multiplier per sample;
     // bias_multiplier_ is an all-ones vector.
  //const Dtype* p = bias_multiplier_.cpu_data();
  //std::cout << *(p + 1);

  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data
    if (transpose_) {
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    } else {
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          M_, K_, N_,  // follow the parameter order in math_functions exactly
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
          // bottom[0]->mutable_cpu_diff() (M_ x K_) =
          //     top_diff (M_ x N_) * this->blobs_[0]->cpu_data() (N_ x K_)
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(InnerProductLayer);
#endif

INSTANTIATE_CLASS(InnerProductLayer);
REGISTER_LAYER_CLASS(InnerProduct);

}  // namespace caffe
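
To convince yourself that these formulas are consistent, the backward pass can be checked against finite differences. Below is a minimal, self-contained sketch (plain C++, no Caffe dependency; fc_forward and the buffer layout are my own assumptions, mirroring the transpose_ == false loops above) that compares the analytic weight gradient with a numerical one:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Forward: top[m][i] = sum_j bottom[m][j] * W[i][j] + b[i]  (transpose_ == false)
static void fc_forward(const std::vector<float>& bottom, const std::vector<float>& W,
                       const std::vector<float>& b, std::vector<float>& top,
                       int M, int N, int K) {
  for (int m = 0; m < M; ++m)
    for (int i = 0; i < N; ++i) {
      float acc = b[i];
      for (int j = 0; j < K; ++j) acc += bottom[m * K + j] * W[i * K + j];
      top[m * N + i] = acc;
    }
}

int main() {
  const int M = 4, N = 3, K = 5;  // tiny sizes instead of 64 / 10 / 500
  std::vector<float> bottom(M * K), W(N * K), b(N, 0.1f);
  for (auto& v : bottom) v = rand() / float(RAND_MAX) - 0.5f;
  for (auto& v : W) v = rand() / float(RAND_MAX) - 0.5f;

  // Use the loss E = 0.5 * sum(top^2), so top_diff equals top.
  std::vector<float> top(M * N);
  fc_forward(bottom, W, b, top, M, N, K);
  const std::vector<float>& top_diff = top;

  // Analytic gradient: weight_diff[i][j] = sum_m top_diff[m][i] * bottom[m][j]
  std::vector<float> weight_diff(N * K, 0.f);
  for (int i = 0; i < N; ++i)
    for (int j = 0; j < K; ++j)
      for (int m = 0; m < M; ++m)
        weight_diff[i * K + j] += top_diff[m * N + i] * bottom[m * K + j];

  // Numerical gradient via central differences, one weight at a time.
  const float eps = 1e-3f;
  float max_err = 0.f;
  std::vector<float> t(M * N);
  for (int p = 0; p < N * K; ++p) {
    const float saved = W[p];
    W[p] = saved + eps;
    fc_forward(bottom, W, b, t, M, N, K);
    float e_plus = 0.f; for (float v : t) e_plus += 0.5f * v * v;
    W[p] = saved - eps;
    fc_forward(bottom, W, b, t, M, N, K);
    float e_minus = 0.f; for (float v : t) e_minus += 0.5f * v * v;
    W[p] = saved;
    max_err = std::max(max_err,
        std::fabs((e_plus - e_minus) / (2 * eps) - weight_diff[p]));
  }
  std::printf("max |numeric - analytic| = %g\n", max_err);  // small, ~1e-3 with float precision
  return 0;
}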