caffe源码阅读4-layer.hpp

最新推荐文章于 2020-04-23 18:06:04 发布

thy_2014

最新推荐文章于 2020-04-23 18:06:04 发布

阅读量2.1k

点赞数

分类专栏：深度学习

本文链接：https://blog.csdn.net/thy_2014/article/details/51943159

版权

深度学习专栏收录该内容

21 篇文章 0 订阅

订阅专栏

An interface for the units of computation which can be composed into a Net.

Layers must implement a Forward function, in which they take their input (bottom) Blobs (if any) and compute their output Blobs (if any). They may also implement a Backward function, in which they compute the error gradients with respect to their input Blobs, given the error gradients with respect to their output Blobs.

按照我们对一般卷积神经网络的模型来理解：一个网络(net)包含很多层(layer)，而每层里面的东西无外乎数据(前馈的数据和反馈的误差)，这些数据在caffe里面已经用blob类来实现了，那么应该可以想到layer中应该包含了很多blob类，或者说是一个blob类型的vector。

1 layer中的数据有哪些？

 protected:
 /**
  * The protobuf message holding this layer's configuration. The constructor
  * initializes it as a copy of its `param` argument.
  */
  LayerParameter layer_param_;
  /**
   * The learnable parameters of the layer, stored as a vector of shared Blob
   * pointers. The constructor fills it from layer_param_.blobs() when the
   * protobuf carries any blobs.
   */
  vector<shared_ptr<Blob<Dtype> > > blobs_;
  /**
   * One flag per param blob indicating whether the diff of that blob should
   * be computed.
   * NOTE(review): presumably indexed in parallel with blobs_ -- confirm where
   * this vector is sized.
   */
  vector<bool> param_propagate_down_;

  /**
   * The vector that indicates whether each top blob has a non-zero weight in
   * the objective function.
   * NOTE(review): SetLossWeights() calls set_loss() and Forward() calls
   * loss(); neither accessor is shown here, presumably they write/read this
   * vector -- confirm.
   */
  vector<Dtype> loss_;

在源码中可以看到，确实有一个vector来存储很多blob的变量blobs_；

param_propagate_down_在注释中其实说明了，指示本层中每个参数blob(param blob，也就是blobs_里保存的可学习参数)是否需要计算diff；为什么会需要这个东西呢？这里就需要再次强调一下了：net > layer > blob。而一个layer中可能包含多个blob：除了可以有多个bottom、多个top之外，blobs_中也可以保存多个参数blob，param_propagate_down_针对的正是这些参数blob。

那么loss_是干嘛的呢？暂时也看不明白。

至于LayerParameter定义在caffe.proto中：

// Configuration of a single layer: the names of its bottom and top blobs, its
// name and type, the rules deciding whether it is included in the network,
// its parameter blobs with their learning-rate / weight-decay multipliers,
// per-top loss weights, and one optional option message per layer type.
message LayerParameter {
  repeated string bottom = 2; // the name of the bottom blobs
  repeated string top = 3; // the name of the top blobs
  optional string name = 4; // the layer name

  // Rules controlling whether and when a layer is included in the network,
  // based on the current NetState.  You may specify a non-zero number of rules
  // to include OR exclude, but not both.  If no include or exclude rules are
  // specified, the layer is always included.  If the current NetState meets
  // ANY (i.e., one or more) of the specified rules, the layer is
  // included/excluded.
  repeated NetStateRule include = 32;
  repeated NetStateRule exclude = 33;

  // NOTE
  // Add new LayerTypes to the enum below in lexicographical order (other than
  // starting with NONE), starting with the next available ID in the comment
  // line above the enum. Update the next available ID when you add a new
  // LayerType.
  //
  // LayerType next available ID: 38 (last added: CONTRASTIVE_LOSS)
  enum LayerType {
    // "NONE" layer type is 0th enum element so that we don't cause confusion
    // by defaulting to an existent LayerType (instead, should usually error if
    // the type is unspecified).
    NONE = 0;
    ABSVAL = 35;
    ACCURACY = 1;
    ARGMAX = 30;
    BNLL = 2;
    CONCAT = 3;
    CONTRASTIVE_LOSS = 37;
    CONVOLUTION = 4;
    DATA = 5;
    DROPOUT = 6;
    DUMMY_DATA = 32;
    EUCLIDEAN_LOSS = 7;
    ELTWISE = 25;
    FLATTEN = 8;
    HDF5_DATA = 9;
    HDF5_OUTPUT = 10;
    HINGE_LOSS = 28;
    IM2COL = 11;
    IMAGE_DATA = 12;
    INFOGAIN_LOSS = 13;
    INNER_PRODUCT = 14;
    LRN = 15;
    MEMORY_DATA = 29;
    MULTINOMIAL_LOGISTIC_LOSS = 16;
    MVN = 34;
    POOLING = 17;
    POWER = 26;
    RELU = 18;
    SIGMOID = 19;
    SIGMOID_CROSS_ENTROPY_LOSS = 27;
    SILENCE = 36;
    SOFTMAX = 20;
    SOFTMAX_LOSS = 21;
    SPLIT = 22;
    SLICE = 33;
    TANH = 23;
    WINDOW_DATA = 24;
    THRESHOLD = 31;
  }
  optional LayerType type = 5; // the layer type from the enum above

  // The blobs containing the numeric parameters of the layer
  repeated BlobProto blobs = 6;
  // The names of the parameter blobs -- useful for sharing parameters among
  // layers (but never required).
  repeated string param = 1001;
  // Whether to require shared weights to have the same shape, or just the same
  // count -- defaults to STRICT if unspecified.
  repeated DimCheckMode blob_share_mode = 1002;
  // NOTE(review): in this copy the STRICT value is commented out ("Disabled
  // for windows"), so PERMISSIVE is the only declared value, while the
  // blob_share_mode comment above still names STRICT as the default -- confirm
  // how an unspecified mode is treated in this build.
  enum DimCheckMode {
  // Neil: Disabled for windows
  //  // STRICT (default) requires that num, channels, height, width each match.
  //  STRICT = 0;
    // PERMISSIVE requires only the count (num*channels*height*width) to match.
    PERMISSIVE = 1;
  }
  // The ratio that is multiplied on the global learning rate. If you want to
  // set the learning ratio for one blob, you need to set it for all blobs.
  repeated float blobs_lr = 7;
  // The weight decay that is multiplied on the global weight decay.
  repeated float weight_decay = 8;

  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1,
  // to each top blob.
  repeated float loss_weight = 35;

  // Per-layer-type option messages; each one is optional.
  optional AccuracyParameter accuracy_param = 27;
  optional ArgMaxParameter argmax_param = 23;
  optional ConcatParameter concat_param = 9;
  optional ContrastiveLossParameter contrastive_loss_param = 40;
  optional ConvolutionParameter convolution_param = 10;
  optional DataParameter data_param = 11;
  optional DropoutParameter dropout_param = 12;
  optional DummyDataParameter dummy_data_param = 26;
  optional EltwiseParameter eltwise_param = 24;
  optional HDF5DataParameter hdf5_data_param = 13;
  optional HDF5OutputParameter hdf5_output_param = 14;
  optional HingeLossParameter hinge_loss_param = 29;
  optional ImageDataParameter image_data_param = 15;
  optional InfogainLossParameter infogain_loss_param = 16;
  optional InnerProductParameter inner_product_param = 17;
  optional LRNParameter lrn_param = 18;
  optional MemoryDataParameter memory_data_param = 22;
  optional MVNParameter mvn_param = 34;
  optional PoolingParameter pooling_param = 19;
  optional PowerParameter power_param = 21;
  optional ReLUParameter relu_param = 30;
  optional SigmoidParameter sigmoid_param = 38;
  optional SoftmaxParameter softmax_param = 39;
  optional SliceParameter slice_param = 31;
  optional TanHParameter tanh_param = 37;
  optional ThresholdParameter threshold_param = 25;
  optional WindowDataParameter window_data_param = 20;

  // Parameters for data pre-processing.
  optional TransformationParameter transform_param = 36;

  // Note: certain layers may have more than one computational engine
  // for their implementation. These layers include an Engine type and
  // engine parameter for selecting the implementation.
  // The default for the engine is set by the ENGINE switch at compile-time.

  // DEPRECATED: The layer parameters specified as a V0LayerParameter.
  // This should never be used by any code except to upgrade to the new
  // LayerParameter specification.
  optional V0LayerParameter layer = 1;
}

相当于是对层中各个参数的详细说明，例如该层的名字，该层的上一层下一层是什么，以及学习率等等参数。

继续再看hpp会发现各种虚函数，先来看看protected里面的方法：

2 前馈，反馈函数：

  /**
   * @brief Using the CPU device, compute the layer output.
   *
   * Pure virtual, so every concrete layer has to provide it. It is called by
   * the Forward() wrapper in CPU mode and by the default Forward_gpu().
   *
   * @param bottom the input blobs
   * @param top    the output blobs to be filled in
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) = 0;
  /**
   * @brief Using the GPU device, compute the layer output.
   *        Fall back to Forward_cpu() if unavailable.
   */
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
    // LOG(WARNING) << "Using CPU code as backup.";
    return Forward_cpu(bottom, top);
  }

  /**
   * @brief Using the CPU device, compute the gradients for any parameters and
   *        for the bottom blobs if propagate_down is true.
   *
   * Pure virtual, so every concrete layer has to provide it. It is called by
   * the Backward() wrapper in CPU mode and by the default Backward_gpu().
   *
   * @param top            the output blobs
   * @param propagate_down one flag per bottom blob
   * @param bottom         the input blobs
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      vector<Blob<Dtype>*>* bottom) = 0;
  /**
   * @brief Using the GPU device, compute the gradients for any parameters and
   *        for the bottom blobs if propagate_down is true.
   *        Fall back to Backward_cpu() if unavailable.
   */
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      vector<Blob<Dtype>*>* bottom) {
    // LOG(WARNING) << "Using CPU code as backup.";
    Backward_cpu(top, propagate_down, bottom);
  }

这几个函数理解起来应该很容易。重点是那几个XX_gpu()，为啥里面调用的是XX_cpu()啊？？！！如果是所谓的在GPU不可用的情况下，调用XX_cpu()还可以理解，但是什么都没有做直接调用XX_cpu了？不过这是虚函数哈，不要着急，在详细的实现里面应该能够明白所以然。

回答这里的问题(参见：caffe源码简单解析——Layer层)，在layer中主要的接口是前馈和反馈函数：

inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top)

inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom)

反馈中的参数propagate_down是一个与bottom等长的vector，其中每个元素表明是否需要把误差梯度向下传递给对应位置的bottom blob。而这两个函数会根据Caffe::mode()来判断在CPU中执行还是在GPU中执行。

注意：有些layer并没有GPU计算的实现，所以封装时加入了CPU的计算作为后备

3 检测blobs的数量是否正确：

  /**
   * Called by the parent Layer's SetUp to check that the number of bottom
   * and top Blobs provided as input match the expected numbers specified by
   * the {ExactNum,Min,Max}{Bottom,Top}Blobs() functions.
   *
   * A constraint is only enforced when its function returns a non-negative
   * value; a negative return value skips that check. In addition, when
   * EqualNumBottomTopBlobs() is true, the bottom and top counts must match.
   * Every violated constraint fails a CHECK whose message starts with
   * type_name().
   *
   * @param bottom the input blobs handed to the layer
   * @param top    the output blobs handed to the layer
   */
  virtual void CheckBlobCounts(const vector<Blob<Dtype>*>& bottom,
                               const vector<Blob<Dtype>*>& top) {
    // --- Bottom (input) blob count: exact / lower bound / upper bound. ---
    if (ExactNumBottomBlobs() >= 0) {
      CHECK_EQ(ExactNumBottomBlobs(), bottom.size())
          << type_name() << " Layer takes " << ExactNumBottomBlobs()
          << " bottom blob(s) as input.";
    }
    if (MinBottomBlobs() >= 0) {
      CHECK_LE(MinBottomBlobs(), bottom.size())
          << type_name() << " Layer takes at least " << MinBottomBlobs()
          << " bottom blob(s) as input.";
    }
    if (MaxBottomBlobs() >= 0) {
      CHECK_GE(MaxBottomBlobs(), bottom.size())
          << type_name() << " Layer takes at most " << MaxBottomBlobs()
          << " bottom blob(s) as input.";
    }
    // --- Top (output) blob count: exact / lower bound / upper bound. ---
    if (ExactNumTopBlobs() >= 0) {
      CHECK_EQ(ExactNumTopBlobs(), top.size())
          << type_name() << " Layer produces " << ExactNumTopBlobs()
          << " top blob(s) as output.";
    }
    if (MinTopBlobs() >= 0) {
      CHECK_LE(MinTopBlobs(), top.size())
          << type_name() << " Layer produces at least " << MinTopBlobs()
          << " top blob(s) as output.";
    }
    if (MaxTopBlobs() >= 0) {
      CHECK_GE(MaxTopBlobs(), top.size())
          << type_name() << " Layer produces at most " << MaxTopBlobs()
          << " top blob(s) as output.";
    }
    // --- Optional one-to-one pairing of bottom and top blobs. ---
    if (EqualNumBottomTopBlobs()) {
      CHECK_EQ(bottom.size(), top.size())
          << type_name() << " Layer produces one top blob as output for each "
          << "bottom blob input.";
    }
  }

这里涉及到了有些奇怪的函数，例如：ExactNumBottomBlobs()，但并不会影响阅读，CheckBlobCounts()实现的功能就是检测输入的bottom blob和输出的top blob是否在指定的范围内，既然是指定的范围，自然就会联想到在什么地方指定这个范围的？带着疑问继续阅读源码，在某个地方，总会明白的。

4 最后一个protected函数，SetLossWeights()：

  /**
   * Called by SetUp to initialize the weights associated with any top blobs in
   * the loss function.
   *
   * Does nothing when layer_param_ specifies no loss_weight. Otherwise exactly
   * one loss_weight per top blob is required. For every top blob whose weight
   * is non-zero, the weight is recorded through set_loss() and the whole diff
   * of that blob is filled with the weight value.
   *
   * @param top the top blobs of the layer
   */
  inline void SetLossWeights(vector<Blob<Dtype>*>* top) {
    const int num_loss_weights = layer_param_.loss_weight_size();
    // No weights configured: nothing to set up.
    if (!num_loss_weights) { return; }
    CHECK_EQ(top->size(), num_loss_weights) << "loss_weight must be "
        "unspecified or specified once per top blob.";
    for (int i = 0; i < top->size(); ++i) {
      const Dtype weight = layer_param_.loss_weight(i);
      if (weight != Dtype(0)) {
        this->set_loss(i, weight);
        Blob<Dtype>* const top_blob = (*top)[i];
        const int num_elements = top_blob->count();
        Dtype* diff = top_blob->mutable_cpu_diff();
        // Store the weight in every element of the top blob's diff.
        caffe_set(num_elements, weight, diff);
      }
    }
  }

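这里是设置损失权重(loss weight)的：对每个loss_weight不为0的top blob，先用set_loss()记录该权重，再用caffe_set()把该top blob的diff全部填成这个权重值。一般也只有在损失函数的那层才会计算损失值的，所以这个函数主要还是用在最后一层。由于这里是把权重直接写进了top blob的diff，所以可以大胆猜测，这是反馈的第一步吧。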

接着来看public中的非虚函数部分：

5 构造函数：

  /**
   * You should not implement your own constructor. Any set up code should go
   * to SetUp(), where the dimensions of the bottom blobs are provided to the
   * layer.
   */
  explicit Layer(const LayerParameter& param)
    : layer_param_(param) {
      // The only thing we do is to copy blobs if there are any.
      if (layer_param_.blobs_size() > 0) {
        blobs_.resize(layer_param_.blobs_size());
        for (int i = 0; i < layer_param_.blobs_size(); ++i) {
          blobs_[i].reset(new Blob<Dtype>());
          blobs_[i]->FromProto(layer_param_.blobs(i));
        }
      }
    }

这个构造函数，从注释上可以看到了，似乎基本都没有直接调用这个构造函数，因为在caffe中有各种各样的层。而不同的层都有自己的 SetUp()。那么这里的 Layer()做了什么事情呢？首先我们可以确认的一点是这里的layer.hpp是各种各样的层的一个抽象，也就是说其它的层都会继承这个。从这个构造函数的代码中可以看到，其实它只做了将blobs拷贝过来，但是具体的其它参数，以及方法都是空白的。

6 前馈和反馈函数：

  /**
   * @brief Given the bottom blobs, compute the top blobs and the loss.
   *
   * @param bottom
   *     the input blobs, whose data fields store the input data for this layer
   * @param top
   *     the preshaped output blobs, whose data fields will store this layer's
   *     outputs
   * \return The total loss from the layer.
   *
   * The Forward wrapper calls the relevant device wrapper function
   * (Forward_cpu or Forward_gpu) to compute the top blob values given the
   * bottom blobs.  If the layer has any non-zero loss_weights, the wrapper
   * then computes and returns the loss.
   *
   * Your layer should implement Forward_cpu and (optionally) Forward_gpu.
   */
  inline Dtype Forward(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top);

  /**
   * @brief Given the top blob error gradients, compute the bottom blob error
   *        gradients.
   *
   * @param top
   *     the output blobs, whose diff fields store the gradient of the error
   *     with respect to themselves
   * @param propagate_down
   *     a vector with equal length to bottom, with each index indicating
   *     whether to propagate the error gradients down to the bottom blob at
   *     the corresponding index
   * @param bottom
   *     the input blobs, whose diff fields will store the gradient of the error
   *     with respect to themselves after Backward is run
   *
   * The Backward wrapper calls the relevant device wrapper function
   * (Backward_cpu or Backward_gpu) to compute the bottom blob diffs given the
   * top blob diffs.
   *
   * Your layer should implement Backward_cpu and (optionally) Backward_gpu.
   */
  inline void Backward(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      vector<Blob<Dtype>*>* bottom);

这里的注释很详细，具体的实现也在layer.hpp中：

// Device-dispatching wrappers around the per-device forward and backward
// passes. Layers customize behaviour by implementing the cpu and gpu specific
// functions; these wrappers are not meant to be changed.
//
// Forward() runs Forward_cpu() or Forward_gpu() depending on Caffe::mode().
// Afterwards, for every top blob whose loss(top_id) is non-zero, the dot
// product of the blob's data and its diff (which holds the loss weights) is
// added to the returned loss. In GPU mode that accumulation is compiled out
// when CPU_ONLY is defined. Any other mode value is a fatal error.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  Dtype total_loss = 0;
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);
    for (int i = 0; i < top->size(); ++i) {
      if (this->loss(i)) {
        Blob<Dtype>* const top_blob = (*top)[i];
        const int num_elements = top_blob->count();
        const Dtype* values = top_blob->cpu_data();
        const Dtype* weights = top_blob->cpu_diff();
        total_loss += caffe_cpu_dot(num_elements, values, weights);
      }
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    for (int i = 0; i < top->size(); ++i) {
      if (this->loss(i)) {
        Blob<Dtype>* const top_blob = (*top)[i];
        const int num_elements = top_blob->count();
        const Dtype* values = top_blob->gpu_data();
        const Dtype* weights = top_blob->gpu_diff();
        // The GPU dot product reports its result through an out-parameter.
        Dtype partial_loss = 0;
        caffe_gpu_dot(num_elements, values, weights, &partial_loss);
        total_loss += partial_loss;
      }
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  return total_loss;
}

// Backward wrapper: hands the call to Backward_gpu() or Backward_cpu()
// according to Caffe::mode(). Any other mode value is a fatal error.
template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    vector<Blob<Dtype>*>* bottom) {
  switch (Caffe::mode()) {
  case Caffe::GPU:
    this->Backward_gpu(top, propagate_down, bottom);
    return;
  case Caffe::CPU:
    this->Backward_cpu(top, propagate_down, bottom);
    return;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}

其中具体的计算方式，可以先看看深度学习基础教程。在该代码中会根据不同的Caffe::mode()来选择不同的函数实现(CPU模式调用XX_cpu()，GPU模式调用XX_gpu())。

每种层的具体实现后面再慢慢分析~

thy_2014

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
caffe源码阅读4-layer.hpp

An interface for the units of computation which can be composed into a Net.Layer&s must implement a Forward function, in which they take their input (bottom) Blob&s (if any) and compute their outp
复制链接

扫一扫