caffe源码解读

renchenliang

于 2019-12-07 13:17:46 发布

阅读量582

点赞数

分类专栏：深度学习框架文章标签： caffe源码

本文链接：https://blog.csdn.net/renchenliang/article/details/101383100

版权

深度学习框架专栏收录该内容

1 篇文章 0 订阅

订阅专栏

caffe源码解读

Caffe组成结构

Caffe组成结构

caffe主要由下列4个类组成：Blob;layer;Net;Solver。主要功能如下：

caffe.proto 网络超参数的定义

caffe.proto定义了多个message结构体，它包含了网络需要传递的参数。required是必须要定义的，optional是可选的（后面可加默认选项），
这些message有的属于Blob：BlobProto、BlobProtoVector、Datum；有的属于Net和Solver：NetParameter、SolverParameter、SolverState、NetState、NetStateRule、ParamSpec；也有的属于layer：FillerParameter、LayerParameter、ArgMaxParameter、TransformationParameter、LossParameter、AccuracyParameter、ConcatParameter等；
下列为部分message的解释，更多的见（https://blog.csdn.net/weixin_39970417/article/details/80825601）

// Excerpt of caffe.proto: the message definitions that carry network
// hyper-parameters. Only BlobShape, BlobProto and ConvolutionParameter are
// shown; other messages are elided.
syntax = "proto2";

package caffe;  // namespace

// Specifies the shape (dimensions) of a Blob.
message BlobShape {
  // The shape is commonly Num * Channel * Height * Width. It is stored as a
  // repeated field (i.e. a vector) so higher-dimensional data can be wrapped.
  repeated int64 dim = 1 [packed = true];
}

// A block of Blob data: its shape, its data and its diff (gradient).
message BlobProto {
  // Shape of the Blob (comparable to `shape` in numpy).
  optional BlobShape shape = 7;
  // Data part of the Blob.
  repeated float data = 5 [packed = true];
  // Diff (gradient) part of the Blob.
  repeated float diff = 6 [packed = true];
  // Data part of the Blob (double precision).
  repeated double double_data = 8 [packed = true];
  // Diff part of the Blob (double precision).
  repeated double double_diff = 9 [packed = true];

  // 4D dimensions -- deprecated. Use "shape" instead.
  // Number of items in the Blob (e.g. the number of convolution kernels).
  optional int32 num = 1 [default = 0];
  // Number of channels.
  optional int32 channels = 2 [default = 0];
  // Height of the data.
  optional int32 height = 3 [default = 0];
  // Width of the data.
  optional int32 width = 4 [default = 0];
}

// ... (other messages omitted) ...

// Parameters of a convolution layer.
message ConvolutionParameter {
  optional uint32 num_output = 1;  // The number of outputs for the layer
  optional bool bias_term = 2 [default = true];  // whether to have bias terms

  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in all spatial dimensions, or once per spatial dimension.
  repeated uint32 pad = 3;          // The padding size; defaults to 0
  repeated uint32 kernel_size = 4;  // The kernel size
  repeated uint32 stride = 6;       // The stride; defaults to 1
  // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
  // holes. (Kernel dilation is sometimes referred to by its use in the
  // algorithme à trous from Holschneider et al. 1987.)
  // In other words, the convolution can skip a number of pixels between taps.
  repeated uint32 dilation = 18;  // The dilation; defaults to 1

  // For 2D convolution only, the *_h and *_w versions may also be used to
  // specify both spatial dimensions (height and width of padding, kernel
  // and stride).
  optional uint32 pad_h = 9 [default = 0];   // The padding height (2D only)
  optional uint32 pad_w = 10 [default = 0];  // The padding width (2D only)
  optional uint32 kernel_h = 11;  // The kernel height (2D only)
  optional uint32 kernel_w = 12;  // The kernel width (2D only)
  optional uint32 stride_h = 13;  // The stride height (2D only)
  optional uint32 stride_w = 14;  // The stride width (2D only)

  // Grouped convolution, which comes from the AlexNet paper.
  optional uint32 group = 5 [default = 1];  // The group size for group conv

  optional FillerParameter weight_filler = 7;  // The filler for the weight
  optional FillerParameter bias_filler = 8;    // The filler for the bias

  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  // Selects the convolution implementation. Per the article: DEFAULT is the
  // normal convolution, CAFFE is the matrix-multiplication based convolution,
  // and CUDNN is the convolution provided by the CUDA cuDNN library.
  optional Engine engine = 15 [default = DEFAULT];

  // The axis to interpret as "channels" when performing convolution.
  // Preceding dimensions are treated as independent inputs;
  // succeeding dimensions are treated as "spatial".
  // With (N, C, H, W) inputs, and axis == 1 (the default), we perform
  // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for
  // groups g>1) filters across the spatial axes (H, W) of the input.
  // With (N, C, D, H, W) inputs, and axis == 1, we perform
  // N independent 3D convolutions, sliding (C/g)-channels
  // filters across the spatial axes (D, H, W) of the input.
  optional int32 axis = 16 [default = 1];

  // Whether to force use of the general ND convolution, even if a specific
  // implementation for blobs of the appropriate number of spatial dimensions
  // is available. (Currently, there is only a 2D-specific convolution
  // implementation; for input blobs with num_axes != 2, this option is
  // ignored and the ND implementation will be used.)
  optional bool force_nd_im2col = 17 [default = false];
}

Blob模块

Blob类是caffe数据的载体。它相当于一个4维数组（n,c,h,w）。它的成员变量如下：

protected: shared_ptr<SyncedMemory> data_; //存储前向传递数据 shared_ptr<SyncedMemory> diff_; //存储反向传递梯度 shared_ptr<SyncedMemory> shape_data_;// 参数维度old version vector<int> shape_; //参数维度 int count_; //Blob存储的元素个数（shape_所有元素乘积） int capacity_;//当前Blob的元素个数（控制动态分配）

其中SyncedMemory类将在后面讲解。

/**
 * Give the blob a new shape.
 *
 * The element count is recomputed as the product of all axes. The data/diff
 * buffers are only reallocated when the new count exceeds the current
 * capacity; otherwise reshaping just updates the stored shape.
 *
 * @param shape size of every axis; each entry must be >= 0, there may be at
 *        most kMaxBlobAxes entries, and their product must not exceed INT_MAX.
 */
template <typename Dtype>
void Blob<Dtype>::Reshape(const vector<int>& shape) {
  CHECK_LE(shape.size(), kMaxBlobAxes);
  count_ = 1;
  shape_.resize(shape.size());

  // (Re)create the SyncedMemory copy of the shape when it does not exist yet
  // or is too small to hold one int per axis.
  const bool shape_buffer_too_small =
      !shape_data_ || shape_data_->size() < shape.size() * sizeof(int);
  if (shape_buffer_too_small) {
    shape_data_.reset(new SyncedMemory(shape.size() * sizeof(int)));
  }
  int* const shape_copy = static_cast<int*>(shape_data_->mutable_cpu_data());

  for (int i = 0; i < shape.size(); ++i) {
    CHECK_GE(shape[i], 0);
    // Guard the multiplication below against int overflow. Skipped once the
    // running product is 0 (avoids dividing by zero; 0 * x cannot overflow).
    if (count_ != 0) {
      CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX";
    }
    shape_copy[i] = shape[i];
    shape_[i] = shape[i];
    count_ *= shape[i];
  }

  // The existing buffers are large enough: nothing to allocate.
  if (count_ <= capacity_) {
    return;
  }
  capacity_ = count_;
  data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
  diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
}

获取Blob cpu上的数据(也有Gpu的)

/**
 * Read-only access to the blob's data on the CPU.
 * Requires data_ to be set (enforced by CHECK).
 *
 * @return pointer to the data; the caller must not modify it.
 */
template <typename Dtype>
const Dtype* Blob<Dtype>::cpu_data() const {
  CHECK(data_);
  return static_cast<const Dtype*>(data_->cpu_data());
}

/**
 * Writable access to the blob's data on the CPU.
 * Requires data_ to be set (enforced by CHECK).
 *
 * @return pointer to the data; the caller may modify it.
 */
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_cpu_data() {
  CHECK(data_);
  return static_cast<Dtype*>(data_->mutable_cpu_data());
}

Blob有两种访问数据的方式：一种是只读的（cpu_data()），比如读取上一层bottom的输出data时，我们只能读取它而不能修改它；另一种是可写的（mutable_cpu_data()），比如把输出写入top数据传给下一层时，我们需要修改数据。

SyncedMemory类

SyncedMemory类是负责Blob的内存管理，如内存的释放与分配。它的主要成员变量如下：

// Main data members of SyncedMemory, the class that manages the memory behind
// a Blob (excerpt; member functions are not shown here).
class SyncedMemory {
   private:
        void* cpu_ptr_;// pointer to the host (CPU) memory
        void* gpu_ptr_;// pointer to the device (GPU) memory
        size_t size_;  // size of the data; passed to CaffeMallocHost as the allocation size
        SyncedHead head_;// current state of the data (see enum SyncedHead)
        // Set to true by to_cpu() whenever it allocates cpu_ptr_ itself.
        bool own_cpu_data_;
        // Filled in by CaffeMallocHost when the host buffer is allocated.
        bool cpu_malloc_use_cuda_;
        // NOTE(review): presumably the GPU counterpart of own_cpu_data_ — confirm in syncedmem.cpp.
        bool own_gpu_data_;
        // NOTE(review): presumably the id of the GPU the buffer lives on — confirm in syncedmem.cpp.
        int gpu_device_;
};

SyncedMemory屏蔽了代码对不同硬件设备的内存分配的感知，同时隐藏了CPU和GPU之间的同步过程。
SyncedMemory采用“lazy”的模式，就是内存的实际申请时机是在第一次使用时进行的(通过枚举状态),其代码如下（syncedmem.hpp syncedmem.cpp）

// Where the valid copy of a SyncedMemory buffer currently lives.
enum SyncedHead {
  UNINITIALIZED,  // nothing allocated yet; to_cpu() allocates and zero-fills
  HEAD_AT_CPU,    // host copy is current; to_cpu() does nothing
  HEAD_AT_GPU,    // device copy is current; to_cpu() copies it to the host
  SYNCED          // to_cpu() does nothing in this state
};

// Make the host copy of the buffer usable, allocating it on first use.
//  - UNINITIALIZED: allocate host memory, zero it, state becomes HEAD_AT_CPU.
//  - HEAD_AT_GPU:   allocate host memory if needed, copy device -> host,
//                   state becomes SYNCED (NO_GPU in CPU_ONLY builds).
//  - HEAD_AT_CPU / SYNCED: nothing to do.
inline void SyncedMemory::to_cpu() {
  check_device();
  if (head_ == UNINITIALIZED) {
    // First use: allocate the host buffer and clear it.
    CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
    caffe_memset(size_, 0, cpu_ptr_);
    head_ = HEAD_AT_CPU;
    own_cpu_data_ = true;
  } else if (head_ == HEAD_AT_GPU) {
#ifndef CPU_ONLY
    // The device holds the current data: make sure a host buffer exists,
    // then copy the data over.
    if (cpu_ptr_ == NULL) {
      CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
      own_cpu_data_ = true;
    }
    caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_);
    head_ = SYNCED;
#else
    NO_GPU;
#endif
  }
  // HEAD_AT_CPU and SYNCED fall through untouched.
}

layer类

layer类主要在编写网络结构prototxt文件时用到。例如：

# Example convolution layer from a network prototxt.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # ParamSpec entries (declared inside LayerParameter); they mainly carry the
  # learning-rate and decay multipliers.
  # NOTE(review): presumably the first entry applies to the weights and the
  # second to the bias — confirm against caffe.proto.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  # convolution_param is also a field of LayerParameter.
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    pad: 2
    # Declared as `optional FillerParameter weight_filler` in
    # ConvolutionParameter; FillerParameter defines type, std, min/max, etc.
    weight_filler {
      type: "gaussian"
      std: 0.01            # standard deviation: distribution with stdev 0.01 (default mean: 0)
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layer里面的所有参数都是layer类的成员变量,其中layer类里面的LayerParameter定义了大量的参数变量（在caffe.proto文件里 message LayerParameter{…}）。
layer类主要成员变量如下

// Main data members of the Layer base class (excerpt; member functions are
// shown separately).
template <typename Dtype>
class Layer {
 protected:
  // Layer parameters stored in the protobuf file, read from the
  // protocol-buffers network description. Initialized in the constructor.
  LayerParameter layer_param_;
  // Layer phase: whether the layer takes part in training or in testing.
  Phase phase_;
  // Learnable parameters (weights and bias). A vector is used because the
  // weights and the bias are kept in two separate blobs. Initialized in the
  // base Layer only when they are defined in the description file.
  vector<shared_ptr<Blob<Dtype> > > blobs_;
  // For each learnable parameter blob, whether its gradient has to be
  // computed during the backward pass.
  vector<bool> param_propagate_down_;
  // Zero for non-loss layers; for a loss layer, the weight of the loss
  // computed from each top blob.
  vector<Dtype> loss_;

 private:
  /** Whether this layer is actually shared by other nets */
  bool is_shared_;
  // When the layer is shared, this mutex keeps the forward pass working
  // correctly.
  shared_ptr<boost::mutex> forward_mutex_;
};

layer类的主要成员函数如下，也是最核心的东西，包括网络cpu、gpu的forward与backward函数

// Core member functions of the Layer base class (declarations only; excerpt).
template <typename Dtype>
class Layer {
 public:
  // Forward-pass entry point: takes the bottom (input) blobs and the top
  // (output) blobs and returns a Dtype.
  // NOTE(review): presumably the returned value is the layer's loss — confirm
  // in layer.hpp.
  inline Dtype Forward(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top);

  // Backward-pass entry point: given the gradients with respect to the top
  // outputs, compute the gradients with respect to the inputs and pass them
  // to the bottom blobs. A layer with parameters also computes the gradients
  // with respect to each parameter and stores them internally.
  inline void Backward(const vector<Blob<Dtype>*>& top,
                       const vector<bool>& propagate_down,
                       const vector<Blob<Dtype>*>& bottom);

 protected:
  // CPU implementations. Both are pure virtual, so every concrete layer has
  // to provide them.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top) = 0;
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom) = 0;
};

由于其是纯虚函数，必须要被其继承类实现

 //类成员函数的实现    
/**
 * Run the backward pass using the implementation that matches the current
 * Caffe mode: Backward_cpu in CPU mode, Backward_gpu in GPU mode. Any other
 * mode is reported with LOG(FATAL).
 *
 * @param top             blobs passed through to the implementation as `top`
 * @param propagate_down  passed through unchanged to the implementation
 * @param bottom          blobs passed through to the implementation as `bottom`
 */
template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const auto current_mode = Caffe::mode();
  if (current_mode == Caffe::CPU) {
    Backward_cpu(top, propagate_down, bottom);
  } else if (current_mode == Caffe::GPU) {
    Backward_gpu(top, propagate_down, bottom);
  } else {
    LOG(FATAL) << "Unknown caffe mode.";
  }
}

Forward与Backward函数本身只负责分发：以上面的Backward为例，它根据Caffe::mode()调用Backward_cpu（CPU模式）或Backward_gpu（GPU模式），真正的计算由各个layer实现的这些函数完成。

layer主要包括以下几层：

输入层DataLayer

Datalayer

里面用了高效的数据读取（LMDB、LevelDB），并利用多线程的方式加速solver对数据的读取，还包括一些简单的数据预处理（resize、crop、mirror、mean subtraction等，由单独的data_transformer.cpp处理，并在data_layer中被调用）。其中重要的参数定义在文章最前面介绍的caffe.proto的message结构体中。DataLayer类的继承关系如下。

Datalayer里的load_batch函数实现对LMDB数据的batch读取。

下面为prototxt中的datalayer,里面的超参数都可以在DataLayer的cpp里面找到，存储在LayerParameter里。

# Example data layer from a network prototxt.
layer {
  name: "mnist"
  type: "Data"
  # The layer has two outputs: "data" and "label".
  top: "data"
  top: "label"
  # Rule restricting the layer to the TRAIN phase.
  include {
    phase: TRAIN
  }
  transform_param {
    # 0.00390625 == 1/256.
    # NOTE(review): presumably each input value is multiplied by this factor — confirm in data_transformer.cpp.
    scale: 0.00390625
  }
  data_param {
    # Path of the database to read.
    source: "/home/xy/caffe-master/examples/mnist/mnist_train_lmdb"
    batch_size: 64
    # Database format of `source`.
    backend: LMDB
  }
}

BlockingQueue

BlockingQueue是线程安全的队列。

InternalThread

InternalThread封装了boost::thread，主要用于建立多线程对数据的读取

网络层ReLULayer

大部分网络层继承于NeuronLayer，而NeuronLayer继承于我们前面提到的Layer。

// Declaration of the ReLU layer (excerpt). It derives from NeuronLayer.
template <typename Dtype>
class ReLULayer : public NeuronLayer<Dtype> {
 public:
  // Forwards the layer parameters to the NeuronLayer base class.
  explicit ReLULayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
  // Type name of this layer.
  virtual inline const char* type() const { return "ReLU"; }

 protected:
  // CPU forward and backward passes; the GPU counterparts are omitted from
  // this excerpt.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
};

// ReLU layer implementation.
/**
 * CPU forward pass: for every element x of bottom[0] writes
 *   max(x, 0) + negative_slope * min(x, 0)
 * to the matching element of top[0]. With negative_slope == 0 this is the
 * plain ReLU max(x, 0).
 *
 * @param bottom input blobs; only bottom[0] is read
 * @param top    output blobs; only top[0] is written
 */
template <typename Dtype>
void ReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                   const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();

  const int count = bottom[0]->count();
  const Dtype negative_slope =
      this->layer_param_.relu_param().negative_slope();
  for (int i = 0; i < count; ++i) {
    // The second term is the "leaky" part; it was configured but never
    // applied before.
    top_data[i] = std::max(bottom_data[i], Dtype(0))
        + negative_slope * std::min(bottom_data[i], Dtype(0));
  }
}

/**
 * CPU backward pass: when propagate_down[0] is set, writes for every element
 *   bottom_diff = top_diff * ((x > 0) + negative_slope * (x <= 0))
 * where x is the matching element of bottom[0]'s data. With
 * negative_slope == 0 the gradient passes through only where x > 0.
 *
 * @param top            output blobs; only top[0]'s diff is read
 * @param propagate_down only element 0 is consulted
 * @param bottom         input blobs; bottom[0]'s data is read and its diff
 *                       is written
 */
template <typename Dtype>
void ReLULayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
                                    const vector<bool>& propagate_down,
                                    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    const Dtype* top_diff = top[0]->cpu_diff();

    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int count = bottom[0]->count();
    const Dtype negative_slope =
        this->layer_param_.relu_param().negative_slope();

    for (int i = 0; i < count; ++i) {
      // The comparisons evaluate to 0 or 1 and select the slope that applies.
      bottom_diff[i] = top_diff[i] * ((bottom_data[i] > 0)
          + negative_slope * (bottom_data[i] <= 0));
    }
  }
}

layer_factory

caffe里面有许多网络层，这些网络层由layer_factory连接到Net中。layer_factory定义如下。这里有2个类，并用了2个宏定义，用来注册layer。

/**
 * Registry that maps a layer type name to the function creating that layer.
 * The class is never instantiated; everything goes through static members.
 */
template <typename Dtype>
class LayerRegistry {
 public:
  // Creator: pointer to a function that builds a layer from its parameters
  // and returns it as a shared_ptr<Layer<Dtype> >.
  typedef shared_ptr<Layer<Dtype> > (*Creator)(const LayerParameter&);
  typedef std::map<string, Creator> CreatorRegistry;

  // Returns the registry. It is created on the first call, allocated on the
  // heap, and never deleted by this class.
  static CreatorRegistry& Registry() {
    static CreatorRegistry* g_registry_ = new CreatorRegistry();
    return *g_registry_;
  }

  // Adds a creator. An existing entry for the same type is overwritten.
  static void AddCreator(const string& type, Creator creator) {
    CreatorRegistry& registry = Registry();
    LOG(INFO) << "AddCreator: " << type;
    registry[type] = creator;
  }

  // Get a layer using a LayerParameter. The layer type is taken from
  // param.type(); an unregistered type fails the CHECK.
  static shared_ptr<Layer<Dtype> > CreateLayer(const LayerParameter& param) {
    const string& type = param.type();
    CreatorRegistry& registry = Registry();

    // Include the offending type in the message so the failure is actionable.
    CHECK_EQ(registry.count(type), 1) << "Unknown layer type: " << type;
    return registry[type](param);
  }

 private:
  // Layer registry should never be instantiated
  // everything is done with its static variables.
  LayerRegistry() {}
};

/**********************************************************/
template <typename Dtype>
class LayerRegisterer {
 public:
  LayerRegisterer(const string& type,
                  shared_ptr<Layer<Dtype> > (*creator)(const LayerParameter&)) {
    // LOG(INFO) << "Registering layer type: " << type;
    LayerRegistry<Dtype>::AddCreator(type, creator);
  }
};

// Registers `creator` for layer `type` in both the float and the double
// registry by defining two static LayerRegisterer objects. The expansion
// deliberately has no trailing semicolon; the caller supplies it.
#define REGISTER_LAYER_CREATOR(type, creator)                                  \
  static LayerRegisterer<float> g_creator_f_##type(#type, creator<float>);     \
  static LayerRegisterer<double> g_creator_d_##type(#type, creator<double>)

// Defines a function template Creator_<type>Layer that builds a
// <type>Layer<Dtype> from a LayerParameter and returns it as a
// shared_ptr<Layer<Dtype> >, then registers that function through
// REGISTER_LAYER_CREATOR under the name <type>.
#define REGISTER_LAYER_CLASS(type)                                             \
  template <typename Dtype>                                                    \
  shared_ptr<Layer<Dtype> > Creator_##type##Layer(const LayerParameter& param) \
  {                                                                            \
    return shared_ptr<Layer<Dtype> >(new type##Layer<Dtype>(param));           \
  }                                                                            \
  REGISTER_LAYER_CREATOR(type, Creator_##type##Layer)

}  // namespace caffe

最后在每个layer的结尾都有

// Explicitly instantiate AbsValLayer for float and double.
INSTANTIATE_CLASS(AbsValLayer);
// Register the layer under the type name AbsVal.
REGISTER_LAYER_CLASS(AbsVal);

其中INSTANTIATE_CLASS定义在common.h中

// Instantiate a class template for float and for double.
// Besides the two explicit instantiations, the macro defines a char variable
// named gInstantiationGuard<classname>. The expansion has no trailing
// semicolon; the caller supplies it.
#define INSTANTIATE_CLASS(classname) \
  char gInstantiationGuard##classname; \
  template class classname<float>; \
  template class classname<double>

Util工具

IO操作

IO.cpp的功能主要是读取prototxt文件。

/**
 * Parse a protobuf text-format file (e.g. a prototxt) into `proto`.
 *
 * @param filename path of the file to read; the CHECK fails when it cannot
 *        be opened
 * @param proto    message that receives the parsed content
 * @return the result of TextFormat::Parse (true on success)
 */
bool ReadProtoFromTextFile(const char* filename, Message* proto) {
  const int fd = open(filename, O_RDONLY);
  CHECK_NE(fd, -1) << "File not found: " << filename;
  bool success = false;
  {
    // Stack object instead of new/delete. The inner scope destroys the
    // stream before the descriptor is closed, as the original ordering did.
    FileInputStream input(fd);
    success = google::protobuf::TextFormat::Parse(&input, proto);
  }
  close(fd);
  return success;
}

renchenliang

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
caffe源码解读

caffe源码解读Caffe组成结构Blob模块SyncedMemory类layer类输入层Data Layer网络层ReLULayerCaffe组成结构caffe主要由下列4个类组成：Blob;layer;Net;Solver。主要功能如下：caffe.proto定义了多个message结构体，它包含了网络需要传递的参数。required是必须要定义的，optional是可选的（后面可加默...
复制链接

扫一扫

专栏目录