caffe 源码的解读（2）DataStructure

最新推荐文章于 2018-10-08 19:10:27 发布

_小马奔腾

最新推荐文章于 2018-10-08 19:10:27 发布

阅读量665

点赞数

分类专栏： caffe 文章标签： caffe 源码 layer

本文链接：https://blog.csdn.net/dongfang1984/article/details/56843085

版权

caffe 专栏收录该内容

17 篇文章 0 订阅

订阅专栏

caffe 以c++为核心代码。而且是纯正的c++。而c++与c不同的一大特点是面向对象，而面向对象的程序设计中，数据是核心。因此本文先分析caffe中重要的数据。

一、Blob的重要参数

protected:

  shared_ptr<SyncedMemory> data_;   // SyncedMemory wraps CPU/GPU memory allocation, synchronization and release (Blob does not deal with those details)
  shared_ptr<SyncedMemory> diff_;   // second buffer; Reshape allocates it with the same byte size as data_
  vector<int> shape_;    // Blob dimensions, one entry per axis
  int count_;            // number of elements stored in the Blob (product of all entries of shape_)
  int capacity_;         // number of elements the current allocation was sized for; Reshape only reallocates when count_ exceeds it
  /* Do not naively assume a blob is always a 4-dim NCHW vector. */

Batch类重要参数

// A Batch is nothing more than a data blob paired with a label blob.
template <typename Dtype>
class Batch {
 public:
  Blob<Dtype> data_;   // the batch's data
  Blob<Dtype> label_;  // the batch's class labels
};

blob类中重要的函数Reshape

// in blob.cpp
// Stores the requested shape in shape_, recomputes count_ as the product of
// every dimension, and swaps in new data_/diff_ buffers when count_ grows
// beyond capacity_.
template <typename Dtype>
void Blob<Dtype>::Reshape(const vector<int>& shape) {
  CHECK_LE(shape.size(), kMaxBlobAxes);
  const int num_axes = static_cast<int>(shape.size());
  shape_.resize(num_axes);
  count_ = 1;
  for (int axis = 0; axis < num_axes; ++axis) {
    const int dim = shape[axis];
    CHECK_GE(dim, 0);  // negative extents are rejected
    shape_[axis] = dim;
    count_ *= dim;
  }
  if (count_ <= capacity_) {
    return;  // the buffers sized for capacity_ elements are kept as they are
  }
  capacity_ = count_;
  // Per the original note: this only constructs the SyncedMemory objects;
  // host/device memory is not actually allocated until the data is first
  // accessed.
  data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
  diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
}

blob数据的访问

// Blob offers two ways to access its data (access to diff works similarly).
// The const accessors are read-only: the returned pointer does not allow the data to be modified.
const Dtype* cpu_data() const; 
const Dtype* gpu_data() const; 

// The mutable accessors return a writable pointer (access to diff_ is analogous).
Dtype* mutable_cpu_data(); 
Dtype* mutable_gpu_data();

DataTransformer类重要函数InferBlobShape

// Rough sketch of the core logic only (per the original note): img_channels,
// img_height, img_width and crop_size are not defined in this excerpt.
// Returns a 4-entry shape {1, channels, height, width}; a non-zero crop_size
// replaces both spatial extents.
vector<int> DataTransformer<Dtype>::InferBlobShape(const cv::Mat& cv_img){
    vector<int> shape(4);
    shape[0] = 1;  // num is always 1 here; callers overwrite it with the batch size
    shape[1] = img_channels;
    if (crop_size) {
        shape[2] = crop_size;
        shape[3] = crop_size;
    } else {
        shape[2] = img_height;
        shape[3] = img_width;
    }
    return shape;
}
    //实际上就是返回NCHW。注意num默认为1。因此我们常常在caffe源码中看到如下代码

// Typical call site: take the single-image shape, overwrite the leading
// dimension with the batch size, then reshape the prefetch buffers and the
// top blob accordingly.
vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);    // 4-entry shape: 1, channels, height, width
const int batch_size = this->layer_param_.image_data_param().batch_size();  // read batch_size from the layer parameters
   top_shape [0] = batch_size;
   for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
     this->prefetch_[i].data_.Reshape(top_shape);
   }  // PREFETCH_COUNT bounds the loop over prefetch_ entries (the original note gives its value as 3); it is unrelated to channels/height/width
   top[0]->Reshape(top_shape);

二、layer的重要参数

protected:

  LayerParameter layer_param_;    // layer description parameters, read from the network definition file in protocol buffers format
  Phase phase_;                  //  /** The phase: TRAIN or TEST */
  vector<shared_ptr<Blob<Dtype> > > blobs_;  // layer weights and biases (w, b); a vector is used because weights and biases are kept in two separate blobs
  vector<bool> param_propagate_down_;  // flags whether a gradient must be computed during back-propagation. NOTE(review): the original note says "per top blob", but the name suggests one flag per parameter blob -- confirm against upstream layer.hpp
  vector<Dtype> loss_;  // zero for non-loss layers; for a loss layer, the weight of the loss computed for each top blob

我们添加层时一般照着 ImageDataParameter 修改。该层的主要功能是：从参数给定的列表文件中读取图像列表以及类标，读取图像时会进行预处理，然后前向传播。

ImageDataParameter 的变量

// Parameters of the image data layer: where the image list lives and how the
// images are batched, shuffled, resized and pre-processed.
message ImageDataParameter {
  optional string source = 1;                     // path of the .txt list file (images with their labels)
  optional uint32 batch_size = 4 [default = 1];   // batch size; defaults to 1
  optional uint32 rand_skip = 7 [default = 0];    // randomly skip some data; defaults to 0; per the original note the skip count is rand_skip * rand(0,1)
  optional bool shuffle = 8 [default = false];    // whether to shuffle at every epoch; off by default
  optional uint32 new_height = 9 [default = 0];   // if new_height != 0, images are resized
  optional uint32 new_width = 10 [default = 0];   // same as new_height, for the width
  optional bool is_color = 11 [default = true];   // presumably color vs. grayscale loading -- confirm
  /* The parameters below are better specified in TransformationParameter. */
  optional float scale = 2 [default = 1];         // scaling applied to the data (default 1)
  optional string mean_file = 3;                  // mean file
  optional uint32 crop_size = 5 [default = 0];    // random crop size
  optional bool mirror = 6 [default = false];     // mirroring (left-right flip)
  optional string root_folder = 12 [default = ""]; // root directory of the images
}

BasePrefetchingDataLayer 类的变量

vector<shared_ptr<Batch<Dtype> > > prefetch_;   // the prefetch batches, held through shared_ptr
BlockingQueue<Batch<Dtype>*> prefetch_free_;    // presumably batches available to be filled -- confirm
BlockingQueue<Batch<Dtype>*> prefetch_full_;    // presumably batches already filled and waiting to be consumed -- confirm
Batch<Dtype>* prefetch_current_;                // presumably the batch currently in use -- confirm
Blob<Dtype>  transformed_data_;   // flagged by the original note as the important data member

参考：http://imbinwang.github.io/blog/inside-caffe-code-blob

_小马奔腾

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
caffe 源码的解读（2）DataStructure

Blob的重要参数protected: shared_ptr<SyncedMemory> data_; //SyncedMemory类封装了CPU/GPU内存申请、同步和释放（Blob不关心具体细节） shared_ptr<SyncedMemory> diff_; vector<int> shape_; //Blob维度参数 int count_; //
复制链接

扫一扫