数据读取层(DataLayer)除了从LMDB、LEVELDB数据库读取数据之外,也可以通过ImageDataLayer从原始图像文件直接读取。
一.数据结构描述
// Parameters of the database-backed data layer.
message DataParameter {
// Which key-value database backend stores the examples.
enum DB {
LEVELDB = 0;// use a LevelDB database
LMDB = 1;// use an LMDB database
}
// Path to the source database.
optional string source = 1;
// Number of examples per batch.
optional uint32 batch_size = 4;
// NOTE(review): presumably skips up to this many entries at startup
// (e.g. to decorrelate parallel readers) — confirm against the reader code.
optional uint32 rand_skip = 7 [default = 0];
// Database backend to use; LevelDB by default.
optional DB backend = 8 [default = LEVELDB];
// Multiplicative scale applied to the input data.
optional float scale = 2 [default = 1];
// Path to a mean file to subtract from the input.
optional string mean_file = 3;
// Crop images to crop_size x crop_size; 0 disables cropping.
optional uint32 crop_size = 5 [default = 0];
// Whether to randomly mirror the input.
optional bool mirror = 6 [default = false];
// Force encoded images to be decoded as 3-channel color.
optional bool force_encoded_color = 9 [default = false];
// Prefetch queue: number of batches kept ready in host memory (default 4).
optional uint32 prefetch = 10 [default = 4];
}
二.数据读取层实现
数据读取层实现位于src/caffe/layers/base_data_layer.cpp中,内容如下:
(1)BaseDataLayer
template <typename Dtype>
// Constructor: forwards the layer parameter to the Layer base class and
// caches the transformation parameters for the data transformer built later
// in LayerSetUp.
BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
: Layer<Dtype>(param),
transform_param_(param.transform_param()) {
}
// Common setup for all data layers: decide whether labels are produced,
// construct and seed the data transformer, then let the subclass shape
// the top blobs.
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // One top blob means data only; a second top blob carries labels.
  output_labels_ = (top.size() != 1);
  // Build the transformer from the cached parameters and current phase,
  // and initialize its random number generator.
  data_transformer_.reset(
      new DataTransformer<Dtype>(transform_param_, this->phase_));
  data_transformer_->InitRand();
  // The subclasses should setup the size of bottom and top.
  DataLayerSetUp(bottom, top);
}
(2)BasePrefetchingDataLayer
template <typename Dtype>
// Constructor: sizes the prefetch ring from the layer's prefetch count,
// allocates every Batch, and parks them all on the free queue.
BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
    const LayerParameter& param)
    : BaseDataLayer<Dtype>(param),
      prefetch_(param.data_param().prefetch()),
      prefetch_free_(), prefetch_full_(), prefetch_current_() {
  for (auto& slot : prefetch_) {
    slot.reset(new Batch<Dtype>());
    // Every freshly allocated batch starts out on the free queue.
    prefetch_free_.push(slot.get());
  }
}
// Setup for the prefetching data layer: run the base-class setup, touch the
// CPU (and, in GPU mode, GPU) buffers of every prefetch batch so all
// allocations happen on the main thread, then start the prefetch thread.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);
  // Before starting the prefetch thread, we make cpu_data and gpu_data
  // calls so that the prefetch thread does not accidentally make simultaneous
  // cudaMalloc calls when the main thread is running. In some GPUs this
  // seems to cause failures if we do not so.
  for (auto& batch : prefetch_) {
    batch->data_.mutable_cpu_data();
    if (this->output_labels_) {
      batch->label_.mutable_cpu_data();
    }
  }
#ifndef CPU_ONLY
  // Pre-touch the GPU buffers as well when running in GPU mode.
  if (Caffe::mode() == Caffe::GPU) {
    for (auto& batch : prefetch_) {
      batch->data_.mutable_gpu_data();
      if (this->output_labels_) {
        batch->label_.mutable_gpu_data();
      }
    }
  }
#endif
  DLOG(INFO) << "Initializing prefetch";
  this->data_transformer_->InitRand();
  // Launch the background thread that fills the prefetch queue.
  StartInternalThread();
  DLOG(INFO) << "Prefetch initialized.";
}
(3)InternalThreadEntry内部预取线程入口
template <typename Dtype>
// Entry point of the internal prefetch thread: repeatedly takes a free
// Batch, fills it, optionally pushes its data to the GPU, and publishes it
// on the full queue, until the thread is asked to stop.
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
// In GPU mode, create a non-blocking CUDA stream for async host->GPU copies.
#ifndef CPU_ONLY
cudaStream_t stream;
if (Caffe::mode() == Caffe::GPU) {
CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
}
#endif
try {
while (!must_stop()) {// keep loading batches until asked to stop
Batch<Dtype>* batch = prefetch_free_.pop();// take a free batch
load_batch(batch);// fill it with data
#ifndef CPU_ONLY
if (Caffe::mode() == Caffe::GPU) {
batch->data_.data().get()->async_gpu_push(stream);
if (this->output_labels_) {
batch->label_.data().get()->async_gpu_push(stream);
}
CUDA_CHECK(cudaStreamSynchronize(stream));// wait for the copies to finish
}
#endif
prefetch_full_.push(batch);// publish the loaded batch on the full queue
}
} catch (boost::thread_interrupted&) {// leave the loop on interruption
// Interrupted exception is expected on shutdown
}
#ifndef CPU_ONLY
if (Caffe::mode() == Caffe::GPU) {
CUDA_CHECK(cudaStreamDestroy(stream));// release the CUDA stream
}
#endif
}
(4)Forward_cpu前向传播
template <typename Dtype>
// CPU forward pass: recycles the previously served batch back to the free
// queue, then takes the next loaded batch from the full queue and exposes
// its data (and labels, if any) through the top blobs without copying.
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
// Return the batch served on the previous call to the free queue
// before popping a new one, so the prefetch thread can refill it.
if (prefetch_current_) {
prefetch_free_.push(prefetch_current_);
}
prefetch_current_ = prefetch_full_.pop("Waiting for data");
// Reshape to loaded data.
top[0]->ReshapeLike(prefetch_current_->data_);
// Share the batch's buffer with the top blob instead of copying it.
top[0]->set_cpu_data(prefetch_current_->data_.mutable_cpu_data());
if (this->output_labels_) {// only when this layer also outputs labels
// Reshape to loaded labels.
top[1]->ReshapeLike(prefetch_current_->label_);
top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data());
}
}