Caffe Data Layers: ImageDataLayer, DataLayer, WindowDataLayer, etc.


This article is mainly based on the post "梳理caffe代码image_data_layer、data_layer、window_data_layer(七)" (Walking through the Caffe code: image_data_layer, data_layer, window_data_layer, part 7).

Overview

The data layers feed input data to the model. Their responsibilities are:
- reading the data (from whichever storage backend the particular layer supports)
- preprocessing the data (crop, resize, mirror, etc.), as configured in the prototxt sketch below

A data layer is still a layer, so its implementation must provide the mandatory layer interface plus one interface specific to data layers:
- layer initialization: LayerSetUp
- forward pass: Forward
- backward pass: Backward
- prefetching: Prefetch (specific to data layers)
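
As a concrete reference, a typical data layer definition in a network prototxt looks roughly like the following sketch (paths and values are illustrative only): transform_param configures the preprocessing, data_param configures the reading.

layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include { phase: TRAIN }
  transform_param {      # preprocessing: mirror / crop / mean subtraction
    mirror: true
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
  data_param {           # reading: LMDB database and batch size
    source: "examples/imagenet/ilsvrc12_train_lmdb"
    backend: LMDB
    batch_size: 64
  }
}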

The overall inheritance hierarchy (summarized from the class declarations below) is:

- Layer → BaseDataLayer → MemoryDataLayer
- Layer → BaseDataLayer → BasePrefetchingDataLayer (which also inherits InternalThread) → DataLayer / ImageDataLayer / WindowDataLayer
- Layer → DummyDataLayer / HDF5DataLayer / HDF5OutputLayer

Layer

Layer is Caffe's basic computational unit; a Net is assembled from a sequence of Layers. Every subclass of Layer must implement the Forward method (which takes the bottom blobs as input and writes its results to the top blobs), and may optionally implement the Backward method (which, given the gradients with respect to the top blobs, computes the gradients of the loss with respect to the bottom blobs).
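
To make this contract concrete, here is a minimal sketch of a hypothetical Layer subclass (a made-up "times two" layer, not part of Caffe) that fills in Reshape, Forward_cpu and Backward_cpu; a real layer would also be registered with REGISTER_LAYER_CLASS.

#include <vector>
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// Hypothetical example: a layer whose top is simply 2 * bottom.
template <typename Dtype>
class TimesTwoLayer : public Layer<Dtype> {
 public:
  explicit TimesTwoLayer(const LayerParameter& param) : Layer<Dtype>(param) {}
  virtual inline const char* type() const { return "TimesTwo"; }

  // the top blob takes the same shape as the bottom blob
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    top[0]->ReshapeLike(*bottom[0]);
  }

 protected:
  // top = 2 * bottom
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    caffe_cpu_scale(bottom[0]->count(), Dtype(2),
                    bottom[0]->cpu_data(), top[0]->mutable_cpu_data());
  }
  // bottom_diff = 2 * top_diff
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    if (propagate_down[0]) {
      caffe_cpu_scale(top[0]->count(), Dtype(2),
                      top[0]->cpu_diff(), bottom[0]->mutable_cpu_diff());
    }
  }
};

}  // namespace caffe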

layer.hpp

namespace caffe {


template <typename Dtype>
class Layer {
 public:

  // The explicit constructor only copies the values of the layer parameter description and does not need to be overridden; all real initialization happens in SetUp().
  // Subclasses of Layer explicitly call Layer's constructor.
  explicit Layer(const LayerParameter& param)
    : layer_param_(param), is_shared_(false) {
    //..
  }

  // virtual destructor
  virtual ~Layer() {}

  // Layer setup: called at model initialization time to set up the layer and its connections.
  // @param bottom the layer's input data; the blobs' storage has already been allocated
  // @param top the layer's output data; the blob objects are constructed but their storage has not yet been allocated. The required size depends on the bottom blobs and layer_param_, and the allocation is done in Reshape.
  void SetUp(const vector<Blob<Dtype>*>& bottom, 
      const vector<Blob<Dtype>*>& top) {
    InitMutex();
    // Check that the numbers of bottom and top blobs are acceptable; each layer handles a specific number of inputs and outputs.
    CheckBlobCounts(bottom, top);
    // Call LayerSetUp to perform layer-specific initialization; each Layer subclass overrides it for its own setup.
    LayerSetUp(bottom, top);
    // Call Reshape to allocate appropriately sized storage for the top blobs.
    Reshape(bottom, top);
    // Set the loss-weight multiplier for each top blob; it is zero for the top blobs of non-loss layers.
    SetLossWeights(top);
  }


   /**
   * @brief Layer-specific setup; each Layer subclass should implement this virtual function.
   * @param bottom
   *     the input blobs, whose data_ and diff_ members hold the relevant data
   * @param top
   *     the output blobs; the blob objects are constructed but their storage is not yet allocated (the allocated but unshaped output)
   * This method performs one-time layer-specific setup, including reading and processing the relevant
   * parameters from layer_param_. Shaping the top blobs and internal buffers is done in Reshape, which
   * is called before Forward. Overridden by derived classes.
   */
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}

  /**
   * @brief Compute the shapes of the top blobs and internal buffers from the bottom blob shapes and layer_param_, and allocate storage for them. Every Layer subclass must override Reshape.
   */
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) = 0;


  // These two functions are non-virtual; internally they call the virtual functions below (Forward_cpu and (optionally) Forward_gpu, and their Backward counterparts) to do the actual forward pass and error back-propagation. Each Layer subclass must implement the CPU version and may implement the GPU version.

  // Receives data from the bottom blobs, performs the computation, and writes the output to the top blobs.
  inline Dtype Forward(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);

  // Given the gradient of the loss with respect to the top blobs, compute the gradient of the loss with respect to the bottom blobs and with respect to the layer's internal parameters; the former is written to the bottom blobs' diff_, the latter is stored inside the layer.
   /** @param top
   *     the output blobs, whose diff fields store the gradient of the error
   *     with respect to themselves
   * @param propagate_down
   *     a vector with equal length to bottom, with each index indicating
   *     whether to propagate the error gradients down to the bottom blob at
   *     the corresponding index
   * @param bottom
   *     the input blobs, whose diff fields will store the gradient of the error
   *     with respect to themselves after Backward is run
   **/
  inline void Backward(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom);




  /**
   * @brief Returns the vector of learnable parameter blobs.
   */
  vector<shared_ptr<Blob<Dtype> > >& blobs() {
    return blobs_;
  }

  /**
   * @brief Returns the layer parameter.
   */
  const LayerParameter& layer_param() const { return layer_param_; }


  /**
   * @brief Writes the layer parameter to a protocol buffer
   */
  virtual void ToProto(LayerParameter* param, bool write_diff = false);


  /**
   * @brief Returns the scalar loss associated with a top blob at a given index.
   */
  inline Dtype loss(const int top_index) const {
    return (loss_.size() > top_index) ? loss_[top_index] : Dtype(0);
  }
  /**
   * @brief Sets the loss associated with a top blob at a given index.
   */
  inline void set_loss(const int top_index, const Dtype value) {
    if (loss_.size() <= top_index) {
      loss_.resize(top_index + 1, Dtype(0));
    }
    loss_[top_index] = value;
  }



  /**
   * @brief Specifies whether the layer should compute gradients w.r.t. a
   *        parameter at a particular index given by param_id.
   *
   * You can safely ignore false values and always compute gradients
   * for all parameters, but possibly with wasteful computation.
   */
  inline bool param_propagate_down(const int param_id) {
    return (param_propagate_down_.size() > param_id) ?
        param_propagate_down_[param_id] : false;
  }
  /**
   * @brief Sets whether the layer should compute gradients w.r.t. a
   *        parameter at a particular index given by param_id.
   *        (i.e., whether to compute gradients for a given learnable parameter blob)
   */
  inline void set_param_propagate_down(const int param_id, const bool value) {
    if (param_propagate_down_.size() <= param_id) {
      param_propagate_down_.resize(param_id + 1, true);
    }
    param_propagate_down_[param_id] = value;
  }



protected:


  /** The protobuf that stores the layer parameters */
  // the layer parameters stored in protobuf form, read from the network definition file (protocol buffers format)
  LayerParameter layer_param_;

  /** The phase: TRAIN or TEST */
  Phase phase_;

  /** The vector that stores the learnable parameters as a set of blobs. */
  // the layer's learnable weight and bias parameters; a vector is used because the weights and biases are kept in two separate blobs
  // initialized in the base Layer class (only if they are defined in the network definition file)
  vector<shared_ptr<Blob<Dtype> > > blobs_;

  /** Vector indicating whether to compute the diff of each param blob. */
  vector<bool> param_propagate_down_;


  /** The vector that indicates whether each top blob has a non-zero weight in
   *  the objective function.
   */
  // zero for non-loss layers; in a LossLayer it holds the loss weight of each top blob
  vector<Dtype> loss_;

  /** 
   * @brief Using the CPU device, compute the layer output. Pure virtual; subclasses must implement it.
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) = 0;


  /**
   * @brief Using the GPU device, compute the layer output.
   *        Fall back to Forward_cpu() if unavailable.
   */
  /* A void function may return the result of another void function.
   * This is done for template uniformity:
   * template<class T>
   * T default_value()
   * {
        return T();
   * }
   * where T may be void.
   */
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    // LOG(WARNING) << "Using CPU code as backup.";
    return Forward_cpu(bottom, top);
  }

  /**
   * @brief Using the CPU device, compute the gradients for any parameters and
   *        for the bottom blobs if propagate_down is true.
   * Pure virtual; derived classes must implement it.
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) = 0;
  /**
   * @brief Using the GPU device, compute the gradients for any parameters and
   *        for the bottom blobs if propagate_down is true.
   *        Fall back to Backward_cpu() if unavailable.
   */
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    // LOG(WARNING) << "Using CPU code as backup.";
    Backward_cpu(top, propagate_down, bottom);
  }

  /**
   * Called by SetUp to initialize the weights associated with any top blobs in
   * the loss function. Store non-zero loss weights in the diff blob.
   */
  inline void SetLossWeights(const vector<Blob<Dtype>*>& top) {
    const int num_loss_weights = layer_param_.loss_weight_size();
    if (num_loss_weights) {
      CHECK_EQ(top.size(), num_loss_weights) << "loss_weight must be "
          "unspecified or specified once per top blob.";
      for (int top_id = 0; top_id < top.size(); ++top_id) {
        const Dtype loss_weight = layer_param_.loss_weight(top_id);
        if (loss_weight == Dtype(0)) { continue; }
        this->set_loss(top_id, loss_weight);
        const int count = top[top_id]->count();
        Dtype* loss_multiplier = top[top_id]->mutable_cpu_diff();
        caffe_set(count, loss_weight, loss_multiplier);
      }
    }
  }


 private:

  /** Whether this layer is actually shared by other nets*/
  bool is_shared_;

  /** The mutex for sequential forward if this layer is shared
   *  (a boost::mutex object)
   */
  // If this layer is shared, this mutex serializes Forward calls so the forward pass runs correctly.
  shared_ptr<boost::mutex> forward_mutex_;

  /** Initialize forward_mutex_ */
  void InitMutex();
  /** Lock forward_mutex_ if this layer is shared */
  void Lock();
  /** Unlock forward_mutex_ if this layer is shared */
  void Unlock();

  DISABLE_COPY_AND_ASSIGN(Layer);


};  // class Layer



// Forward and backward wrappers. You should implement the cpu and
// gpu specific implementations instead, and should not change these
// functions.
// Forward and backward interfaces. Every Layer subclass must implement Forward_cpu().
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Lock during forward to ensure sequential forward
  Lock();
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);

    // accumulate the loss
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->cpu_data();
      const Dtype* loss_weights = top[top_id]->cpu_diff();
      loss += caffe_cpu_dot(count, data, loss_weights);
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->gpu_data();
      const Dtype* loss_weights = top[top_id]->gpu_diff();
      Dtype blob_loss = 0;
      caffe_gpu_dot(count, data, loss_weights, &blob_loss);
      loss += blob_loss;
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  Unlock();
  return loss;
}


template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Backward_cpu(top, propagate_down, bottom);
    break;
  case Caffe::GPU:
    Backward_gpu(top, propagate_down, bottom);
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}

// Serialize LayerParameter to protocol buffer
// Layer serialization: copies the layer description layer_param_ and the
// layer's weight/bias blobs_ into a LayerParameter object so it can be written to disk.
template <typename Dtype>
void Layer<Dtype>::ToProto(LayerParameter* param, bool write_diff) {
  param->Clear();
  param->CopyFrom(layer_param_);
  param->clear_blobs();
  // copy the layer's weight and bias parameter blobs_
  for (int i = 0; i < blobs_.size(); ++i) {
    blobs_[i]->ToProto(param->add_blobs(), write_diff);
  }
}


}  // namespace caffe

layer.cpp

#include <boost/thread.hpp>
#include "caffe/layer.hpp"

namespace caffe {

template <typename Dtype>
void Layer<Dtype>::InitMutex() {
  forward_mutex_.reset(new boost::mutex());
}

template <typename Dtype>
void Layer<Dtype>::Lock() {
  if (IsShared()) {
    forward_mutex_->lock();
  }
}

template <typename Dtype>
void Layer<Dtype>::Unlock() {
  if (IsShared()) {
    forward_mutex_->unlock();
  }
}

// explicit template instantiation
INSTANTIATE_CLASS(Layer);

}  // namespace caffe

BaseDataLayer

base_data_layer.hpp

namespace caffe {

/**
 * @brief Provides base for data layers that feed blobs to the Net.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class BaseDataLayer : public Layer<Dtype> {
 public:
  explicit BaseDataLayer(const LayerParameter& param);
  // LayerSetUp: implements common data layer setup functionality, and calls
  // DataLayerSetUp to do special data layer setup for individual layer types.
  // This method may not be overridden except by the BasePrefetchingDataLayer.
  // (i.e., only BasePrefetchingDataLayer may override it)
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Data layers should be shared by multiple solvers in parallel
  virtual inline bool ShareInParallel() const { return true; }
  // Data-layer-specific setup; this virtual function is overridden by subclasses to initialize the concrete data layer.
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}
  // Data layers have no bottoms, so reshaping is trivial.
  // Data layers have no bottom blobs, so Reshape is only a formality.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

 protected:
  // parameters for transforming the input data: whether to mirror, crop, subtract the mean file, scale, etc.
  TransformationParameter transform_param_;
  // pointer to the object that actually performs the data transformation (a Transform call plus these parameters is enough to transform the data)
  shared_ptr<DataTransformer<Dtype> > data_transformer_;
  bool output_labels_;
};

// A Batch is simply a pair of blobs: the data data_ and the labels label_.
template <typename Dtype>
class Batch {
 public:
  Blob<Dtype> data_, label_;
};


// BasePrefetchingDataLayer inherits from BaseDataLayer (and from InternalThread).
template <typename Dtype>
class BasePrefetchingDataLayer :
    public BaseDataLayer<Dtype>, public InternalThread {
 public:
  explicit BasePrefetchingDataLayer(const LayerParameter& param);
  // LayerSetUp: implements common data layer setup functionality, and calls
  // DataLayerSetUp to do special data layer setup for individual layer types.
  // This method may not be overridden.
  void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  // Prefetches batches (asynchronously if to GPU memory)
  static const int PREFETCH_COUNT = 3;

 protected:
  virtual void InternalThreadEntry();
  // adds the load_batch function; it is pure virtual, so every subclass must implement it
  virtual void load_batch(Batch<Dtype>* batch) = 0;
  // plus the prefetch array and the prefetch_free_ / prefetch_full_ queues
  Batch<Dtype> prefetch_[PREFETCH_COUNT];
  BlockingQueue<Batch<Dtype>*> prefetch_free_;
  BlockingQueue<Batch<Dtype>*> prefetch_full_;

  // blob that holds (points at) the transformed data
  Blob<Dtype> transformed_data_;
};

}  // namespace caffe

#endif  // CAFFE_DATA_LAYERS_HPP_
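
The two queues implement a classic producer–consumer hand-off: the prefetch thread pops an empty Batch from prefetch_free_, fills it in load_batch, and pushes it onto prefetch_full_; Forward pops a filled Batch from prefetch_full_, copies it into the top blobs, and returns it to prefetch_free_. The following self-contained sketch (plain C++11 with a toy blocking queue, not Caffe code) illustrates that round-trip:

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

// Toy stand-ins for Caffe's Batch and BlockingQueue, just to show the data flow.
struct Batch { std::vector<float> data; };

template <typename T>
class BlockingQueue {
 public:
  void push(T v) {
    { std::lock_guard<std::mutex> lk(m_); q_.push(v); }
    cv_.notify_one();
  }
  T pop() {
    std::unique_lock<std::mutex> lk(m_);
    cv_.wait(lk, [this] { return !q_.empty(); });
    T v = q_.front(); q_.pop();
    return v;
  }
 private:
  std::queue<T> q_;
  std::mutex m_;
  std::condition_variable cv_;
};

int main() {
  const int kPrefetchCount = 3;            // mirrors PREFETCH_COUNT
  Batch batches[kPrefetchCount];
  BlockingQueue<Batch*> free_q, full_q;    // mirrors prefetch_free_ / prefetch_full_
  for (int i = 0; i < kPrefetchCount; ++i) free_q.push(&batches[i]);

  // "InternalThreadEntry": keep filling free batches and handing them over.
  std::thread prefetcher([&] {
    for (int iter = 0; iter < 10; ++iter) {
      Batch* b = free_q.pop();             // blocks if the consumer is slow
      b->data.assign(4, static_cast<float>(iter));  // "load_batch"
      full_q.push(b);
    }
  });

  // "Forward_cpu": consume filled batches and recycle them.
  for (int iter = 0; iter < 10; ++iter) {
    Batch* b = full_q.pop();               // blocks if loading is slow
    std::printf("consumed batch with value %.0f\n", b->data[0]);
    free_q.push(b);
  }
  prefetcher.join();
  return 0;
}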

base_data_layer.cpp

namespace caffe {

template <typename Dtype>
BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
    : Layer<Dtype>(param),
      transform_param_(param.transform_param()) {
}

// Mainly sets up the data-preprocessing machinery and then calls DataLayerSetUp (implemented by the concrete data-layer subclasses).
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  if (top.size() == 1) {
    output_labels_ = false;
  } else {
    output_labels_ = true;
  }
  data_transformer_.reset(
      new DataTransformer<Dtype>(transform_param_, this->phase_));
  data_transformer_->InitRand();
  // The subclasses should setup the size of bottom and top
  DataLayerSetUp(bottom, top);
}

template <typename Dtype>
BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
    const LayerParameter& param)
    : BaseDataLayer<Dtype>(param),
      prefetch_free_(), prefetch_full_() {
  for (int i = 0; i < PREFETCH_COUNT; ++i) {
    prefetch_free_.push(&prefetch_[i]);
  }
}

template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);
  // Before starting the prefetch thread, we make cpu_data and gpu_data
  // calls so that the prefetch thread does not accidentally make simultaneous
  // cudaMalloc calls when the main thread is running. In some GPUs this
  // seems to cause failures if we do not so.
  // allocate memory for the prefetch buffers up front
  for (int i = 0; i < PREFETCH_COUNT; ++i) {
    prefetch_[i].data_.mutable_cpu_data();
    if (this->output_labels_) {
      prefetch_[i].label_.mutable_cpu_data();
    }
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    for (int i = 0; i < PREFETCH_COUNT; ++i) {
      prefetch_[i].data_.mutable_gpu_data();
      if (this->output_labels_) {
        prefetch_[i].label_.mutable_gpu_data();
      }
    }
  }
#endif
  // remaining prefetch initialization
  DLOG(INFO) << "Initializing prefetch";
  this->data_transformer_->InitRand();
  StartInternalThread();
  DLOG(INFO) << "Prefetch initialized.";
}

template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
  cudaStream_t stream;
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  }
#endif

  try {
    while (!must_stop()) {
      // take a free prefetch buffer
      Batch<Dtype>* batch = prefetch_free_.pop();
      // load the data
      load_batch(batch);
#ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        batch->data_.data().get()->async_gpu_push(stream);
        CUDA_CHECK(cudaStreamSynchronize(stream));
      }
#endif
      // push the filled batch onto prefetch_full_ for later consumption
      prefetch_full_.push(batch);
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamDestroy(stream));
  }
#endif
}

template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Pop one batch from the prefetch queue; if none is available, block with the message "Data layer prefetch queue empty".
  Batch<Dtype>* batch = prefetch_full_.pop("Data layer prefetch queue empty");
  // Reshape to loaded data.
  top[0]->ReshapeLike(batch->data_);
  // Copy the data
  caffe_copy(batch->data_.count(), batch->data_.cpu_data(),
             top[0]->mutable_cpu_data());
  DLOG(INFO) << "Prefetch copied";
  if (this->output_labels_) {
    // Reshape to loaded labels.
    top[1]->ReshapeLike(batch->label_);
    // Copy the labels.
    caffe_copy(batch->label_.count(), batch->label_.cpu_data(),
        top[1]->mutable_cpu_data());
  }

  prefetch_free_.push(batch);
}

#ifdef CPU_ONLY
STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward);
#endif

INSTANTIATE_CLASS(BaseDataLayer);
INSTANTIATE_CLASS(BasePrefetchingDataLayer);

}  // namespace caffe

DataLayer

data_layer.hpp

namespace caffe {

/*
The input layer for raw data, at the very bottom of the network.
It can read data from a LevelDB or LMDB database. As the bottom-most
layer, its main job is converting the stored data format.
 */

template <typename Dtype>
class DataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit DataLayer(const LayerParameter& param);
  virtual ~DataLayer();
  // data layer setup
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // DataLayer uses DataReader instead for sharing for parallelism
  virtual inline bool ShareInParallel() const { return false; }
  virtual inline const char* type() const { return "Data"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 2; }

 protected:
  // loads one batch of data
  virtual void load_batch(Batch<Dtype>* batch);

  DataReader reader_;
};

}  // namespace caffe

#endif  // CAFFE_DATA_LAYER_HPP_

data_layer.cpp

namespace caffe {

template <typename Dtype>
DataLayer<Dtype>::DataLayer(const LayerParameter& param)
  : BasePrefetchingDataLayer<Dtype>(param),
    reader_(param) { // reader_ is the database handle
}

template <typename Dtype>
DataLayer<Dtype>::~DataLayer() {
  this->StopInternalThread();
}

// Mainly sets the shapes of the prefetch buffers.
template <typename Dtype>
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int batch_size = this->layer_param_.data_param().batch_size();
  // Read a data point, and use it to initialize the top blob.
  // peek at one sample (Datum)
  Datum& datum = *(reader_.full().peek());

  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape top[0] and prefetch_data according to the batch_size.
  top_shape[0] = batch_size;
  top[0]->Reshape(top_shape);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  if (this->output_labels_) {
    vector<int> label_shape(1, batch_size);
    top[1]->Reshape(label_shape);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].label_.Reshape(label_shape);
    }
  }
}

// Mainly performs the data preprocessing.
// This function is called on prefetch thread
template<typename Dtype>
void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());

  // Reshape according to the first datum of each batch
  // on single input batches allows for inputs of varying dimension.
  const int batch_size = this->layer_param_.data_param().batch_size();
  Datum& datum = *(reader_.full().peek());
  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* top_data = batch->data_.mutable_cpu_data();
  Dtype* top_label = NULL;  // suppress warnings about uninitialized variables

  if (this->output_labels_) {
    top_label = batch->label_.mutable_cpu_data();
  }
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    timer.Start();
    // get a datum
    Datum& datum = *(reader_.full().pop("Waiting for data"));
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply data transformations (mirror, scale, crop...)
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(top_data + offset);
    this->data_transformer_->Transform(datum, &(this->transformed_data_));
    // Copy label.
    if (this->output_labels_) {
      top_label[item_id] = datum.label();
    }
    trans_time += timer.MicroSeconds();

    reader_.free().push(const_cast<Datum*>(&datum));
  }
  timer.Stop();
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(DataLayer);
REGISTER_LAYER_CLASS(Data);

}  // namespace caffe

DummyDataLayer

dummy_data_layer.hpp

/** 
 * @brief Provides data to the Net generated by a Filler. 
 * 
 * TODO(dox): thorough documentation for Forward and proto params. 
 * This class inherits from Layer and generates data via a Filler. 
 */  
template <typename Dtype>  
class DummyDataLayer : public Layer<Dtype> {  
 public:  
  explicit DummyDataLayer(const LayerParameter& param)  
      : Layer<Dtype>(param) {}  
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  
  // Data layers should be shared by multiple solvers in parallel  
  virtual inline bool ShareInParallel() const { return true; }  
  // Data layers have no bottoms, so reshaping is trivial.  
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top) {}  

  virtual inline const char* type() const { return "DummyData"; }  
  virtual inline int ExactNumBottomBlobs() const { return 0; }  
  virtual inline int MinTopBlobs() const { return 1; }  

 protected:  
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,  
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}  
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,  
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}  

  vector<shared_ptr<Filler<Dtype> > > fillers_;  
  vector<bool> refill_;  
};  
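
For reference, a hypothetical prototxt definition for this layer could look like the following (the shapes and filler settings are illustrative only); with two tops, two shapes and two fillers, it passes the checks performed in LayerSetUp below:

layer {
  name: "dummy"
  type: "DummyData"
  top: "data"
  top: "label"
  dummy_data_param {
    shape { dim: 32 dim: 3 dim: 224 dim: 224 }   # first top: data
    shape { dim: 32 }                            # second top: label
    data_filler { type: "gaussian" std: 0.01 }   # refilled on every forward pass
    data_filler { type: "constant" value: 0 }    # filled once, never refilled
  }
}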

dummy_data_layer.cpp

#include <vector>  

#include "caffe/filler.hpp"  
#include "caffe/layer.hpp"  
#include "caffe/vision_layers.hpp"  

namespace caffe {  

template <typename Dtype>  
void DummyDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top) {  
  // number of top blobs
  const int num_top = top.size();  
  // get this layer's parameters
  const DummyDataParameter& param = this->layer_param_.dummy_data_param();  
  // number of fillers
  const int num_data_filler = param.data_filler_size();  
  // check the number of fillers: it must be 0, 1, or equal to the number of tops
  CHECK(num_data_filler == 0 || num_data_filler == 1 ||  
        num_data_filler == num_top)  
      << "Number of data fillers must be 0, 1 or equal to the number of tops: "  
      << num_top << "; you specified " << num_data_filler << " data fillers.";  

  // legacy_dims is true if any of the legacy num/channels/height/width fields were specified
  const bool legacy_dims = param.num_size() || param.channels_size() ||  
                           param.height_size() || param.width_size();  
  // the checks below verify the parameters: each count must be 1 or equal to num_top
  if (legacy_dims) {  // legacy 4D fields were used
    CHECK_EQ(0, param.shape_size())  
        << "Both shape and legacy fields were specified";  
    // Using deprecated 4D output dim specifiers.  
    CHECK(param.num_size() == 1 || param.num_size() == num_top)  
        << "Must specify 'num' once, or once per top blob "  
        << "(" << num_top << "); specified " << param.num_size() << ".";  
    CHECK(param.channels_size() == 1 || param.channels_size() == num_top)  
        << "Must specify 'channels' once, or once per top blob "  
        << "(" << num_top << "); specified " << param.channels_size() << ".";  
    CHECK(param.height_size() == 1 || param.height_size() == num_top)  
        << "Must specify 'height' once, or once per top blob "  
        << "(" << num_top << "); specified " << param.height_size() << ".";  
    CHECK(param.width_size() == 1 || param.width_size() == num_top)  
        << "Must specify 'width' once, or once per top blob "  
        << "(" << num_top << "); specified " << param.width_size() << ".";  
  } else {  
    CHECK(param.shape_size() == 1 || param.shape_size() == num_top)  
        << "Must specify 'shape' once, or once per top blob "  
        << "(" << num_top << "); specified " << param.shape_size() << ".";  
  }  
  // refill_[i] tells Forward i whether or not to actually refill top Blob i.  
  // If refill_[i] is false, Forward does nothing for Blob i. We use this to  
  // avoid wastefully refilling "constant" Blobs in every forward pass.  
  // We first fill refill_ in with the INVERSE of its final values.  
  // The first time we run Forward from the LayerSetUp method, we'll fill only  
  // Blobs for which refill_ is normally false.  These Blobs will never be  
  // filled again.  
  // refill_ indicates whether Blob i needs to be (re)filled; if refill_[i] is false,
  // Forward does nothing for Blob i.
  refill_.clear();  
  fillers_.clear();  
  // either 0 or 1 fillers
  if (num_data_filler <= 1) {  
    // FillerParameter describes how the data is generated
    // (e.g., mean, std, ...); see its definition for details
    FillerParameter filler_param;  
    if (num_data_filler == 0) {  
      // if no filler is specified, fill with a constant value (0)
      filler_param.set_type("constant");  
      filler_param.set_value(0);  
    } else {  
      // otherwise copy the specified filler into filler_param
      filler_param.CopyFrom(param.data_filler(0));  
    }  
    // Refill on each iteration iff not using a constant filler,  
    // but use the inverse of this rule for the first run.  
    refill_.resize(1);  
    refill_[0] = (strcmp(filler_param.type().c_str(), "constant") == 0);  
    fillers_.resize(1);  
    // instantiate the filler
    fillers_[0].reset(GetFiller<Dtype>(filler_param));  
  } else {  // num_data_filler == num_top
    refill_.resize(num_top);  
    fillers_.resize(num_top);  
    for (int i = 0; i < num_top; ++i) {  
      fillers_[i].reset(GetFiller<Dtype>(param.data_filler(i)));  
      // Refill on each iteration iff not using a constant filler,  
      // but use the inverse of this rule for the first run.  
      refill_[i] =  
          (strcmp(param.data_filler(i).type().c_str(), "constant") == 0);  
    }  
  }  

  // reshape the top blobs
  for (int i = 0; i < num_top; ++i) {  
    if (legacy_dims) {  
      const int num = (param.num_size() == 1) ? param.num(0) : param.num(i);  
      const int channels =  
          (param.channels_size() == 1) ? param.channels(0) : param.channels(i);  
      const int height =  
          (param.height_size() == 1) ? param.height(0) : param.height(i);  
      const int width =  
          (param.width_size() == 1) ? param.width(0) : param.width(i);  
      top[i]->Reshape(num, channels, height, width);  
    } else {  
      const int shape_index = (param.shape_size() == 1) ? 0 : i;  
      top[i]->Reshape(param.shape(shape_index));  
    }  
  }  
  // Run Forward once, with refill_ inverted, to fill the constant Blobs.  
  // run Forward once (which dispatches to Forward_cpu)
  this->Forward(bottom, top);  
  // Invert the inverted refill_ values to refill the desired (non-constant)  
  // Blobs in every usual forward pass.  
  for (int i = 0; i < refill_.size(); ++i) {  
    refill_[i] = !refill_[i];  
  }  
}  

// This function is called from Forward.
template <typename Dtype>  
void DummyDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top) {  
      // use fillers_ to fill the top blobs
  for (int i = 0; i < top.size(); ++i) {  
    const int filler_id = (fillers_.size() > 1) ? i : 0;  
    if (refill_[filler_id]) {  
      fillers_[filler_id]->Fill(top[i]);  
    }  
  }  
}  

// instantiate the class template
// and register the layer
INSTANTIATE_CLASS(DummyDataLayer);  
REGISTER_LAYER_CLASS(DummyData);  

}  // namespace caffe  

HDF5DataLayer

hdf5_data_layer.hpp

/** 
 * @brief Provides data to the Net from HDF5 files. 
 * 
 * TODO(dox): thorough documentation for Forward and proto params. 
 * Reads data from HDF5 files. 
 */  
template <typename Dtype>  
class HDF5DataLayer : public Layer<Dtype> {  
 public:  
  explicit HDF5DataLayer(const LayerParameter& param)  
      : Layer<Dtype>(param) {}  
  virtual ~HDF5DataLayer();  
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  
  // Data layers should be shared by multiple solvers in parallel  
  virtual inline bool ShareInParallel() const { return true; }  
  // Data layers have no bottoms, so reshaping is trivial.  
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top) {}  

  virtual inline const char* type() const { return "HDF5Data"; }  
  virtual inline int ExactNumBottomBlobs() const { return 0; }  
  virtual inline int MinTopBlobs() const { return 1; }  

 protected:  
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,  
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}  
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,  
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}  
  // read data from an HDF5 file
  virtual void LoadHDF5FileData(const char* filename);  

  std::vector<std::string> hdf_filenames_;  
  unsigned int num_files_;  
  unsigned int current_file_;  
  hsize_t current_row_;  
  std::vector<shared_ptr<Blob<Dtype> > > hdf_blobs_;  
  std::vector<unsigned int> data_permutation_;  
  std::vector<unsigned int> file_permutation_;  
};  
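
Note that the source parameter does not point at an HDF5 file directly but at a text file listing one HDF5 file path per line, and the dataset names inside each .h5 file must match the top blob names (LoadHDF5FileData looks them up via layer_param_.top(i)). A hypothetical example (file names illustrative only):

# train_h5_list.txt: one HDF5 file per line
/path/to/train_part1.h5
/path/to/train_part2.h5

layer {
  name: "data"
  type: "HDF5Data"
  top: "data"      # must be a dataset name inside the .h5 files
  top: "label"     # must be a dataset name inside the .h5 files
  hdf5_data_param {
    source: "train_h5_list.txt"
    batch_size: 64
    shuffle: true
  }
}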

hdf5_data_layer.cpp

/* 
TODO: 
- load file in a separate thread ("prefetch") 
- can be smarter about the memcpy call instead of doing it row-by-row 
  :: use util functions caffe_copy, and Blob->offset() 
  :: don't forget to update hdf5_daa_layer.cu accordingly 
- add ability to shuffle filenames if flag is set 
*/  
#include <fstream>  // NOLINT(readability/streams)  
#include <string>  
#include <vector>  

#include "hdf5.h"  
#include "hdf5_hl.h"  
#include "stdint.h"  

#include "caffe/data_layers.hpp"  
#include "caffe/layer.hpp"  
#include "caffe/util/hdf5.hpp"  

namespace caffe {  

template <typename Dtype>  
HDF5DataLayer<Dtype>::~HDF5DataLayer<Dtype>() { }  

// Load data and label from HDF5 filename into the class property blobs.  
// Reads the HDF5 file's datasets into hdf_blobs_.
template <typename Dtype>  
void HDF5DataLayer<Dtype>::LoadHDF5FileData(const char* filename) {  
  DLOG(INFO) << "Loading HDF5 file: " << filename;  
  // open the file
  hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);  
  if (file_id < 0) {  
    LOG(FATAL) << "Failed opening HDF5 file: " << filename;  
  }  

  int top_size = this->layer_param_.top_size();  
  hdf_blobs_.resize(top_size);  

  const int MIN_DATA_DIM = 1;  
  const int MAX_DATA_DIM = INT_MAX;  

  for (int i = 0; i < top_size; ++i) {  
    hdf_blobs_[i] = shared_ptr<Blob<Dtype> >(new Blob<Dtype>());  
    // message LayerParameter {  
    // optional string name = 1; // the layer name  
    // optional string type = 2; // the layer type  
    // repeated string bottom = 3; // the name of each bottom blob  
    // repeated string top = 4; // the name of each top blob  
    hdf5_load_nd_dataset(file_id, this->layer_param_.top(i).c_str(),  
        MIN_DATA_DIM, MAX_DATA_DIM, hdf_blobs_[i].get());  
  }  

  herr_t status = H5Fclose(file_id);  
  CHECK_GE(status, 0) << "Failed to close HDF5 file: " << filename;  

  // MinTopBlobs==1 guarantees at least one top blob  
  CHECK_GE(hdf_blobs_[0]->num_axes(), 1) << "Input must have at least 1 axis.";  
  const int num = hdf_blobs_[0]->shape(0);  
  for (int i = 1; i < top_size; ++i) {  
    CHECK_EQ(hdf_blobs_[i]->shape(0), num);  
  }  
  // Default to identity permutation.  
  data_permutation_.clear();  
  data_permutation_.resize(hdf_blobs_[0]->shape(0));  
  for (int i = 0; i < hdf_blobs_[0]->shape(0); i++)  
    data_permutation_[i] = i;  

  // Shuffle if needed.  
  // shuffle the data index permutation
  if (this->layer_param_.hdf5_data_param().shuffle()) {  
    std::random_shuffle(data_permutation_.begin(), data_permutation_.end());  
    DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0)  
               << " rows (shuffled)";  
  } else {  
    DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0) << " rows";  
  }  
}  

// Main job: read the list of HDF5 files and set the shapes of the top blobs.
template <typename Dtype>  
void HDF5DataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top) {  
  // Refuse transformation parameters since HDF5 is totally generic.  
  CHECK(!this->layer_param_.has_transform_param()) <<  
      this->type() << " does not transform data.";  
  // Read the source to parse the filenames.  
  // read the list file of HDF5 filenames
  const string& source = this->layer_param_.hdf5_data_param().source();  
  LOG(INFO) << "Loading list of HDF5 filenames from: " << source;  
  hdf_filenames_.clear();  
  std::ifstream source_file(source.c_str());  
  if (source_file.is_open()) {  
    std::string line;  
    while (source_file >> line) {  
      hdf_filenames_.push_back(line);  
    }  
  } else {  
    LOG(FATAL) << "Failed to open source file: " << source;  
  }  
  source_file.close();  
  num_files_ = hdf_filenames_.size();  
  current_file_ = 0;  
  LOG(INFO) << "Number of HDF5 files: " << num_files_;  
  CHECK_GE(num_files_, 1) << "Must have at least 1 HDF5 filename listed in "  
    << source;  

  file_permutation_.clear();  
  file_permutation_.resize(num_files_);  
  // whether to shuffle the file names
  // Default to identity permutation.  
  for (int i = 0; i < num_files_; i++) {  
    file_permutation_[i] = i;  
  }  

  // Shuffle if needed.  
  if (this->layer_param_.hdf5_data_param().shuffle()) {  
    std::random_shuffle(file_permutation_.begin(), file_permutation_.end());  
  }  

  // Load the first HDF5 file and initialize the line counter.  
  // load the first file in the (possibly shuffled) list into hdf_blobs_
  LoadHDF5FileData(hdf_filenames_[file_permutation_[current_file_]].c_str());  
  // initialize the row pointer
  current_row_ = 0;  

  // Reshape blobs.  
  // reshape the top blobs according to the loaded hdf_blobs_
  const int batch_size = this->layer_param_.hdf5_data_param().batch_size();  
  const int top_size = this->layer_param_.top_size();  
  vector<int> top_shape;  
  for (int i = 0; i < top_size; ++i) {  
    top_shape.resize(hdf_blobs_[i]->num_axes());  
    top_shape[0] = batch_size;  
    for (int j = 1; j < top_shape.size(); ++j) {  
      top_shape[j] = hdf_blobs_[i]->shape(j);  
    }  
    top[i]->Reshape(top_shape);  
  }  
}  

template <typename Dtype>  
void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top) {  
  const int batch_size = this->layer_param_.hdf5_data_param().batch_size();  
  for (int i = 0; i < batch_size; ++i, ++current_row_) {  
    // SetUp already loaded the data of the first file
    if (current_row_ == hdf_blobs_[0]->shape(0)) {  
      if (num_files_ > 1) {  // more than one file
        ++current_file_;  
        // if current_file_ has run past the last file index
        if (current_file_ == num_files_) {  
          current_file_ = 0;  // wrap around to the first file
          // reshuffle the file order and go through it again
          if (this->layer_param_.hdf5_data_param().shuffle()) {  
            std::random_shuffle(file_permutation_.begin(),  
                                file_permutation_.end());  
          }  
          DLOG(INFO) << "Looping around to first file.";  
        }  
        // load the next file's data into hdf_blobs_
        LoadHDF5FileData(  
            hdf_filenames_[file_permutation_[current_file_]].c_str());  
      }  // end of if (num_files_ > 1)
      current_row_ = 0;  
      // reshuffle the data index permutation
      if (this->layer_param_.hdf5_data_param().shuffle())  
        std::random_shuffle(data_permutation_.begin(), data_permutation_.end());  
    }  
    // copy the data to the top blobs
    for (int j = 0; j < this->layer_param_.top_size(); ++j) {  
      int data_dim = top[j]->count() / top[j]->shape(0);  
      caffe_copy(data_dim,  
          &hdf_blobs_[j]->cpu_data()[data_permutation_[current_row_]  
            * data_dim], &top[j]->mutable_cpu_data()[i * data_dim]);  
    }  
  }  
}  

#ifdef CPU_ONLY  
STUB_GPU_FORWARD(HDF5DataLayer, Forward);  
#endif  

INSTANTIATE_CLASS(HDF5DataLayer);  
REGISTER_LAYER_CLASS(HDF5Data);  

}  // namespace caffe  

HDF5OutputLayer

hdf5_output_layer.hpp

/** 
 * @brief Write blobs to disk as HDF5 files. 
 * 
 * TODO(dox): thorough documentation for Forward and proto params. 
 * Writes blobs to an HDF5 file. 
 */  
template <typename Dtype>  
class HDF5OutputLayer : public Layer<Dtype> {  
 public:  
  explicit HDF5OutputLayer(const LayerParameter& param)  
      : Layer<Dtype>(param), file_opened_(false) {}  
  virtual ~HDF5OutputLayer();  
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  
  // Data layers should be shared by multiple solvers in parallel  
  virtual inline bool ShareInParallel() const { return true; }  
  // Data layers have no bottoms, so reshaping is trivial.  
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top) {}  

  virtual inline const char* type() const { return "HDF5Output"; }  
  // TODO: no limit on the number of blobs  
  virtual inline int ExactNumBottomBlobs() const { return 2; }  
  virtual inline int ExactNumTopBlobs() const { return 0; }  

  inline std::string file_name() const { return file_name_; }  

 protected:  
  // The HDF5 output layer neither forward- nor back-propagates anything; it only writes the data handed in from the previous layer to an HDF5 file.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,  
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);  
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,  
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);  
  // save the bottom blobs' data to the file
  virtual void SaveBlobs();  

  bool file_opened_;  
  std::string file_name_;  
  hid_t file_id_;  
  Blob<Dtype> data_blob_;  
  Blob<Dtype> label_blob_;  
};  

hdf5_output_layer.cpp

#include <vector>  

#include "hdf5.h"  
#include "hdf5_hl.h"  

#include "caffe/blob.hpp"  
#include "caffe/common.hpp"  
#include "caffe/layer.hpp"  
#include "caffe/util/hdf5.hpp"  
#include "caffe/vision_layers.hpp"  

namespace caffe {  

template <typename Dtype>  
void HDF5OutputLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,  
    const vector<Blob<Dtype>*>& top) {  
  // the file name given in the layer parameters
  file_name_ = this->layer_param_.hdf5_output_param().file_name();  
  // create the file
  file_id_ = H5Fcreate(file_name_.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT,  
                       H5P_DEFAULT);  
  CHECK_GE(file_id_, 0) << "Failed to open HDF5 file" << file_name_;  
  file_opened_ = true;  // mark the file as opened
}  

template <typename Dtype>  
HDF5OutputLayer<Dtype>::~HDF5OutputLayer<Dtype>() {  
  if (file_opened_) {  
    herr_t status = H5Fclose(file_id_);  
    CHECK_GE(status, 0) << "Failed to close HDF5 file " << file_name_;  
  }  
}  

// Saves the blobs (data and labels) to the HDF5 file.
template <typename Dtype>  
void HDF5OutputLayer<Dtype>::SaveBlobs() {  
  // TODO: no limit on the number of blobs  
  LOG(INFO) << "Saving HDF5 file " << file_name_;  
  CHECK_EQ(data_blob_.num(), label_blob_.num()) <<  
      "data blob and label blob must have the same batch size";  
  hdf5_save_nd_dataset(file_id_, HDF5_DATA_DATASET_NAME, data_blob_);  
  hdf5_save_nd_dataset(file_id_, HDF5_DATA_LABEL_NAME, label_blob_);  
  LOG(INFO) << "Successfully saved " << data_blob_.num() << " rows";  
}  

// Effectively just stores the data coming in from the bottom blobs into the HDF5 file.
template <typename Dtype>  
void HDF5OutputLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top) {  
  CHECK_GE(bottom.size(), 2);  
  CHECK_EQ(bottom[0]->num(), bottom[1]->num());  
  // reshape data_blob_ and label_blob_
  data_blob_.Reshape(bottom[0]->num(), bottom[0]->channels(),  
                     bottom[0]->height(), bottom[0]->width());  
  label_blob_.Reshape(bottom[1]->num(), bottom[1]->channels(),  
                     bottom[1]->height(), bottom[1]->width());  
  const int data_datum_dim = bottom[0]->count() / bottom[0]->num();  
  const int label_datum_dim = bottom[1]->count() / bottom[1]->num();  

  // copy bottom[0] and bottom[1] into data_blob_ and label_blob_
  for (int i = 0; i < bottom[0]->num(); ++i) {  
    caffe_copy(data_datum_dim, &bottom[0]->cpu_data()[i * data_datum_dim],  
        &data_blob_.mutable_cpu_data()[i * data_datum_dim]);  
    caffe_copy(label_datum_dim, &bottom[1]->cpu_data()[i * label_datum_dim],  
        &label_blob_.mutable_cpu_data()[i * label_datum_dim]);  
  }  
  // write them to the file
  SaveBlobs();  
}  

// no backward pass
template <typename Dtype>  
void HDF5OutputLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,  
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {  
  return;  
}  

#ifdef CPU_ONLY  
STUB_GPU(HDF5OutputLayer);  
#endif  

INSTANTIATE_CLASS(HDF5OutputLayer);  
REGISTER_LAYER_CLASS(HDF5Output);  

}  // namespace caffe 
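
For reference, a hypothetical prototxt definition for this layer (the blob and file names are illustrative); it consumes exactly two bottoms (data and label) and produces no tops:

layer {
  name: "save_output"
  type: "HDF5Output"
  bottom: "fc8"
  bottom: "label"
  hdf5_output_param { file_name: "predictions.h5" }
}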

ImageDataLayer

image_data_layer.hpp

/** 
 * @brief Provides data to the Net from image files. 
 * 
 * TODO(dox): thorough documentation for Forward and proto params. 
 * Reads data directly from image files; probably the most commonly used data layer. 
 * The image paths and labels come from a list file whose path is specified in the layer's parameters. 
 */  
template <typename Dtype>  
class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> {  
 public:  
  explicit ImageDataLayer(const LayerParameter& param)  
      : BasePrefetchingDataLayer<Dtype>(param) {}  
  virtual ~ImageDataLayer();  
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  

  virtual inline const char* type() const { return "ImageData"; }  
  virtual inline int ExactNumBottomBlobs() const { return 0; }  
  virtual inline int ExactNumTopBlobs() const { return 2; }  

 protected:  
  shared_ptr<Caffe::RNG> prefetch_rng_;  
  // shuffle the image order
  virtual void ShuffleImages();  
  virtual void load_batch(Batch<Dtype>* batch);  

  // vector of (image path, label) pairs
  vector<std::pair<std::string, int> > lines_;  
  // index of the current image in lines_; starts at the number of randomly skipped images
  int lines_id_;  
};  
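
The source list file contains one "image_path label" pair per line (paths interpreted relative to root_folder). A hypothetical list file and layer definition might look like this (all paths and values illustrative):

# train.txt: one "path label" pair per line, relative to root_folder
cats/001.jpg 0
dogs/042.jpg 1

layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  transform_param { mirror: true crop_size: 227 }
  image_data_param {
    source: "train.txt"
    root_folder: "/data/images/"
    batch_size: 32
    new_height: 256
    new_width: 256
    shuffle: true
  }
}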

image_data_layer.cpp

#ifdef USE_OPENCV  
#include <opencv2/core/core.hpp>  

#include <fstream>  // NOLINT(readability/streams)  
#include <iostream>  // NOLINT(readability/streams)  
#include <string>  
#include <utility>  
#include <vector>  

#include "caffe/data_layers.hpp"  
#include "caffe/layer.hpp"  
#include "caffe/util/benchmark.hpp"  
#include "caffe/util/io.hpp"  
#include "caffe/util/math_functions.hpp"  
#include "caffe/util/rng.hpp"  

namespace caffe {  

template <typename Dtype>  
ImageDataLayer<Dtype>::~ImageDataLayer<Dtype>() {  
  this->StopInternalThread();  
}  

template <typename Dtype>  
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top) {  
  // read the settings from the layer parameters:
  // image height and width, whether the images are color, and the root folder
  const int new_height = this->layer_param_.image_data_param().new_height();  
  const int new_width  = this->layer_param_.image_data_param().new_width();  
  const bool is_color  = this->layer_param_.image_data_param().is_color();  
  string root_folder = this->layer_param_.image_data_param().root_folder();  

  // new_height and new_width must be set together (both zero or both positive)
  CHECK((new_height == 0 && new_width == 0) ||  
      (new_height > 0 && new_width > 0)) << "Current implementation requires "  
      "new_height and new_width to be set at the same time.";  

  // Read the file with filenames and labels  
  // read the list file that holds the image file names and labels
  const string& source = this->layer_param_.image_data_param().source();  
  LOG(INFO) << "Opening file " << source;  
  std::ifstream infile(source.c_str());  
  string filename;  
  int label;  
  // lines_ stores (filename, label) pairs
  while (infile >> filename >> label) {  
    lines_.push_back(std::make_pair(filename, label));  
  }  

  // whether the file order should be shuffled
  if (this->layer_param_.image_data_param().shuffle()) {  
    // randomly shuffle data  
    LOG(INFO) << "Shuffling data";  
    const unsigned int prefetch_rng_seed = caffe_rng_rand();  
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));  
    ShuffleImages();  
  }  
  LOG(INFO) << "A total of " << lines_.size() << " images.";  

  // random skipping: the number of skipped images lies in [0, rand_skip - 1]
  lines_id_ = 0;  
  // Check if we would need to randomly skip a few data points  
  // if rand_skip is set in the parameters, randomly skip between 0 and rand_skip - 1 images
  if (this->layer_param_.image_data_param().rand_skip()) {  
    unsigned int skip = caffe_rng_rand() %  
        this->layer_param_.image_data_param().rand_skip();  
    LOG(INFO) << "Skipping first " << skip << " data points.";  
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";  
    lines_id_ = skip;  
  }  
  // Read an image, and use it to initialize the top blob.  
  // read the first image into a cv::Mat
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,  
                                    new_height, new_width, is_color);  
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;  
  // Use data_transformer to infer the expected blob shape from a cv_image.  
  // infer the blob shape from the image
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);  
  // set the shape of transformed_data_
  this->transformed_data_.Reshape(top_shape);  
  // Reshape prefetch_data and top[0] according to the batch_size.  
  // read the batch size
  const int batch_size = this->layer_param_.image_data_param().batch_size();  
  CHECK_GT(batch_size, 0) << "Positive batch size required";  
  top_shape[0] = batch_size;  
  // set the shape of the data blobs in the prefetch array
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {  
    this->prefetch_[i].data_.Reshape(top_shape);  
  }  
  // set the shape of the output data blob
  top[0]->Reshape(top_shape);  

  LOG(INFO) << "output data size: " << top[0]->num() << ","  
      << top[0]->channels() << "," << top[0]->height() << ","  
      << top[0]->width();  
  // label  
  // set the shape of the output label blob
  vector<int> label_shape(1, batch_size);  
  top[1]->Reshape(label_shape);  
  // set the shape of the label blobs in the prefetch array
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {  
    this->prefetch_[i].label_.Reshape(label_shape);  
  }  
}  

// Shuffles the order of the image list.
template <typename Dtype>  
void ImageDataLayer<Dtype>::ShuffleImages() {  
  caffe::rng_t* prefetch_rng =  
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());  
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);  
}  

// This function is called on prefetch thread  
// It is invoked by the internal prefetch thread.
template <typename Dtype>  
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {  
  CPUTimer batch_timer;  
  batch_timer.Start();  
  double read_time = 0;  
  double trans_time = 0;  
  CPUTimer timer;  
  CHECK(batch->data_.count());  
  CHECK(this->transformed_data_.count());  
  // get the layer parameters (see the parameter definitions for details)
  ImageDataParameter image_data_param = this->layer_param_.image_data_param();  
  const int batch_size = image_data_param.batch_size();  
  const int new_height = image_data_param.new_height();  
  const int new_width = image_data_param.new_width();  
  const bool is_color = image_data_param.is_color();  
  string root_folder = image_data_param.root_folder();  

  // Reshape according to the first image of each batch  
  // on single input batches allows for inputs of varying dimension.  
  // read the first image of the batch and use it to set the shapes
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,  
      new_height, new_width, is_color);  
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;  
  // Use data_transformer to infer the expected blob shape from a cv_img.  
  // infer the blob shape from the image
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);  
  // set the shape of transformed_data_
  this->transformed_data_.Reshape(top_shape);  
  // Reshape batch according to the batch_size.  
  // set the batch dimension to batch_size
  top_shape[0] = batch_size;  
  batch->data_.Reshape(top_shape);  

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();  
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();  

  // datum scales  
  // read a batch of images and preprocess them
  const int lines_size = lines_.size();  
  for (int item_id = 0; item_id < batch_size; ++item_id) {  
    // get a blob  
    timer.Start();  
    CHECK_GT(lines_size, lines_id_);  
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,  
        new_height, new_width, is_color);  
    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;  
    read_time += timer.MicroSeconds();  
    timer.Start();  
    // Apply transformations (mirror, crop...) to the image  
    // preprocessing:

    // compute this image's offset within the batch
    int offset = batch->data_.offset(item_id);  
    // point transformed_data_ at this image's slot in the prefetch buffer
    this->transformed_data_.set_cpu_data(prefetch_data + offset);  
    // apply the transformations
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));  
    trans_time += timer.MicroSeconds();  // accumulate the preprocessing time

    // copy the label into prefetch_label
    prefetch_label[item_id] = lines_[lines_id_].second;  
    // go to the next iter  
    lines_id_++;  
    // have we reached the last image in the list?
    if (lines_id_ >= lines_size) {  
      // We have reached the end. Restart from the first.  
      DLOG(INFO) << "Restarting data prefetching from start.";  
      lines_id_ = 0;  
      // reshuffle the image order
      if (this->layer_param_.image_data_param().shuffle()) {  
        ShuffleImages();  
      }  
    }  
  }  
  batch_timer.Stop();  
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";  
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";  
  // preprocessing time
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";  
}  

INSTANTIATE_CLASS(ImageDataLayer);  
REGISTER_LAYER_CLASS(ImageData);  

}  // namespace caffe  
#endif  // USE_OPENCV  

MemoryDataLayer

memory_data_layer.hpp

/** 
 * @brief Provides data to the Net from memory. 
 * Reads data from memory: the data has already been loaded (from data or image files) and is handed directly to this layer. 
 * TODO(dox): thorough documentation for Forward and proto params. 
 */  
template <typename Dtype>  
class MemoryDataLayer : public BaseDataLayer<Dtype> {  
 public:  
  explicit MemoryDataLayer(const LayerParameter& param)  
      : BaseDataLayer<Dtype>(param), has_new_data_(false) {}  
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  

  virtual inline const char* type() const { return "MemoryData"; }  
  virtual inline int ExactNumBottomBlobs() const { return 0; }  
  virtual inline int ExactNumTopBlobs() const { return 2; }  

  // Adds in-memory data (a vector of Datum) to added_data_ and added_label_ (data and labels).
  virtual void AddDatumVector(const vector<Datum>& datum_vector);  
#ifdef USE_OPENCV  
  // With OpenCV available, adds cv::Mat images read by OpenCV, together with their labels, to added_data_ and added_label_.
  virtual void AddMatVector(const vector<cv::Mat>& mat_vector,  
      const vector<int>& labels);  
#endif  // USE_OPENCV  

  // Reset should accept const pointers, but can't, because the memory  
  //  will be given to Blob, which is mutable  
  // Reset simply stores the data, label, and item count n into the layer's internal members.
  void Reset(Dtype* data, Dtype* label, int n);  
  void set_batch_size(int new_size);  

  int batch_size() { return batch_size_; }  
  int channels() { return channels_; }  
  int height() { return height_; }  
  int width() { return width_; }  

 protected:  
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  

  int batch_size_, channels_, height_, width_, size_;  
  Dtype* data_;  
  Dtype* labels_;  
  // number of items set via Reset (a multiple of batch_size_)
  int n_;  
  size_t pos_;  
  // internal data and label blobs
  Blob<Dtype> added_data_;  
  Blob<Dtype> added_label_;  
  // whether new data is available
  bool has_new_data_;  
};  
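
A common usage pattern is to look the layer up inside a Net and push batches into it before each forward pass. The sketch below assumes a net whose first layer is a MemoryData layer named "data" (the layer name, include paths and the FeedBatch helper are illustrative, not part of Caffe):

#include <vector>
#include <boost/shared_ptr.hpp>
#include <opencv2/core/core.hpp>
#include "caffe/net.hpp"
#include "caffe/data_layers.hpp"

// Push one batch of in-memory images/labels into the MemoryData layer and run the net.
void FeedBatch(caffe::Net<float>& net,
               const std::vector<cv::Mat>& images,
               const std::vector<int>& labels) {
  boost::shared_ptr<caffe::MemoryDataLayer<float> > md =
      boost::dynamic_pointer_cast<caffe::MemoryDataLayer<float> >(
          net.layer_by_name("data"));
  CHECK(md) << "layer \"data\" is not a MemoryData layer";
  md->AddMatVector(images, labels);   // preprocesses into added_data_ / added_label_
  float loss = 0;
  net.ForwardPrefilled(&loss);        // Forward_cpu hands the batch to the next layers
}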

memory_data_layer.cpp

#ifdef USE_OPENCV  
#include <opencv2/core/core.hpp>  
#endif  // USE_OPENCV  

#include <vector>  

#include "caffe/data_layers.hpp"  
#include "caffe/layer.hpp"  
#include "caffe/util/io.hpp"  

namespace caffe {  

template <typename Dtype>  
void MemoryDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,  
     const vector<Blob<Dtype>*>& top) {  
  // read the settings from the layer parameters
  batch_size_ = this->layer_param_.memory_data_param().batch_size();  
  channels_ = this->layer_param_.memory_data_param().channels();  
  height_ = this->layer_param_.memory_data_param().height();  
  width_ = this->layer_param_.memory_data_param().width();  
  size_ = channels_ * height_ * width_;  
  CHECK_GT(batch_size_ * size_, 0) <<  
      "batch_size, channels, height, and width must be specified and"  
      " positive in memory_data_param";  
  // set the shapes of the top blobs
  vector<int> label_shape(1, batch_size_);  
  top[0]->Reshape(batch_size_, channels_, height_, width_);  
  top[1]->Reshape(label_shape);  
  // set the shapes of the internal blobs added_data_ and added_label_
  added_data_.Reshape(batch_size_, channels_, height_, width_);  
  added_label_.Reshape(label_shape);  
  data_ = NULL;  
  labels_ = NULL;  
  added_data_.cpu_data();  
  added_label_.cpu_data();  
}  

// Puts a vector of Datum into added_data_ and added_label_,
// applying the preprocessing along the way.
template <typename Dtype>  
void MemoryDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) {  
  CHECK(!has_new_data_) <<  
      "Can't add data until current data has been consumed.";  
  size_t num = datum_vector.size();  
  CHECK_GT(num, 0) << "There is no datum to add.";  
  CHECK_EQ(num % batch_size_, 0) <<  
      "The added data must be a multiple of the batch size.";  
  // reshape
  added_data_.Reshape(num, channels_, height_, width_);  
  added_label_.Reshape(num, 1, 1, 1);  
  // Apply data transformations (mirror, scale, crop...)  
  // preprocess the data
  this->data_transformer_->Transform(datum_vector, &added_data_);  
  // Copy Labels  
  // copy the labels into top_label
  Dtype* top_label = added_label_.mutable_cpu_data();  
  for (int item_id = 0; item_id < num; ++item_id) {  
    top_label[item_id] = datum_vector[item_id].label();  
  }  
  // num_images == batch_size_  
  Dtype* top_data = added_data_.mutable_cpu_data();  
  // store the data, labels, and item count into the layer's internal members
  Reset(top_data, top_label, num);  
  // mark that new data is available
  has_new_data_ = true;  
}  

// With OpenCV defined, preprocess the cv::Mat data and store it into added_data_ and added_label_.
#ifdef USE_OPENCV  
template <typename Dtype>  
void MemoryDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector,  
    const vector<int>& labels) {  
  size_t num = mat_vector.size();  
  CHECK(!has_new_data_) <<  
      "Can't add mat until current data has been consumed.";  
  CHECK_GT(num, 0) << "There is no mat to add";  
  CHECK_EQ(num % batch_size_, 0) <<  
      "The added data must be a multiple of the batch size.";  
  added_data_.Reshape(num, channels_, height_, width_);  
  added_label_.Reshape(num, 1, 1, 1);  
  // Apply data transformations (mirror, scale, crop...)  
  // preprocessing
  this->data_transformer_->Transform(mat_vector, &added_data_);  
  // Copy Labels  
  Dtype* top_label = added_label_.mutable_cpu_data();  
  for (int item_id = 0; item_id < num; ++item_id) {  
    top_label[item_id] = labels[item_id];  
  }  
  // num is a (positive) multiple of batch_size_
  Dtype* top_data = added_data_.mutable_cpu_data();  
  Reset(top_data, top_label, num);  
  has_new_data_ = true;  
}  
#endif  // USE_OPENCV  

// Store the data pointer, label pointer and item count in the
// members data_, labels_ and n_, and rewind the read position pos_ to 0
template <typename Dtype>  
void MemoryDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) {  
  CHECK(data);  
  CHECK(labels);  
  CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size";  
  // Warn with transformation parameters since a memory array is meant to  
  // be generic and no transformations are done with Reset().  
  if (this->layer_param_.has_transform_param()) {  
    LOG(WARNING) << this->type() << " does not transform array data on Reset()";  
  }  
  data_ = data;  
  labels_ = labels;  
  n_ = n;  // number of items; a multiple of batch_size_
  pos_ = 0;  
}  

// Change the batch size and reshape the internal blobs added_data_ and added_label_ accordingly
template <typename Dtype>  
void MemoryDataLayer<Dtype>::set_batch_size(int new_size) {  
  CHECK(!has_new_data_) <<  
      "Can't change batch_size until current data has been consumed.";  
  batch_size_ = new_size;  
  added_data_.Reshape(batch_size_, channels_, height_, width_);  
  added_label_.Reshape(batch_size_, 1, 1, 1);  
}  

// Expose the current batch to the next layer: top[0]/top[1] are pointed
// at the user-supplied data and labels, no copy is made
template <typename Dtype>  
void MemoryDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top) {  
  CHECK(data_) << "MemoryDataLayer needs to be initialized by calling Reset";
  // point top[0] at the data and top[1] at the labels via set_cpu_data (no copy)
  top[0]->Reshape(batch_size_, channels_, height_, width_);  
  top[1]->Reshape(batch_size_, 1, 1, 1);  
  top[0]->set_cpu_data(data_ + pos_ * size_);  
  top[1]->set_cpu_data(labels_ + pos_);  
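  // For example (illustrative numbers, not from this post): with
  // batch_size_ = 32 and size_ = 3*224*224, the k-th Forward call serves
  // items [32*k, 32*(k+1)) of the added data by offsetting data_ by
  // pos_*size_ elements and labels_ by pos_, without copying anything.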
  pos_ = (pos_ + batch_size_) % n_;  
  if (pos_ == 0)  
    has_new_data_ = false;  // after one full pass over the added data, mark it as consumed
}  

INSTANTIATE_CLASS(MemoryDataLayer);  
REGISTER_LAYER_CLASS(MemoryData);  

}  // namespace caffe  
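
To make the flow above concrete, here is a minimal usage sketch (my own illustration, not part of the Caffe sources): a helper that feeds a vector of cv::Mat images into a net whose input layer is a MemoryData layer named "data". The layer name is hypothetical, and the exact includes and Net forward call can differ between Caffe versions, so treat this as an assumption-laden sketch rather than canonical API usage.

#include <vector>
#include <opencv2/core/core.hpp>
#include "caffe/net.hpp"
#include "caffe/data_layers.hpp"

// Push one or more batches of images/labels into a MemoryData layer and
// run a forward pass so the rest of the net consumes them.
void FeedMemoryData(caffe::Net<float>& net,
                    const std::vector<cv::Mat>& images,
                    const std::vector<int>& labels) {
  boost::shared_ptr<caffe::MemoryDataLayer<float> > md_layer =
      boost::static_pointer_cast<caffe::MemoryDataLayer<float> >(
          net.layer_by_name("data"));  // "data" is a hypothetical layer name
  // AddMatVector reshapes added_data_/added_label_, applies the transform
  // (mirror/crop/mean subtraction) and calls Reset() internally.
  md_layer->AddMatVector(images, labels);
  net.Forward();  // the layer's Forward_cpu hands the batch to top[0]/top[1]
}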

WindowDataLayer

window_data_layer.hpp

/**
 * @brief Provides data to the Net from windows of image files, specified
 *        by a window data file.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>  
class WindowDataLayer : public BasePrefetchingDataLayer<Dtype> {  
 public:  
  explicit WindowDataLayer(const LayerParameter& param)  
      : BasePrefetchingDataLayer<Dtype>(param) {}  
  virtual ~WindowDataLayer();  
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top);  

  virtual inline const char* type() const { return "WindowData"; }  
  virtual inline int ExactNumBottomBlobs() const { return 0; }  
  virtual inline int ExactNumTopBlobs() const { return 2; }  

 protected:  
  virtual unsigned int PrefetchRand();  
  virtual void load_batch(Batch<Dtype>* batch);  

  shared_ptr<Caffe::RNG> prefetch_rng_;  
  vector<std::pair<std::string, vector<int> > > image_database_;  
  // Field indices of a window record: each window is stored as a
  // vector<float> whose entries follow this order
  enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM };  
  vector<vector<float> > fg_windows_;  
  vector<vector<float> > bg_windows_;  
  Blob<Dtype> data_mean_;  
  vector<Dtype> mean_values_;  
  bool has_mean_file_;  
  bool has_mean_values_;  
  bool cache_images_;  
  vector<std::pair<std::string, Datum > > image_database_cache_;  
};
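
A minimal prototxt sketch of a WindowData layer (the name and the parameter values are illustrative, not from this post); every field shown is read either in DataLayerSetUp or in load_batch below:

layer {
  name: "window_data"             # illustrative name
  type: "WindowData"
  top: "data"
  top: "label"
  window_data_param {
    source: "train_windows.txt"   # the window file described below
    batch_size: 128
    fg_threshold: 0.5
    bg_threshold: 0.5
    fg_fraction: 0.25
    context_pad: 16
    crop_mode: "warp"             # or "square"
    cache_images: false
    root_folder: "/path/to/images/"
  }
  transform_param {
    crop_size: 227
    mirror: true
    mean_file: "imagenet_mean.binaryproto"
  }
}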

window_data_layer.cpp

#ifdef USE_OPENCV  
#include <opencv2/highgui/highgui_c.h>  
#include <stdint.h>  

#include <algorithm>  
#include <map>  
#include <string>  
#include <utility>  
#include <vector>  

#include "opencv2/core/core.hpp"  
#include "opencv2/highgui/highgui.hpp"  
#include "opencv2/imgproc/imgproc.hpp"  

#include "caffe/common.hpp"  
#include "caffe/data_layers.hpp"  
#include "caffe/layer.hpp"  
#include "caffe/util/benchmark.hpp"  
#include "caffe/util/io.hpp"  
#include "caffe/util/math_functions.hpp"  
#include "caffe/util/rng.hpp"  

// caffe.proto > LayerParameter > WindowDataParameter  
//   'source' field specifies the window_file  
//   'crop_size' indicates the desired warped size  

namespace caffe {  

template <typename Dtype>  
WindowDataLayer<Dtype>::~WindowDataLayer<Dtype>() {  
  this->StopInternalThread();  
}  

// Parse the window data file and set up the shapes of the top blobs and prefetch buffers
template <typename Dtype>  
void WindowDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,  
      const vector<Blob<Dtype>*>& top) {  
  // LayerSetUp runs through the window_file and creates two structures  
  // that hold windows: one for foreground (object) windows and one  
  // for background (non-object) windows. We use an overlap threshold  
  // to decide which is which.  

  // window_file format  
  // repeated:  
  //    # image_index  
  //    img_path (abs path)  
  //    channels  
  //    height  
  //    width  
  //    num_windows  
  //    class_index overlap x1 y1 x2 y2  

  // Notes on the format above: "# <image_index>" starts a new image record
  // (e.g. "# 1" for the first image; note the space after '#'); (x1, y1) and
  // (x2, y2) are the top-left and bottom-right corners of a window.
  // Example record:
  //   # 1
  //   /1.jpg
  //   3
  //   720
  //   480
  //   100
  //   1 1 0 0 100 100
  // i.e. image index 1, relative path /1.jpg, 3 channels, height 720,
  // width 480, 100 windows; the window shown has label 1, overlap 1,
  // top-left corner (0, 0) and bottom-right corner (100, 100).


  LOG(INFO) << "Window data layer:" << std::endl  
      << "  foreground (object) overlap threshold: "  
      << this->layer_param_.window_data_param().fg_threshold() << std::endl  
      << "  background (non-object) overlap threshold: "  
      << this->layer_param_.window_data_param().bg_threshold() << std::endl  
      << "  foreground sampling fraction: "  
      << this->layer_param_.window_data_param().fg_fraction() << std::endl  
      << "  cache_images: "  
      << this->layer_param_.window_data_param().cache_images() << std::endl  
      << "  root_folder: "  
      << this->layer_param_.window_data_param().root_folder();  

  cache_images_ = this->layer_param_.window_data_param().cache_images();  
  string root_folder = this->layer_param_.window_data_param().root_folder();  

  // a random seed is only needed when the transform parameters
  // request mirroring or cropping
  const bool prefetch_needs_rand =  
      this->transform_param_.mirror() ||  
      this->transform_param_.crop_size();  
  if (prefetch_needs_rand) {  
    const unsigned int prefetch_rng_seed = caffe_rng_rand();  
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));  
  } else {  
    prefetch_rng_.reset();  
  }  

  // open the window file
  std::ifstream infile(this->layer_param_.window_data_param().source().c_str());  
  CHECK(infile.good()) << "Failed to open window file "  
      << this->layer_param_.window_data_param().source() << std::endl;  

  // label_hist maps each label to the number of windows with that label
  // (a label histogram)
  map<int, int> label_hist;  
  label_hist.insert(std::make_pair(0, 0));  

  string hashtag;  
  int image_index, channels;  
  // read the first record header to make sure the file is not empty
  if (!(infile >> hashtag >> image_index)) {  
    LOG(FATAL) << "Window file is empty";  
  }  
  do {  
    // each record must start with '#'
    CHECK_EQ(hashtag, "#");  
    // read image path  
    string image_path;  
    // the relative path from the file is joined with root_folder
    infile >> image_path;  
    image_path = root_folder + image_path;  
    // read image dimensions  
    vector<int> image_size(3);  
    // read the image dimensions: channels, height, width
    infile >> image_size[0] >> image_size[1] >> image_size[2];  
    channels = image_size[0];  
    // store the path and size in image_database_
    image_database_.push_back(std::make_pair(image_path, image_size));  

    // if image caching is enabled, also keep the image in image_database_cache_
    if (cache_images_) {  
      Datum datum;  
      // read the image file into a Datum
      if (!ReadFileToDatum(image_path, &datum)) {  
        LOG(ERROR) << "Could not open or find file " << image_path;  
        return;  
      }  
      // cache the Datum in image_database_cache_
      image_database_cache_.push_back(std::make_pair(image_path, datum));  
    }  
    // read each box  
    int num_windows;  
    // read the number of windows for this image
    infile >> num_windows;  
    // foreground/background overlap thresholds from the layer parameters
    const float fg_threshold =  
        this->layer_param_.window_data_param().fg_threshold();  
    const float bg_threshold =  
        this->layer_param_.window_data_param().bg_threshold();  
    for (int i = 0; i < num_windows; ++i) {  
      int label, x1, y1, x2, y2;  
      float overlap;  
      // read label, overlap with the object, x1, y1, x2, y2
      infile >> label >> overlap >> x1 >> y1 >> x2 >> y2;  

      // pack the fields into a window record, following the WindowField order
      vector<float> window(WindowDataLayer::NUM);  
      window[WindowDataLayer::IMAGE_INDEX] = image_index;  
      window[WindowDataLayer::LABEL] = label;  
      window[WindowDataLayer::OVERLAP] = overlap;  
      window[WindowDataLayer::X1] = x1;  
      window[WindowDataLayer::Y1] = y1;  
      window[WindowDataLayer::X2] = x2;  
      window[WindowDataLayer::Y2] = y2;  

      // add window to foreground list or background list:
      // a window whose overlap is at least fg_threshold is foreground
      if (overlap >= fg_threshold) {  
        int label = window[WindowDataLayer::LABEL];  
        // a foreground window must have a positive label;
        // label <= 0 here means the window file is inconsistent
        CHECK_GT(label, 0);  
        fg_windows_.push_back(window);  
        // increment this label's histogram count
        label_hist.insert(std::make_pair(label, 0));  
        label_hist[label]++;  
      } else if (overlap < bg_threshold) {  
        // overlap below bg_threshold: treat the window as background
        // background window, force label and overlap to 0  
        window[WindowDataLayer::LABEL] = 0;  
        window[WindowDataLayer::OVERLAP] = 0;  
        bg_windows_.push_back(window);  
        // increment the background (label 0) count
        label_hist[0]++;  
      }  
    }  

    // log progress every 100 images
    if (image_index % 100 == 0) {  
      LOG(INFO) << "num: " << image_index << " "  
          << image_path << " "  
          << image_size[0] << " "  
          << image_size[1] << " "  
          << image_size[2] << " "  
          << "windows to process: " << num_windows;  
    }  
  } while (infile >> hashtag >> image_index);  

  // after reading, log the total number of images
  LOG(INFO) << "Number of images: " << image_index+1;  

  // log how many windows each class has
  for (map<int, int>::iterator it = label_hist.begin();  
      it != label_hist.end(); ++it) {  
    LOG(INFO) << "class " << it->first << " has " << label_hist[it->first]  
              << " samples";  
  }  

  LOG(INFO) << "Amount of context padding: "  
      << this->layer_param_.window_data_param().context_pad();  

  LOG(INFO) << "Crop mode: "  
      << this->layer_param_.window_data_param().crop_mode();  

  // image  
  // crop_size from the transform parameters
  const int crop_size = this->transform_param_.crop_size();  
  CHECK_GT(crop_size, 0);  
  // batch_size from the window data parameters
  const int batch_size = this->layer_param_.window_data_param().batch_size();  
  // reshape top[0] to batch_size x channels x crop_size x crop_size
  top[0]->Reshape(batch_size, channels, crop_size, crop_size);  
  // give the prefetch buffers the same data shape
  for (int i = 0; i < this->PREFETCH_COUNT; ++i)  
    this->prefetch_[i].data_.Reshape(  
        batch_size, channels, crop_size, crop_size);  

  LOG(INFO) << "output data size: " << top[0]->num() << ","  
      << top[0]->channels() << "," << top[0]->height() << ","  
      << top[0]->width();  
  // label  
  // reshape top[1] to hold one label per item in the batch
  vector<int> label_shape(1, batch_size);  
  top[1]->Reshape(label_shape);  
  // give the prefetch buffers the same label shape
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {  
    this->prefetch_[i].label_.Reshape(label_shape);  
  }  

  // data mean  
  // the mean can come from a mean file or from mean_value entries
  has_mean_file_ = this->transform_param_.has_mean_file();  
  has_mean_values_ = this->transform_param_.mean_value_size() > 0;  
  if (has_mean_file_) {  // load the mean blob from the mean file
    const string& mean_file =  
          this->transform_param_.mean_file();  
    LOG(INFO) << "Loading mean file from: " << mean_file;  
    BlobProto blob_proto;  
    ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);  
    data_mean_.FromProto(blob_proto);  
  }  
  if (has_mean_values_) {  // read mean values directly from the transform parameters
    CHECK(has_mean_file_ == false) <<  
      "Cannot specify mean_file and mean_value at the same time";  
    for (int c = 0; c < this->transform_param_.mean_value_size(); ++c) {  
      mean_values_.push_back(this->transform_param_.mean_value(c));  
    }  

    // there must be either a single mean value (shared by all channels)
    // or exactly one per channel
    CHECK(mean_values_.size() == 1 || mean_values_.size() == channels) <<  
     "Specify either 1 mean_value or as many as channels: " << channels;  
    if (channels > 1 && mean_values_.size() == 1) {  
      // Replicate the mean_value for simplicity  
      for (int c = 1; c < channels; ++c) {  
        mean_values_.push_back(mean_values_[0]);  
      }  
    }  
  }  
}  

// Draw a random number from the prefetch RNG
template <typename Dtype>  
unsigned int WindowDataLayer<Dtype>::PrefetchRand() {  
  CHECK(prefetch_rng_);  
  caffe::rng_t* prefetch_rng =  
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());  
  return (*prefetch_rng)();  
}  

// load_batch is required by BasePrefetchingDataLayer;
// it runs on the prefetch thread to fill one Batch
template <typename Dtype>  
void WindowDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {  
  // At each iteration, sample N windows where N*p are foreground (object)  
  // windows and N*(1-p) are background (non-object) windows  
  CPUTimer batch_timer;  
  batch_timer.Start();  
  double read_time = 0;  
  double trans_time = 0;  
  CPUTimer timer;  
  // pointers to the batch's data and label buffers
  Dtype* top_data = batch->data_.mutable_cpu_data();  
  Dtype* top_label = batch->label_.mutable_cpu_data();  
  // pixel scaling factor
  const Dtype scale = this->layer_param_.window_data_param().scale();  
  // batch_size  
  const int batch_size = this->layer_param_.window_data_param().batch_size();  
  // amount of context padding around each window (in pixels)
  const int context_pad = this->layer_param_.window_data_param().context_pad();  
  // crop_size  
  const int crop_size = this->transform_param_.crop_size();  
  // whether random mirroring is enabled
  const bool mirror = this->transform_param_.mirror();  
  // fraction of the batch that should be foreground windows
  const float fg_fraction =  
      this->layer_param_.window_data_param().fg_fraction();  
  Dtype* mean = NULL;  
  int mean_off = 0;  
  int mean_width = 0;  
  int mean_height = 0;  
  // if a mean file was loaded in DataLayerSetUp
  if (this->has_mean_file_) {  
    mean = this->data_mean_.mutable_cpu_data();  
    // offset of the centered crop_size x crop_size crop inside the mean image
    mean_off = (this->data_mean_.width() - crop_size) / 2;  
    mean_width = this->data_mean_.width();  
    mean_height = this->data_mean_.height();  
  }  
  cv::Size cv_crop_size(crop_size, crop_size);  
  // crop mode: "warp" or "square"
  const string& crop_mode = this->layer_param_.window_data_param().crop_mode();  

  bool use_square = (crop_mode == "square") ? true : false;  

  // zero out batch  
  caffe_set(batch->data_.count(), Dtype(0), top_data);  

  // number of foreground windows to sample in this batch
  const int num_fg = static_cast<int>(static_cast<float>(batch_size)  
      * fg_fraction);  
  // number of samples to draw: num_samples[0] background, num_samples[1] foreground
  const int num_samples[2] = { batch_size - num_fg, num_fg };  

  int item_id = 0;  
  // sample from the background set first, then the foreground set
  for (int is_fg = 0; is_fg < 2; ++is_fg) {  
    for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) {  
      // sample a window  
      timer.Start();  
      // draw a random index
      const unsigned int rand_index = PrefetchRand();
      // fg_windows_ and bg_windows_ were filled in DataLayerSetUp from the
      // window file; pick one window at random from the appropriate list
      vector<float> window = (is_fg) ?  
          fg_windows_[rand_index % fg_windows_.size()] :  
          bg_windows_[rand_index % bg_windows_.size()];  

      // randomly decide whether to mirror this window
      bool do_mirror = mirror && PrefetchRand() % 2;  

      // look up the path and size of the image containing the window
      pair<std::string, vector<int> > image =  
          image_database_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];  

      // read the image
      cv::Mat cv_img;  
      if (this->cache_images_) {  
        // images were cached in memory: fetch the cached Datum
        pair<std::string, Datum> image_cached =  
          image_database_cache_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];  
        // decode the Datum into an OpenCV Mat
        cv_img = DecodeDatumToCVMat(image_cached.second, true);  
      } else {  
        // otherwise read the image from disk
        cv_img = cv::imread(image.first, CV_LOAD_IMAGE_COLOR);  
        if (!cv_img.data) {  
          LOG(ERROR) << "Could not open or find file " << image.first;  
          return;  
        }  
      }  
      read_time += timer.MicroSeconds();  
      timer.Start();  
      const int channels = cv_img.channels();  

      // crop window out of image and warp it  
      // window coordinates
      int x1 = window[WindowDataLayer<Dtype>::X1];  
      int y1 = window[WindowDataLayer<Dtype>::Y1];  
      int x2 = window[WindowDataLayer<Dtype>::X2];  
      int y2 = window[WindowDataLayer<Dtype>::Y2];  

      int pad_w = 0;  
      int pad_h = 0;  
      // context_pad adds a border of context around the window: the region is
      // expanded so that, after warping to crop_size x crop_size, exactly
      // context_pad padded pixels remain on each side, i.e.
      // context_scale = crop_size / (crop_size - 2*context_pad)
      if (context_pad > 0 || use_square) {  
        // scale factor by which to expand the original region  
        // such that after warping the expanded region to crop_size x crop_size  
        // there's exactly context_pad amount of padding on each side  
        Dtype context_scale = static_cast<Dtype>(crop_size) /  
            static_cast<Dtype>(crop_size - 2*context_pad);  
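        // For example (illustrative numbers): with crop_size = 227 and
        // context_pad = 16, context_scale = 227 / (227 - 32) = 227/195,
        // about 1.164, so the window is enlarged by ~16% before warping.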

        // compute the expanded region  
        Dtype half_height = static_cast<Dtype>(y2-y1+1)/2.0;  // half the window height
        Dtype half_width = static_cast<Dtype>(x2-x1+1)/2.0;   // half the window width
        Dtype center_x = static_cast<Dtype>(x1) + half_width;   // window center x
        Dtype center_y = static_cast<Dtype>(y1) + half_height;  // window center y
        if (use_square) {  // square mode: grow the shorter side to match the longer one
          if (half_height > half_width) {  
            half_width = half_height;  
          } else {  
            half_height = half_width;  
          }  
        }  

        // coordinates of the expanded region
        x1 = static_cast<int>(round(center_x - half_width*context_scale));  
        x2 = static_cast<int>(round(center_x + half_width*context_scale));  
        y1 = static_cast<int>(round(center_y - half_height*context_scale));  
        y2 = static_cast<int>(round(center_y + half_height*context_scale));  

        // the expanded region may go outside of the image  
        // so we compute the clipped (expanded) region and keep track of  
        // the extent beyond the image  
        // clip the expanded window to the image and remember how much
        // was cut off on each side (the "pad" amounts)
        int unclipped_height = y2-y1+1;  
        int unclipped_width = x2-x1+1;  
        int pad_x1 = std::max(0, -x1);  
        int pad_y1 = std::max(0, -y1);  
        int pad_x2 = std::max(0, x2 - cv_img.cols + 1);  
        int pad_y2 = std::max(0, y2 - cv_img.rows + 1);  
        // clip bounds  
        x1 = x1 + pad_x1;  
        x2 = x2 - pad_x2;  
        y1 = y1 + pad_y1;  
        y2 = y2 - pad_y2;  
        CHECK_GT(x1, -1);  
        CHECK_GT(y1, -1);  
        CHECK_LT(x2, cv_img.cols);  
        CHECK_LT(y2, cv_img.rows);  

        // height and width after clipping
        int clipped_height = y2-y1+1;  
        int clipped_width = x2-x1+1;  

        // scale factors that would be used to warp the unclipped  
        // expanded region  
        // scale_x / scale_y = crop_size divided by the unclipped width / height
        Dtype scale_x =  
            static_cast<Dtype>(crop_size)/static_cast<Dtype>(unclipped_width);  
        Dtype scale_y =  
            static_cast<Dtype>(crop_size)/static_cast<Dtype>(unclipped_height);  

        // size to warp the clipped expanded region to  
        // the clipped region scaled by scale_x/scale_y gives the size to warp to
        cv_crop_size.width =  
            static_cast<int>(round(static_cast<Dtype>(clipped_width)*scale_x));  
        cv_crop_size.height =  
            static_cast<int>(round(static_cast<Dtype>(clipped_height)*scale_y));  
        // scale the padding amounts by the same factors
        pad_x1 = static_cast<int>(round(static_cast<Dtype>(pad_x1)*scale_x));  
        pad_x2 = static_cast<int>(round(static_cast<Dtype>(pad_x2)*scale_x));  
        pad_y1 = static_cast<int>(round(static_cast<Dtype>(pad_y1)*scale_y));  
        pad_y2 = static_cast<int>(round(static_cast<Dtype>(pad_y2)*scale_y));  

        pad_h = pad_y1;  
        // if we're mirroring, we mirror the padding too (to be pedantic)  
        // when mirroring, the padding is mirrored too
        if (do_mirror) {  
          pad_w = pad_x2;  
        } else {  
          pad_w = pad_x1;  
        }  

        // ensure that the warped, clipped region plus the padding fits in the  
        // crop_size x crop_size image (it might not due to rounding)  
        // make sure the warped region plus padding fits within crop_size x crop_size
        if (pad_h + cv_crop_size.height > crop_size) {  
          cv_crop_size.height = crop_size - pad_h;  
        }  
        if (pad_w + cv_crop_size.width > crop_size) {  
          cv_crop_size.width = crop_size - pad_w;  
        }  
      }  

      cv::Rect roi(x1, y1, x2-x1+1, y2-y1+1);  
      // crop the (possibly expanded and clipped) window out of the image
      cv::Mat cv_cropped_img = cv_img(roi);  
      // resize to cv_crop_size with bilinear interpolation
      cv::resize(cv_cropped_img, cv_cropped_img,  
          cv_crop_size, 0, 0, cv::INTER_LINEAR);  

      // horizontal flip at random  
      if (do_mirror) {  
        // flip the cropped window horizontally
        cv::flip(cv_cropped_img, cv_cropped_img, 1);  
      }  

      // copy the warped window into top_data  
      for (int h = 0; h < cv_cropped_img.rows; ++h) {  
        const uchar* ptr = cv_cropped_img.ptr<uchar>(h);  
        int img_index = 0;  
        for (int w = 0; w < cv_cropped_img.cols; ++w) {  
          for (int c = 0; c < channels; ++c) {  
            int top_index = ((item_id * channels + c) * crop_size + h + pad_h)  
                     * crop_size + w + pad_w;  
            // int top_index = (c * height + h) * width + w;  
            Dtype pixel = static_cast<Dtype>(ptr[img_index++]);  
            if (this->has_mean_file_) {  // subtract the corresponding pixel of the mean file
              int mean_index = (c * mean_height + h + mean_off + pad_h)  
                           * mean_width + w + mean_off + pad_w;  
              top_data[top_index] = (pixel - mean[mean_index]) * scale;  
            } else {  
              if (this->has_mean_values_) {  // subtract the per-channel mean value
                top_data[top_index] = (pixel - this->mean_values_[c]) * scale;  
              } else {  
                top_data[top_index] = pixel * scale;  // no mean: just scale the pixel
              }  
            }  
          }  
        }  
      }  
      trans_time += timer.MicroSeconds();  
      // get window label  
      top_label[item_id] = window[WindowDataLayer<Dtype>::LABEL];  

      #if 0  
      // useful debugging code for dumping transformed windows to disk  
      string file_id;  
      std::stringstream ss;  
      ss << PrefetchRand();  
      ss >> file_id;  
      std::ofstream inf((string("dump/") + file_id +  
          string("_info.txt")).c_str(), std::ofstream::out);  
      inf << image.first << std::endl  
          << window[WindowDataLayer<Dtype>::X1]+1 << std::endl  
          << window[WindowDataLayer<Dtype>::Y1]+1 << std::endl  
          << window[WindowDataLayer<Dtype>::X2]+1 << std::endl  
          << window[WindowDataLayer<Dtype>::Y2]+1 << std::endl  
          << do_mirror << std::endl  
          << top_label[item_id] << std::endl  
          << is_fg << std::endl;  
      inf.close();  
      std::ofstream top_data_file((string("dump/") + file_id +  
          string("_data.txt")).c_str(),  
          std::ofstream::out | std::ofstream::binary);  
      for (int c = 0; c < channels; ++c) {  
        for (int h = 0; h < crop_size; ++h) {  
          for (int w = 0; w < crop_size; ++w) {  
            top_data_file.write(reinterpret_cast<char*>(  
                &top_data[((item_id * channels + c) * crop_size + h)  
                          * crop_size + w]),  
                sizeof(Dtype));  
          }  
        }  
      }  
      top_data_file.close();  
      #endif  

      item_id++;  
    }  
  }  
  batch_timer.Stop();  
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";  
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";  
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";  
}  

INSTANTIATE_CLASS(WindowDataLayer);  
REGISTER_LAYER_CLASS(WindowData);  

}  // namespace caffe  
#endif  // USE_OPENCV  