base_data_layer.hpp:
#ifndef CAFFE_DATA_LAYERS_HPP_
#define CAFFE_DATA_LAYERS_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/blocking_queue.hpp"
namespace caffe {
/**
 * @brief Provides base for data layers that feed blobs to the Net.
 *
 * Stores the layer's transformation parameters and the DataTransformer built
 * from them, and defines the setup protocol: LayerSetUp() does the work that
 * is common to all data layers and then calls DataLayerSetUp() so that each
 * concrete layer type can do its own setup. Reshape() and the two Backward
 * methods are given empty bodies here.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class BaseDataLayer : public Layer<Dtype> {
public:
// Forwards param to Layer<Dtype> and keeps a copy of param.transform_param().
explicit BaseDataLayer(const LayerParameter& param);
// LayerSetUp: implements common data layer setup functionality, and calls
// DataLayerSetUp to do special data layer setup for individual layer types.
// This method may not be overridden except by the BasePrefetchingDataLayer.
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// Hook for layer-type-specific setup; invoked at the end of LayerSetUp.
// The default implementation does nothing.
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}
// Data layers have no bottoms, so reshaping is trivial.
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}
// Backward passes are no-ops for this class (empty bodies).
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
protected:
// Copy of the layer's transform_param, taken in the constructor.
TransformationParameter transform_param_;
// Created in LayerSetUp from transform_param_ and the layer's phase.
shared_ptr<DataTransformer<Dtype> > data_transformer_;
// Set in LayerSetUp: false when there is exactly one top blob, true otherwise.
bool output_labels_;
};
/**
 * @brief One prefetched unit of input: a data blob together with its label
 *        blob. Both members are public and accessed directly by the
 *        prefetching layer.
 */
template <typename Dtype>
class Batch {
 public:
  Blob<Dtype> data_;
  Blob<Dtype> label_;
};
/**
 * @brief Data layer base that loads batches on an internal thread.
 *
 * Batch objects circulate between two blocking queues: the internal thread
 * pops a batch from prefetch_free_, fills it through load_batch(), and pushes
 * it onto prefetch_full_; Forward_cpu pops from prefetch_full_ and returns the
 * previously served batch to prefetch_free_.
 */
template <typename Dtype>
class BasePrefetchingDataLayer :
public BaseDataLayer<Dtype>, public InternalThread {
public:
// Sizes prefetch_ from param.data_param().prefetch(), allocates one Batch per
// entry and places every batch on the free queue.
explicit BasePrefetchingDataLayer(const LayerParameter& param);
// LayerSetUp: implements common data layer setup functionality, and calls
// DataLayerSetUp to do special data layer setup for individual layer types.
// This method may not be overridden.
void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// Serves the next loaded batch: reshapes the top blobs to match it and points
// their CPU data at the batch's data (and label, when labels are output).
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
// GPU counterpart of Forward_cpu. Its definition is not in the accompanying
// .cpp; in CPU_ONLY builds that file supplies it via STUB_GPU_FORWARD.
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
protected:
// Body of the prefetch thread: repeatedly takes a free batch, loads it and
// queues it as full until must_stop() is true or the thread is interrupted.
virtual void InternalThreadEntry();
// Fills *batch with the next batch of input. Called from InternalThreadEntry
// with a batch taken from prefetch_free_; concrete layers must implement it.
virtual void load_batch(Batch<Dtype>* batch) = 0;
// Owns the Batch objects; the queues and prefetch_current_ hold raw pointers
// into this vector.
vector<shared_ptr<Batch<Dtype> > > prefetch_;
// Batches available to be filled by the prefetch thread.
BlockingQueue<Batch<Dtype>*> prefetch_free_;
// Batches that have been loaded and are waiting for Forward.
BlockingQueue<Batch<Dtype>*> prefetch_full_;
// Batch most recently handed out by Forward_cpu; value-initialized (null) in
// the constructor, so it is null until the first Forward_cpu call.
Batch<Dtype>* prefetch_current_;
// Blob used as the per-item transform target; DataLayer::load_batch repoints
// its CPU data at each item's position inside the batch data.
Blob<Dtype> transformed_data_;
};
} // namespace caffe
#endif // CAFFE_DATA_LAYERS_HPP_
base_data_layer.cpp:
#include <boost/thread.hpp>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/blocking_queue.hpp"
namespace caffe {
/**
 * Runs the Layer<Dtype> base-class constructor first, then initializes
 * transform_param_ with a copy of param.transform_param().
 * TransformationParameter is a data type defined in caffe.proto.
 */
template <typename Dtype>
BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
    : Layer<Dtype>(param), transform_param_(param.transform_param()) {}
/**
 * Common setup for all data layers: decides whether labels are produced,
 * creates and seeds the data transformer, then hands over to the
 * layer-specific DataLayerSetUp().
 */
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                                      const vector<Blob<Dtype>*>& top) {
  // Exactly one top blob means "data only"; any other count enables labels.
  output_labels_ = (top.size() != 1);

  data_transformer_.reset(
      new DataTransformer<Dtype>(transform_param_, this->phase_));
  data_transformer_->InitRand();

  // The subclasses should setup the size of bottom and top.
  // DataLayerSetUp is virtual and its definition in this class is an empty
  // body, so the code that actually runs here is the override supplied by the
  // concrete subclass (e.g. DataLayer, ImageDataLayer).
  DataLayerSetUp(bottom, top);
}
/**
 * prefetch_(param.data_param().prefetch()) constructs the vector with
 * param.data_param().prefetch() elements (the same form as
 * `vector<int> a(5)`), i.e. that many batches are kept for prefetching.
 * Both queues start empty and prefetch_current_ is value-initialized.
 */
template <typename Dtype>
BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
    const LayerParameter& param)
    : BaseDataLayer<Dtype>(param),
      prefetch_(param.data_param().prefetch()),
      prefetch_free_(),
      prefetch_full_(),
      prefetch_current_() {
  // prefetch_ is a vector<shared_ptr<Batch<Dtype> > >: give every element a
  // freshly allocated Batch to own, and register that same Batch (as a raw
  // pointer) on the free queue so the prefetch thread can pick it up.
  for (size_t slot = 0; slot < prefetch_.size(); ++slot) {
    Batch<Dtype>* const fresh = new Batch<Dtype>();
    prefetch_[slot].reset(fresh);
    prefetch_free_.push(fresh);
  }
}
/**
 * Runs the common data-layer setup, touches the CPU (and, in GPU mode, GPU)
 * data of every prefetch batch, reseeds the transformer and finally starts
 * the prefetch thread.
 */
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);

  // Before starting the prefetch thread, we make cpu_data and gpu_data
  // calls so that the prefetch thread does not accidentally make simultaneous
  // cudaMalloc calls when the main thread is running. In some GPUs this
  // seems to cause failures if we do not so.
  //
  // Per the original author's notes on the call chain:
  // Blob<Dtype>::mutable_cpu_data() forwards to
  // SyncedMemory::mutable_cpu_data(), which calls SyncedMemory::to_cpu(),
  // which in turn uses CaffeMallocHost to allocate and initialize cpu_ptr_.
  // The loop below therefore allocates host memory behind the data_ and
  // label_ blobs of every Batch.
  for (size_t slot = 0; slot < prefetch_.size(); ++slot) {
    Batch<Dtype>& batch = *prefetch_[slot];
    batch.data_.mutable_cpu_data();
    if (this->output_labels_) {
      batch.label_.mutable_cpu_data();
    }
  }
#ifndef CPU_ONLY
  // Same idea for the device side: per the original notes this allocates and
  // initializes the gpu_ptr_ memory behind each batch's data_ and label_.
  if (Caffe::mode() == Caffe::GPU) {
    for (size_t slot = 0; slot < prefetch_.size(); ++slot) {
      Batch<Dtype>& batch = *prefetch_[slot];
      batch.data_.mutable_gpu_data();
      if (this->output_labels_) {
        batch.label_.mutable_gpu_data();
      }
    }
  }
#endif
  DLOG(INFO) << "Initializing prefetch";
  this->data_transformer_->InitRand();
  // StartInternalThread() comes from InternalThread. Per the original notes it
  // creates a boost::thread running InternalThread::entry, which ends up
  // calling BasePrefetchingDataLayer<Dtype>::InternalThreadEntry(); that
  // function calls load_batch to read data (e.g. from lmdb/leveldb).
  StartInternalThread();
  DLOG(INFO) << "Prefetch initialized.";
}
/**
 * Prefetch-thread body. Until must_stop() reports true (or the thread is
 * interrupted), it takes a batch from the free queue, fills it with
 * load_batch(), pushes it to the GPU when running in GPU mode, and then puts
 * it on the full queue.
 */
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
  cudaStream_t stream;
  if (Caffe::mode() == Caffe::GPU) {
    // Create a non-blocking stream for the asynchronous uploads below.
    CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  }
#endif

  try {
    for (;;) {
      if (must_stop()) {
        break;
      }
      // Take a free batch and load data into it.
      Batch<Dtype>* const filled = prefetch_free_.pop();
      load_batch(filled);
#ifndef CPU_ONLY
      // Push the host data to the device and wait for the copy to finish.
      if (Caffe::mode() == Caffe::GPU) {
        filled->data_.data()->async_gpu_push(stream);
        if (this->output_labels_) {
          filled->label_.data()->async_gpu_push(stream);
        }
        CUDA_CHECK(cudaStreamSynchronize(stream));
      }
#endif
      // Hand the loaded batch over to the consumer side.
      prefetch_full_.push(filled);
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }

#ifndef CPU_ONLY
  // Destroy the stream created at the top of this function.
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamDestroy(stream));
  }
#endif
}
/**
 * Serves one prefetched batch through the top blobs: top[0] receives the
 * data and, when labels are output, top[1] receives the labels.
 */
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // If a batch from an earlier call is still held, return it to the free
  // queue so the prefetch thread can reuse it for later data.
  if (prefetch_current_) {
    prefetch_free_.push(prefetch_current_);
  }
  // Take the next loaded batch from the full queue.
  prefetch_current_ = prefetch_full_.pop("Waiting for data");
  Batch<Dtype>& ready = *prefetch_current_;

  // Reshape to loaded data, then point the top blob's CPU data at it.
  Blob<Dtype>* const data_top = top[0];
  data_top->ReshapeLike(ready.data_);
  data_top->set_cpu_data(ready.data_.mutable_cpu_data());

  if (!this->output_labels_) {
    return;
  }
  // Reshape to loaded labels, then point the label blob's CPU data at them.
  Blob<Dtype>* const label_top = top[1];
  label_top->ReshapeLike(ready.label_);
  label_top->set_cpu_data(ready.label_.mutable_cpu_data());
}
// In CPU-only builds, stub out the GPU forward pass of
// BasePrefetchingDataLayer (STUB_GPU_FORWARD is defined outside this file).
#ifdef CPU_ONLY
STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward);
#endif
// NOTE(review): INSTANTIATE_CLASS is defined outside this file; presumably it
// emits the explicit template instantiations for the supported Dtype types.
INSTANTIATE_CLASS(BaseDataLayer);
INSTANTIATE_CLASS(BasePrefetchingDataLayer);
} // namespace caffe
load_batch函数代码分析:
// This function is called on prefetch thread
//
// Fills *batch with batch_size items read through cursor_: every record is
// parsed into a Datum, transformed directly into its position inside
// batch->data_, and (when labels are output) its label is stored in
// batch->label_. Read and transform times are accumulated and logged.
template <typename Dtype>
void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_us = 0;
  double transform_us = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  const int batch_size = this->layer_param_.data_param().batch_size();

  Datum datum;
  for (int n = 0; n < batch_size; ++n) {
    // timer.Start() / timer.MicroSeconds() bracket the read phase.
    // datum.ParseFromString comes from the protobuf library and decodes the
    // record read from the local lmdb/leveldb database.
    timer.Start();
    for (; Skip(); Next()) {
    }
    datum.ParseFromString(cursor_->value());
    read_us += timer.MicroSeconds();

    if (n == 0) {
      // Reshape according to the first datum of each batch
      // on single input batches allows for inputs of varying dimension.
      // Use data_transformer to infer the expected blob shape from datum;
      // Blob<Dtype>::Reshape then sizes transformed_data_ accordingly.
      vector<int> shape = this->data_transformer_->InferBlobShape(datum);
      this->transformed_data_.Reshape(shape);
      // Reshape batch according to the batch_size: only the first dimension
      // changes. Per the original notes, Blob<Dtype>::Reshape allocates the
      // storage with
      //   data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
      //   diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
      shape[0] = batch_size;
      batch->data_.Reshape(shape);
    }

    // Apply data transformations (mirror, scale, crop...)
    timer.Start();
    // Offset of item n inside the batch blob.
    const int item_offset = batch->data_.offset(n);
    // Start address of the whole batch's data.
    Dtype* const batch_data = batch->data_.mutable_cpu_data();
    // Blob<Dtype>::set_cpu_data goes through the blob's
    // shared_ptr<SyncedMemory> data_ to SyncedMemory::set_cpu_data(void*),
    // which updates cpu_ptr_; transformed_data_ thus points at
    // batch_data + item_offset.
    this->transformed_data_.set_cpu_data(batch_data + item_offset);
    // DataTransformer<Dtype>::Transform crops/scales the datum and writes the
    // result into transformed_data_, i.e. straight into the batch.
    this->data_transformer_->Transform(datum, &(this->transformed_data_));
    // Copy label.
    if (this->output_labels_) {
      // Per the original notes, Blob<Dtype>::mutable_cpu_data() calls
      // SyncedMemory::mutable_cpu_data(), which synchronizes CPU/GPU data via
      // SyncedMemory::to_cpu() and returns cpu_ptr_; datum.label() is then
      // written into that buffer.
      Dtype* const labels = batch->label_.mutable_cpu_data();
      labels[n] = datum.label();
    }
    transform_us += timer.MicroSeconds();

    Next();
  }
  timer.Stop();
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << " Read time: " << read_us / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << transform_us / 1000 << " ms.";
}