数据读取层(DataLayer)除了从LMDB、LEVELDB数据库读取数据之外,也可以通过ImageDataLayer从原始图像文件直接读取。
一.数据结构描述
// Parameters of the database-backed data layer.
message DataParameter {
// Which key-value database backend stores the examples.
enum DB {
LEVELDB = 0;// use a LevelDB database
LMDB = 1;// use an LMDB database
}
// Path to the source database.
optional string source = 1;
// Number of examples per batch.
optional uint32 batch_size = 4;
// NOTE(review): presumably skips up to this many entries at startup
// (e.g. to decorrelate parallel readers) — confirm against the reader code.
optional uint32 rand_skip = 7 [default = 0];
// Database backend to use; LevelDB by default.
optional DB backend = 8 [default = LEVELDB];
// Multiplicative scale applied to the input data.
optional float scale = 2 [default = 1];
// Path to a mean file to subtract from the input.
optional string mean_file = 3;
// Crop images to crop_size x crop_size; 0 disables cropping.
optional uint32 crop_size = 5 [default = 0];
// Whether to randomly mirror the input.
optional bool mirror = 6 [default = false];
// Force encoded images to be decoded as 3-channel color.
optional bool force_encoded_color = 9 [default = false];
// Prefetch queue: number of batches kept ready in host memory (default 4).
optional uint32 prefetch = 10 [default = 4];
}
二.数据读取层实现
数据读取层实现位于src/caffe/layers/base_data_layer.cpp中,内容如下:
(1)BaseDataLayer
template <typename Dtype>
// Constructor: forwards the layer parameter to the Layer base class and
// caches the transformation parameters for the data transformer built later
// in LayerSetUp.
BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
: Layer<Dtype>(param),
transform_param_(param.transform_param()) {
}
// Common setup for all data layers: decide whether labels are produced,
// construct and seed the data transformer, then let the subclass shape
// the top blobs.
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // One top blob means data only; a second top blob carries labels.
  output_labels_ = (top.size() != 1);
  // Build the transformer from the cached parameters and current phase,
  // and initialize its random number generator.
  data_transformer_.reset(
      new DataTransformer<Dtype>(transform_param_, this->phase_));
  data_transformer_->InitRand();
  // The subclasses should setup the size of bottom and top.
  DataLayerSetUp(bottom, top);
}
(2)BasePrefetchingDataLayer
template <typename Dtype>
// Constructor: sizes the prefetch ring from the layer's prefetch count,
// allocates every Batch, and parks them all on the free queue.
BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
    const LayerParameter& param)
    : BaseDataLayer<Dtype>(param),
      prefetch_(param.data_param().prefetch()),
      prefetch_free_(), prefetch_full_(), prefetch_current_() {
  for (auto& slot : prefetch_) {
    slot.reset(new Batch<Dtype>());
    // Every freshly allocated batch starts out on the free queue.
    prefetch_free_.push(slot.get());
  }
}
// Setup for the prefetching data layer: run the base-class setup, touch the
// CPU (and, in GPU mode, GPU) buffers of every prefetch batch so all
// allocations happen on the main thread, then start the prefetch thread.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);
  // Before starting the prefetch thread, we make cpu_data and gpu_data
  // calls so that the prefetch thread does not accidentally make simultaneous
  // cudaMalloc calls when the main thread is running. In some GPUs this
  // seems to cause failures if we do not so.
  for (auto& batch : prefetch_) {
    batch->data_.mutable_cpu_data();
    if (this->output_labels_) {
      batch->label_.mutable_cpu_data();
    }
  }
#ifndef CPU_ONLY
  // Pre-touch the GPU buffers as well when running in GPU mode.
  if (Caffe::mode() == Caffe::GPU) {
    for (auto& batch : prefetch_) {
      batch->data_.mutable_gpu_data();
      if (this->output_labels_) {
        batch->label_.mutable_gpu_data();
      }
    }
  }
#endif
  DLOG(INFO) << "Initializing prefetch";
  this->data_transformer_->InitRand();
  // Launch the background thread that fills the prefetch queue.
  StartInternalThread();
  DLOG(INFO) << "Prefetch initialized.";
}
(3)InternalThreadEntry内部预取线程入口
template <typename Dtype>
// Entry point of the internal prefetch thread: repeatedly takes a free
// Batch, fills it, optionally pushes its data to the GPU, and publishes it
// on the full queue, until the thread is asked to stop.
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
// In GPU mode, create a non-blocking CUDA stream for async host->GPU copies.
#ifndef CPU_ONLY
cudaStream_t stream;
if (Caffe::mode() == Caffe::GPU) {
CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
}
#endif
try {
while (!must_stop()) {// keep loading batches until asked to stop
Batch<Dtype>* batch = prefetch_free_.pop();// take a free batch
load_batch(batch);// fill it with data
#ifndef CPU_ONLY
if (Caffe::mode() == Caffe::GPU) {
batch->data_.data().get()->async_gpu_push(stream);
if (this->output_labels_) {
batch->label_.data().get()->async_gpu_push(stream);
}
CUDA_CHECK(cudaStreamSynchronize(stream));// wait for the copies to finish
}
#endif
prefetch_full_.push(batch);// publish the loaded batch on the full queue
}
} catch (boost::thread_interrupted&) {// leave the loop on interruption
// Interrupted exception is expected on shutdown
}
#ifndef CPU_ONLY
if (Caffe::mode() == Caffe::GPU) {
CUDA_CHECK(cudaStreamDestroy(stream));// release the CUDA stream
}
#endif
}
(4)Forward_cpu前向传播
template <typename Dtype>
// CPU forward pass: recycles the previously served batch back to the free
// queue, then takes the next loaded batch from the full queue and exposes
// its data (and labels, if any) through the top blobs without copying.
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
// Return the batch served on the previous call to the free queue
// before popping a new one, so the prefetch thread can refill it.
if (prefetch_current_) {
prefetch_free_.push(prefetch_current_);
}
prefetch_current_ = prefetch_full_.pop("Waiting for data");
// Reshape to loaded data.
top[0]->ReshapeLike(prefetch_current_->data_);
// Share the batch's buffer with the top blob instead of copying it.
top[0]->set_cpu_data(prefetch_current_->data_.mutable_cpu_data());
if (this->output_labels_) {// only when this layer also outputs labels
// Reshape to loaded labels.
top[1]->ReshapeLike(prefetch_current_->label_);
top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data());
}
}