caffe源码解读(5)－image_data_layer.cpp

最新推荐文章于 2021-02-06 15:04:06 发布

冬后晚晴

最新推荐文章于 2021-02-06 15:04:06 发布

阅读量3.1k

点赞数 1

分类专栏： caffe源码学习文章标签：源码数据

本文链接：https://blog.csdn.net/weixin_37970694/article/details/79192778

版权

caffe源码学习专栏收录该内容

15 篇文章 2 订阅

订阅专栏

数据层：image_data_layer 层用于让 Caffe 直接从图片文件读取数据并进行处理。
层类型：ImageData
必须设置的参数：①source:一个文本文件的名字，每行给定一个图片文件的名称和标签；②batch_size:每一次处理的数据个数，即图片数。
可选参数：①rand_skip:在开始时随机跳过若干张图片（跳过的数量小于 rand_skip）；②shuffle:随机打乱图片顺序；③new_height,new_width:如果设置（两者必须同时设置），则将图片 Resize 到该尺寸。

(1)caffe.proto层参数定义

// Parameters of the ImageData layer, which reads the images named in a text
// list file.
message ImageDataParameter {
  optional string source = 1;// text file listing one image filename and its label per line
  optional uint32 batch_size = 4 [default = 1];// number of images per batch
  optional uint32 rand_skip = 7 [default = 0];// if non-zero, a random number of entries (less than this value) is skipped at start-up
  optional bool shuffle = 8 [default = false];// randomly shuffle the order of the list entries
  optional uint32 new_height = 9 [default = 0];// resize height; must be set together with new_width
  optional uint32 new_width = 10 [default = 0];// resize width; must be set together with new_height
  optional bool is_color = 11 [default = true];// whether images are read as color images
  // NOTE(review): scale is not read by the layer code shown in this article;
  // presumably a pixel scaling factor - confirm against the transformer.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;// mean file
  optional uint32 crop_size = 5 [default = 0];// crop size
  optional bool mirror = 6 [default = false];// mirror (flip) the image
  optional string root_folder = 12 [default = ""];// root directory prepended to every image filename
}

(2)DataLayerSetUp层创建

// Sets up the ImageData layer: reads the list of (filename, label) entries,
// optionally shuffles it or skips a random number of leading entries, and
// shapes the top blobs and the prefetch buffers from the first image that
// will be read.
//
// bottom is not used by this function. top[0] is shaped to hold a batch of
// image data and top[1] to hold the corresponding batch of labels.
template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Layer parameters read from image_data_param.
  // Target height for resizing.
  const int new_height = this->layer_param_.image_data_param().new_height();
  // Target width for resizing.
  const int new_width  = this->layer_param_.image_data_param().new_width();
  // Whether images are read as color images.
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  // Directory prefix prepended to every filename from the list.
  string root_folder = this->layer_param_.image_data_param().root_folder();
  // new_height and new_width must either both be 0 or both be positive;
  // setting only one of them is rejected.
  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";
  // Read the file with filenames and labels.
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  // Fail with a precise message when the list cannot be opened, instead of
  // the misleading "File is empty" further down.
  CHECK(infile.is_open()) << "Could not open file " << source;
  string line;
  size_t pos;
  int label;
  while (std::getline(infile, line)) {
    // Skip blank lines (e.g. a trailing empty line at the end of the list);
    // they would otherwise become entries with an empty filename that fail
    // to load later.
    if (line.empty()) {
      continue;
    }
    // Everything after the last space is the label, everything before it is
    // the filename. If the line has no space, find_last_of returns npos, so
    // pos + 1 wraps to 0 and the whole line is used for both parts.
    pos = line.find_last_of(' ');
    label = atoi(line.substr(pos + 1).c_str());
    // lines_ stores one (filename, label) pair per entry.
    lines_.push_back(std::make_pair(line.substr(0, pos), label));
  }

  CHECK(!lines_.empty()) << "File is empty";
  if (this->layer_param_.image_data_param().shuffle()) {
    // Randomly shuffle the entries, seeding a dedicated prefetch RNG first.
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  } else {
    // Neither shuffling nor skipping is configured on a non-root solver
    // during training: warn, since every solver would read the same order.
    if (this->phase_ == TRAIN && Caffe::solver_rank() > 0 &&
        this->layer_param_.image_data_param().rand_skip() == 0) {
      LOG(WARNING) << "Shuffling or skipping recommended for multi-GPU";
    }
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points.
  if (this->layer_param_.image_data_param().rand_skip()) {
    // The number of skipped entries is a random value in [0, rand_skip).
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                    new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  // transformed_data_ takes the inferred shape as-is.
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  const int batch_size = this->layer_param_.image_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  // Shape the data blob of every prefetch buffer.
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->data_.Reshape(top_shape);
  }
  // Shape the output data blob top[0].
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label: the output blob top[1] holds one value per item in the batch.
  vector<int> label_shape(1, batch_size);
  top[1]->Reshape(label_shape);
  // Shape the label blob of every prefetch buffer.
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->label_.Reshape(label_shape);
  }
}

(3)ShuffleImages随机打乱

// Randomly permutes lines_ (one entry per image) using the generator
// obtained from prefetch_rng_.
// NOTE(review): prefetch_rng_ is dereferenced unconditionally, so it must
// have been set before this is called.
template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
  // Fetch the random generator used for shuffling.
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  // Shuffle the entries of lines_, i.e. the order in which images are read.
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}

(4)load_batch批处理

// Fills one prefetch batch: reads batch_size images starting at lines_id_,
// transforms each of them into its slot of batch->data_, and stores the
// matching labels in batch->label_. When the end of the list is reached the
// index wraps around to 0 (reshuffling first if shuffle is enabled).
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  // Layer parameters read from image_data_param. Bound by const reference so
  // the parameter message and the folder string are not copied on every
  // batch.
  const ImageDataParameter& image_data_param =
      this->layer_param_.image_data_param();
  const int batch_size = image_data_param.batch_size();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();
  const bool is_color = image_data_param.is_color();
  const string& root_folder = image_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);
  // Destination buffers for the batch's image data and labels.
  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  // Read and preprocess the images of this batch one by one.
  const int lines_size = lines_.size();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    timer.Start();
    CHECK_GT(lines_size, lines_id_);
    // Load the image of the current list entry.
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
        new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply transformations (mirror, crop...) to the image.
    // transformed_data_ is pointed at this item's slot inside the batch, so
    // the transform writes straight into the batch buffer.
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
    trans_time += timer.MicroSeconds();  // accumulate transform time

    prefetch_label[item_id] = lines_[lines_id_].second;
    // go to the next iter
    lines_id_++;
    if (lines_id_ >= lines_size) {  // past the last entry of the list
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      lines_id_ = 0;
      if (image_data_param.shuffle()) {
        ShuffleImages();  // reshuffle the order for the next pass
      }
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

(5)Usage使用
层通过bottom接收输入数据，通过top输出数据；数据层只有top，没有bottom。数据层至少有一个命名为data的top；如果有第二个top，一般命名为label。这种（data,label）的配对是分类模型所必须的。

# Example network definition using an ImageData layer as its data source.
name: "CaffeNet"
layer {
  name: "data"  # layer name, can be chosen freely
  type: "ImageData"  # layer type
  top: "data"
  top: "label"  # a data layer only has top outputs, no bottom inputs
  transform_param {  # data preprocessing
    # whether to enable mirroring
    mirror: false
    # crop a 227x227 patch from the image
    crop_size: 227
    # subtract the mean stored in this file
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
  image_data_param {
    source: "examples/_temp/file_list.txt"
    batch_size: 50
    new_height: 256
    new_width: 256
  }
}

本文主要参考博文链接。如有错误，欢迎指正，谢谢！

冬后晚晴

关注

1
点赞
踩
8

收藏

觉得还不错? 一键收藏
0
评论
caffe源码解读(5)－image_data_layer.cpp

数据层：Image_data_layer层主要用于caffe来自图片的数据处理。层类型：ImageData 必须设置的参数：①source:一个文本文件的名字，每行给定一个图片文件的名称和标签；②batch_size:每一次处理的数据个数，即图片数。可选参数：①rand_skip:在开始时，跳过某个图片的输入；②shuffle:随机打乱顺序；③new_height,new_width:如
复制链接

扫一扫