Caffe: modifying the source code to accept multiple labels per input image

The data for a Caffe data layer (DataLayer) can come from the following sources:

1. Data from a database (LevelDB or LMDB)

   Layer type: Data

layer {
  top: "data"
  top: "label"
  name: "data"
  type: "Data"
  data_param {
    source: "/home/zhuangni/code/FaceDetection/Data/train"
    backend: LMDB
    batch_size: 128
  }
  transform_param {
     #mean_file: "/home/zhuangni/code/FaceDetection/Data/mean.binaryproto"
     mirror: true
  }
  include: { phase: TRAIN }
}

2. Data from memory

   Layer type: MemoryData

3. Data from HDF5 files

   Layer type: HDF5Data

4. Data from a text file of image paths

   Layer type: ImageData

layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  image_data_param {
    source: "/home/zhuangni/code/TransferMMD/experiment/TNet/data/age_to_gender_train.txt"
    batch_size: 64
    new_height: 256
    new_width: 256
    shuffle: true
  }
  transform_param {
    crop_size: 227
  }
  include: { phase: TRAIN }
}

This post describes how to handle data when the layer type is ImageData.

Normally, an ImageData layer expects its source to be a .txt file in which each line contains an image path followed by a single label.

For example, age_to_gender_train.txt contains lines such as:

    100003415@N08/landmark_aligned_face.2189.9529433812_bb1f080d06_o.jpg 3


However, we now need more than one label per image. For example, in transfer learning, one image comes from the source domain and another from the target domain, and the two must be distinguished during training. Or the same image may need a gender label in addition to an age label.

The data file then needs the following format:

     100003415@N08/landmark_aligned_face.2189.9529433812_bb1f080d06_o.jpg 3 -1 (source task)

     113445054@N07/landmark_aligned_face.1325.11764272784_8e6d9e3722_o.jpg -1 0 (target task)

     // In Caffe, classification labels are integers that usually start from 0. The pair (3, -1) marks a source-task training example; (-1, 0) marks a target-task training example.

To make Caffe accept this format, three changes are needed:

1. Modify /caffe-master/include/caffe/layers/image_data_layer.hpp

   Change

 vector<std::pair<std::string, int> > lines_;

   to

vector<std::pair<std::string, int*> > lines_;

2. Modify /caffe-master/src/caffe/layers/image_data_layer.cpp as follows:

   The main change is replacing the single int label with a pointer to a heap-allocated array of label_dim labels read from each line.

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/layers/image_data_layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {

template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer<Dtype>() {
  this->StopInternalThread();
}

template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width  = this->layer_param_.image_data_param().new_width();
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  string root_folder = this->layer_param_.image_data_param().root_folder();

  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";
  // Read the file with filenames and labels
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string filename;
  int label_dim = this->layer_param_.image_data_param().label_dim();
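  // Each line of the source file holds an image path followed by label_dim integer labels.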
  while (infile >> filename) {
    int* labels = new int[label_dim];
    for (int i = 0; i < label_dim; ++i) {
      infile >> labels[i];
    }
    lines_.push_back(std::make_pair(filename, labels));
  }

  CHECK(!lines_.empty()) << "File is empty";

  if (this->layer_param_.image_data_param().shuffle()) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                    new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;

  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  const int batch_size = this->layer_param_.image_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
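  // The label blob is now 2-D: batch_size x label_dim.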
  vector<int> label_shape;
  label_shape.push_back(batch_size);
  label_shape.push_back(label_dim);
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].label_.Reshape(label_shape);
  }
}

template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}

// This function is called on prefetch thread
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  ImageDataParameter image_data_param = this->layer_param_.image_data_param();
  const int batch_size = image_data_param.batch_size();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();
  const bool is_color = image_data_param.is_color();
  string root_folder = image_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  const int lines_size = lines_.size();
  int label_dim = this->layer_param_.image_data_param().label_dim();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    timer.Start();
    CHECK_GT(lines_size, lines_id_);
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
        new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply transformations (mirror, crop...) to the image
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
    trans_time += timer.MicroSeconds();

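    // Copy all label_dim labels of this image into the label blob.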
    for (int i = 0; i < label_dim; ++i) {
      prefetch_label[item_id * label_dim + i] = lines_[lines_id_].second[i];
    }
    // go to the next iter
    lines_id_++;
    if (lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      lines_id_ = 0;
      if (this->layer_param_.image_data_param().shuffle()) {
        ShuffleImages();
      }
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(ImageDataLayer);
REGISTER_LAYER_CLASS(ImageData);

}  // namespace caffe
#endif  // USE_OPENCV


3. In /caffe-master/src/caffe/proto/caffe.proto, add the following field to message ImageDataParameter:

// Specify the label dimension, i.e. the number of labels per image. Default is 2.
optional uint32 label_dim = 13 [default = 2];
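
After rebuilding Caffe (so that the regenerated protobuf headers pick up the new field), label_dim can be set directly in the ImageData layer definition, for example:

layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  image_data_param {
    source: "/home/zhuangni/code/TransferMMD/experiment/TNet/data/age_to_gender_train.txt"
    batch_size: 64
    new_height: 256
    new_width: 256
    shuffle: true
    label_dim: 2    # number of labels following each image path
  }
  transform_param {
    crop_size: 227
  }
  include: { phase: TRAIN }
}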

Note: when computing the loss, set

ignore_label: -1

in loss_param so that the placeholder labels (-1) are excluded from the loss computation:

layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc8"
  bottom: "label"
  loss_param {
    ignore_label: -1
  }
}
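
Since the label blob produced by the modified layer has shape batch_size x label_dim, it normally cannot be fed to SoftmaxWithLoss directly; it first has to be split into one single-column label blob per task, for example with a Slice layer. Below is a minimal sketch; the blob names age_label / gender_label and the bottom fc8_age are placeholders for this example:

layer {
  name: "slice_label"
  type: "Slice"
  bottom: "label"
  top: "age_label"
  top: "gender_label"
  slice_param {
    axis: 1
  }
}
layer {
  name: "loss_age"
  type: "SoftmaxWithLoss"
  bottom: "fc8_age"
  bottom: "age_label"
  top: "loss_age"
  loss_param {
    ignore_label: -1
  }
}

The gender task gets a second SoftmaxWithLoss layer in the same pattern, with gender_label as its label bottom.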


Reference paper: "Learning Transferable Features with Deep Adaptation Networks"

GitHub code: https://github.com/littletotoro/mmd-caff
