最近在基于caffe做目标检测的问题,需要利用caffe来训练一个回归网络,用来预测object在图像中的位置(x1,y1,width,height)。但是现有的caffe版本(happynear版本)只适用于二分类问题的数据集转换,所以需要修改caffe源码,使之也可以转换回归问题的数据集。
主要是参照 http://blog.csdn.net/baobei0112/article/details/47606559 进行修改。但是这份博客使用的不是happynear的caffe版本,所以源码改动的地方差异较大。下面我会记录我改动的地方。
一.源码修改
1.修改caffe.proto,位于/src/caffe/proto
36行改成 repeated float label = 5;,然后运行extract_proto.bat
2.修改data_layers.hpp(位于/include/caffe)
#ifndef CAFFE_DATA_LAYERS_HPP_
#define CAFFE_DATA_LAYERS_HPP_
#include <string>
#include <utility>
#include <vector>
#include "hdf5/hdf5.h"
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/data_reader.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/filler.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/blocking_queue.hpp"
#include "caffe/util/db.hpp"
#define HDF5_DATA_DATASET_NAME "data"
#define HDF5_DATA_LABEL_NAME "label"
namespace caffe {
/**
 * @brief Common base class for the data layers that feed blobs to the Net.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class BaseDataLayer : public Layer<Dtype> {
 public:
  explicit BaseDataLayer(const LayerParameter& param);

  // Runs the setup shared by all data layers and then delegates to
  // DataLayerSetUp for the layer-specific part. Only
  // BasePrefetchingDataLayer is meant to override this method.
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);

  // Reports that a data layer is shared between solvers running in parallel.
  virtual inline bool ShareInParallel() const { return true; }

  // Layer-specific setup hook; the default implementation does nothing.
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
                              const vector<Blob<Dtype>*>& top) {}

  // Data layers have no bottom blobs, so Reshape has nothing to do.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top) {}

  // Both backward passes are intentionally empty.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom) {}

 protected:
  TransformationParameter transform_param_;
  shared_ptr<DataTransformer<Dtype> > data_transformer_;
  // Derived layers consult this flag before touching a label blob.
  bool output_labels_;
};
// One prefetched unit: a data blob together with its label blob.
template <typename Dtype>
class Batch {
 public:
  Blob<Dtype> data_;
  Blob<Dtype> label_;
};
// Data layer base that is also an InternalThread; concrete layers provide
// load_batch() to fill one Batch.
template <typename Dtype>
class BasePrefetchingDataLayer
    : public BaseDataLayer<Dtype>, public InternalThread {
 public:
  explicit BasePrefetchingDataLayer(const LayerParameter& param);

  // Runs the common data layer setup and then calls DataLayerSetUp for the
  // layer-specific part. Not meant to be overridden further.
  void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                  const vector<Blob<Dtype>*>& top);

  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);

  // Number of Batch slots held in prefetch_ (batches are prefetched
  // asynchronously if to GPU memory).
  static const int PREFETCH_COUNT = 3;

 protected:
  virtual void InternalThreadEntry();

  // Fills `batch`; must be implemented by every concrete layer.
  virtual void load_batch(Batch<Dtype>* batch) = 0;

  Batch<Dtype> prefetch_[PREFETCH_COUNT];
  // NOTE(review): presumably queues of empty / filled batches exchanged with
  // the prefetch thread -- confirm against the .cpp implementation.
  BlockingQueue<Batch<Dtype>*> prefetch_free_;
  BlockingQueue<Batch<Dtype>*> prefetch_full_;

  Blob<Dtype> transformed_data_;
};
// Prefetching data layer whose records come from a DataReader member.
template <typename Dtype>
class DataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit DataLayer(const LayerParameter& param);
  virtual ~DataLayer();

  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
                              const vector<Blob<Dtype>*>& top);

  // Not shared between parallel solvers: DataReader handles the sharing.
  virtual inline bool ShareInParallel() const { return false; }

  virtual inline const char* type() const { return "Data"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  // One top (data only) or two tops (data and label) are accepted.
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 2; }

 protected:
  virtual void load_batch(Batch<Dtype>* batch);

  DataReader reader_;
};
/**
 * @brief Feeds the Net with data produced by Filler objects.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class DummyDataLayer : public Layer<Dtype> {
 public:
  explicit DummyDataLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);

  // Reports that the layer is shared between solvers running in parallel.
  virtual inline bool ShareInParallel() const { return true; }

  // No bottom blobs exist, so Reshape is a no-op.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top) {}

  virtual inline const char* type() const { return "DummyData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int MinTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);

  // Both backward passes are intentionally empty.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom) {}

  vector<shared_ptr<Filler<Dtype> > > fillers_;
  vector<bool> refill_;
};
/**
 * @brief Feeds the Net with data read from HDF5 files.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class HDF5DataLayer : public Layer<Dtype> {
 public:
  explicit HDF5DataLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual ~HDF5DataLayer();

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);

  // Reports that the layer is shared between solvers running in parallel.
  virtual inline bool ShareInParallel() const { return true; }

  // No bottom blobs exist, so Reshape is a no-op.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top) {}

  virtual inline const char* type() const { return "HDF5Data"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int MinTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);

  // Both backward passes are intentionally empty.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom) {}

  virtual void LoadHDF5FileData(const char* filename);

  // File list and read-position bookkeeping.
  std::vector<std::string> hdf_filenames_;
  unsigned int num_files_;
  unsigned int current_file_;
  hsize_t current_row_;

  std::vector<shared_ptr<Blob<Dtype> > > hdf_blobs_;
  std::vector<unsigned int> data_permutation_;
  std::vector<unsigned int> file_permutation_;
};
/**
 * @brief Writes blobs to disk as HDF5 files.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class HDF5OutputLayer : public Layer<Dtype> {
 public:
  // Starts with file_opened_ cleared.
  explicit HDF5OutputLayer(const LayerParameter& param)
      : Layer<Dtype>(param), file_opened_(false) {}
  virtual ~HDF5OutputLayer();

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                          const vector<Blob<Dtype>*>& top);

  // Reports that the layer is shared between solvers running in parallel.
  virtual inline bool ShareInParallel() const { return true; }

  // Reshape is a no-op for this layer.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top) {}

  virtual inline const char* type() const { return "HDF5Output"; }
  // TODO: no limit on the number of blobs
  virtual inline int ExactNumBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 0; }

  // Returns a copy of the stored output file name.
  inline std::string file_name() const { return file_name_; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);
  virtual void SaveBlobs();

  bool file_opened_;
  std::string file_name_;
  hid_t file_id_;
  Blob<Dtype> data_blob_;
  Blob<Dtype> label_blob_;
};
/**
 * @brief Provides data to the Net from image files.
 *
 * Each list entry pairs an image path with a vector of float label values
 * instead of a single integer class label.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit ImageDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer<Dtype>(param) {}
  virtual ~ImageDataLayer();

  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
                              const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "ImageData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

  // (image path, label values) for every list entry. The closing brackets
  // are written "> >" like everywhere else in this header, so the
  // declaration also parses on compilers that read ">>" as a shift operator.
  vector<std::pair<std::string, std::vector<float> > > lines_;
  shared_ptr<Caffe::RNG> prefetch_rng_;

  virtual void ShuffleImages();
  virtual void load_batch(Batch<Dtype>* batch);

  // Index into lines_ of the next entry to load.
  int lines_id_;
};
/**
 * @brief Feeds the Net with data that already resides in memory.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class MemoryDataLayer : public BaseDataLayer<Dtype> {
 public:
  // Starts with has_new_data_ cleared.
  explicit MemoryDataLayer(const LayerParameter& param)
      : BaseDataLayer<Dtype>(param), has_new_data_(false) {}

  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
                              const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "MemoryData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

  virtual void AddDatumVector(const vector<Datum>& datum_vector);
#ifdef USE_OPENCV
  virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
                            const vector<int>& labels);
#endif  // USE_OPENCV

  // The pointers cannot be const: the memory is handed to a Blob, which is
  // mutable.
  void Reset(Dtype* data, Dtype* label, int n);
  void set_batch_size(int new_size);

  // Plain accessors for the configured geometry.
  int batch_size() { return batch_size_; }
  int channels() { return channels_; }
  int height() { return height_; }
  int width() { return width_; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);

  int batch_size_, channels_, height_, width_, size_;
  Dtype* data_;
  Dtype* labels_;
  int n_;
  size_t pos_;
  Blob<Dtype> added_data_;
  Blob<Dtype> added_label_;
  bool has_new_data_;
};
/**
 * @brief Feeds the Net with windows of image files, as listed in a window
 *        data file.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class WindowDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit WindowDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer<Dtype>(param) {}
  virtual ~WindowDataLayer();

  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
                              const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "WindowData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  virtual unsigned int PrefetchRand();
  virtual void load_batch(Batch<Dtype>* batch);

  shared_ptr<Caffe::RNG> prefetch_rng_;
  vector<std::pair<std::string, vector<int> > > image_database_;

  // Field indices; NUM is the number of fields.
  enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM };
  vector<vector<float> > fg_windows_;
  vector<vector<float> > bg_windows_;

  Blob<Dtype> data_mean_;
  vector<Dtype> mean_values_;
  bool has_mean_file_;
  bool has_mean_values_;
  bool cache_images_;
  vector<std::pair<std::string, Datum > > image_database_cache_;
};
/**
 * @brief Feeds the Net with image files, each paired with a vector of
 *        label values.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class MultiLabelImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit MultiLabelImageDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer<Dtype>(param) {}
  virtual ~MultiLabelImageDataLayer();

  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
                              const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "MultiLabelImageData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  shared_ptr<Caffe::RNG> prefetch_rng_;

  virtual void ShuffleImages();
  virtual void load_batch(Batch<Dtype>* batch);

  // Pairs of a string and a shared label vector, one per list entry.
  vector<std::pair<std::string, shared_ptr<vector<Dtype> > > > lines_;
  int label_count;
  int lines_id_;
};
} // namespace caffe
#endif // CAFFE_DATA_LAYERS_HPP_
3.改动data_layer.cpp
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif // USE_OPENCV
#include <stdint.h>
#include <vector>
#include "caffe/data_layers.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/benchmark.hpp"
namespace caffe {
// Constructs the prefetching base and the DataReader member from the same
// layer parameter.
template <typename Dtype>
DataLayer<Dtype>::DataLayer(const LayerParameter& param)
    : BasePrefetchingDataLayer<Dtype>(param), reader_(param) {}
// Stops the internal thread when the layer is destroyed.
template <typename Dtype>
DataLayer<Dtype>::~DataLayer() { this->StopInternalThread(); }
// Shapes the top blobs and every prefetch buffer from the first datum that
// the reader exposes.
//
// top[0] receives the data; when labels are output, top[1] receives
// batch_size x label_dim x 1 x 1 values, where label_dim is the number of
// label values stored in that first datum. Taking the width from the datum
// (instead of a fixed 4) keeps the blob consistent with load_batch, which
// copies datum.label_size() values per item.
template <typename Dtype>
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int batch_size = this->layer_param_.data_param().batch_size();
  // Peek at a data point (without consuming it) to initialize the shapes.
  Datum& datum = *(reader_.full().peek());

  // Let the data transformer infer the per-item blob shape from the datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // top[0] and the prefetch buffers hold batch_size items of that shape.
  top_shape[0] = batch_size;
  top[0]->Reshape(top_shape);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();

  // label
  if (this->output_labels_) {
    const int label_dim = datum.label_size();
    CHECK_GT(label_dim, 0)
        << "Datum has no label values; cannot shape the label blob.";
    top[1]->Reshape(batch_size, label_dim, 1, 1);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].label_.Reshape(batch_size, label_dim, 1, 1);
    }
  }
}
// This function is called on prefetch thread.
//
// Fills `batch` with batch_size transformed datums popped from the reader
// and, when labels are output, copies each datum's label values into the
// label blob. Every datum must carry exactly as many label values as the
// label blob reserves per item; a mismatch is reported instead of writing
// outside (or mis-aligned inside) the blob.
template<typename Dtype>
void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());

  // Reshape according to the first datum of each batch
  // on single input batches allows for inputs of varying dimension.
  const int batch_size = this->layer_param_.data_param().batch_size();
  Datum& first_datum = *(reader_.full().peek());
  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(first_datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* top_data = batch->data_.mutable_cpu_data();
  Dtype* top_label = NULL;  // suppress warnings about uninitialized variables
  int label_dim = 0;        // label values reserved per item in the blob
  if (this->output_labels_) {
    CHECK_GT(batch_size, 0) << "Positive batch size required";
    top_label = batch->label_.mutable_cpu_data();
    label_dim = batch->label_.count() / batch_size;
  }

  for (int item_id = 0; item_id < batch_size; ++item_id) {
    timer.Start();
    // get a datum
    Datum& datum = *(reader_.full().pop("Waiting for data"));
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply data transformations (mirror, scale, crop...)
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(top_data + offset);
    this->data_transformer_->Transform(datum, &(this->transformed_data_));
    // Copy label.
    if (this->output_labels_) {
      CHECK_EQ(datum.label_size(), label_dim)
          << "Datum label count does not match the label blob width.";
      Dtype* item_label = top_label + item_id * label_dim;
      for (int label_i = 0; label_i < label_dim; ++label_i) {
        item_label[label_i] = datum.label(label_i);
      }
    }
    trans_time += timer.MicroSeconds();

    // Hand the consumed datum back to the reader's free queue.
    reader_.free().push(&datum);
  }
  timer.Stop();
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}
INSTANTIATE_CLASS(DataLayer);
REGISTER_LAYER_CLASS(Data);
} // namespace caffe
4.修改image_data_layer.cpp中label部分
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <fstream> // NOLINT(readability/streams)
#include <iostream> // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>
#include "caffe/data_layers.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"
namespace caffe {
// Stops the internal thread when the layer is destroyed.
// The destructor is named without a template argument list, matching
// DataLayer<Dtype>::~DataLayer() and staying valid under C++20, which no
// longer accepts "~ImageDataLayer<Dtype>()" as a declarator.
template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer() {
  this->StopInternalThread();
}
// Reads the list file, optionally shuffles / skips entries, and shapes the
// top blobs and prefetch buffers.
//
// Every line of the list file is "<filename> <v1> <v2> <v3> <v4>", i.e. an
// image path followed by four float label values. top[0] is shaped from the
// first image that will be loaded; top[1] is batch_size x 4 x 1 x 1.
// Aborts if no entry could be read (missing, empty or malformed list file),
// because the first entry is needed to infer the data shape.
template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Number of label values parsed per list entry.
  const int kLabelDim = 4;

  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width  = this->layer_param_.image_data_param().new_width();
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  string root_folder = this->layer_param_.image_data_param().root_folder();

  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";

  // Read the file with filenames and labels
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string filename;
  float x1, y1, x2, y2;
  while (infile >> filename >> x1 >> y1 >> x2 >> y2) {
    std::vector<float> vec_label;
    vec_label.push_back(x1);
    vec_label.push_back(y1);
    vec_label.push_back(x2);
    vec_label.push_back(y2);
    lines_.push_back(std::make_pair(filename, vec_label));
  }
  // lines_[lines_id_] is dereferenced below; fail with a clear message
  // instead of indexing an empty vector.
  CHECK(!lines_.empty()) << "No image entries could be read from " << source;

  if (this->layer_param_.image_data_param().shuffle()) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                    new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  const int batch_size = this->layer_param_.image_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  top[1]->Reshape(batch_size, kLabelDim, 1, 1);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].label_.Reshape(batch_size, kLabelDim, 1, 1);
  }
}
// Shuffles lines_ in place using the generator held by prefetch_rng_.
template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
  caffe::rng_t* const rng_engine =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), rng_engine);
}
// This function is called on prefetch thread.
//
// Loads batch_size images starting at lines_id_, transforms each one into
// batch->data_, and copies the entry's label values into batch->label_.
// When the end of lines_ is reached the cursor wraps to 0 (reshuffling if
// the shuffle option is set).
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  ImageDataParameter image_data_param = this->layer_param_.image_data_param();
  const int batch_size = image_data_param.batch_size();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();
  const bool is_color = image_data_param.is_color();
  string root_folder = image_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  // The label buffer must point at the batch's label blob; leaving it NULL
  // made the label copy below write through a null pointer.
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  const int lines_size = lines_.size();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    timer.Start();
    CHECK_GT(lines_size, lines_id_);
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
        new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply transformations (mirror, crop...) to the image
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
    trans_time += timer.MicroSeconds();

    // Copy this entry's label values into its slot of the label blob.
    const std::vector<float>& item_label = lines_[lines_id_].second;
    Dtype* label_out = prefetch_label + item_id * item_label.size();
    for (size_t label_i = 0; label_i < item_label.size(); ++label_i) {
      label_out[label_i] = item_label[label_i];
    }

    // go to the next iter
    lines_id_++;
    if (lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      lines_id_ = 0;
      if (this->layer_param_.image_data_param().shuffle()) {
        ShuffleImages();
      }
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}
INSTANTIATE_CLASS(ImageDataLayer);
REGISTER_LAYER_CLASS(ImageData);
} // namespace caffe
#endif // USE_OPENCV
5.修改memory_data_layer.cpp
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif // USE_OPENCV
#include <vector>
#include "caffe/data_layers.hpp"
namespace caffe {
// Reads the geometry from memory_data_param, validates it, shapes the top
// blobs and the internal "added" blobs, and clears the external pointers.
template <typename Dtype>
void MemoryDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  batch_size_ = this->layer_param_.memory_data_param().batch_size();
  channels_ = this->layer_param_.memory_data_param().channels();
  height_ = this->layer_param_.memory_data_param().height();
  width_ = this->layer_param_.memory_data_param().width();
  // Number of values in one item.
  size_ = channels_ * height_ * width_;
  CHECK_GT(batch_size_ * size_, 0) <<
      "batch_size, channels, height, and width must be specified and"
      " positive in memory_data_param";

  // Data blobs: batch_size_ x channels_ x height_ x width_.
  top[0]->Reshape(batch_size_, channels_, height_, width_);
  added_data_.Reshape(batch_size_, channels_, height_, width_);

  // Label blobs: a single axis of length batch_size_.
  const vector<int> label_dims(1, batch_size_);
  top[1]->Reshape(label_dims);
  added_label_.Reshape(label_dims);

  // No external buffers are attached until Reset() is called.
  data_ = NULL;
  labels_ = NULL;

  added_data_.cpu_data();
  added_label_.cpu_data();
}
// Transforms `datum_vector` into added_data_, copies the label values of
// every datum into added_label_, and installs both buffers via Reset().
//
// The label blob is sized from the first datum's label count (at least one
// value per item), so writing several label values per datum no longer runs
// past a buffer that only reserved one value per item. Datums carrying more
// label values than the first one are rejected.
//
// NOTE(review): Forward_cpu in this file reshapes top[1] to
// (batch_size_, 1, 1, 1) and advances labels_ by one value per item, so only
// single-value labels are exposed downstream -- confirm before relying on
// multi-value labels with this layer.
template <typename Dtype>
void MemoryDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) {
  CHECK(!has_new_data_) <<
      "Can't add data until current data has been consumed.";
  size_t num = datum_vector.size();
  CHECK_GT(num, 0) << "There is no datum to add.";
  CHECK_EQ(num % batch_size_, 0) <<
      "The added data must be a multiple of the batch size.";

  // Label values reserved per item; never less than one.
  const int first_label_count = datum_vector[0].label_size();
  const int label_dim = first_label_count > 0 ? first_label_count : 1;

  added_data_.Reshape(num, channels_, height_, width_);
  added_label_.Reshape(num, label_dim, 1, 1);
  // Apply data transformations (mirror, scale, crop...)
  this->data_transformer_->Transform(datum_vector, &added_data_);
  // Copy Labels
  Dtype* top_label = added_label_.mutable_cpu_data();
  for (size_t item_id = 0; item_id < num; ++item_id) {
    const Datum& datum = datum_vector[item_id];
    const int label_num = datum.label_size();
    CHECK_LE(label_num, label_dim)
        << "Datum has more label values than the label blob reserves.";
    Dtype* item_label = top_label + item_id * label_dim;
    for (int label_i = 0; label_i < label_num; ++label_i) {
      item_label[label_i] = datum.label(label_i);
    }
  }
  // num_images == batch_size_
  Dtype* top_data = added_data_.mutable_cpu_data();
  Reset(top_data, top_label, num);
  has_new_data_ = true;
}
#ifdef USE_OPENCV
// Transforms `mat_vector` into added_data_, stores one integer label per
// image in added_label_, and installs both buffers via Reset().
template <typename Dtype>
void MemoryDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector,
    const vector<int>& labels) {
  size_t num = mat_vector.size();
  CHECK(!has_new_data_) <<
      "Can't add mat until current data has been consumed.";
  CHECK_GT(num, 0) << "There is no mat to add";
  CHECK_EQ(num % batch_size_, 0) <<
      "The added data must be a multiple of the batch size.";

  added_data_.Reshape(num, channels_, height_, width_);
  added_label_.Reshape(num, 1, 1, 1);

  // Run the configured transformations (mirror, scale, crop...).
  this->data_transformer_->Transform(mat_vector, &added_data_);

  // One label per image, stored in image order.
  Dtype* label_buffer = added_label_.mutable_cpu_data();
  for (size_t idx = 0; idx < num; ++idx) {
    label_buffer[idx] = labels[idx];
  }

  // num_images == batch_size_
  Dtype* data_buffer = added_data_.mutable_cpu_data();
  Reset(data_buffer, label_buffer, num);
  has_new_data_ = true;
}
#endif // USE_OPENCV
// Points the layer at caller-provided data/label arrays holding `n` items
// and rewinds the read position. `n` must be a multiple of the batch size.
template <typename Dtype>
void MemoryDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) {
  CHECK(data);
  CHECK(labels);
  CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size";

  // A raw memory array is generic, so Reset() applies no transformations;
  // warn when transformation parameters were configured anyway.
  const bool transform_configured = this->layer_param_.has_transform_param();
  if (transform_configured) {
    LOG(WARNING) << this->type() << " does not transform array data on Reset()";
  }

  pos_ = 0;
  n_ = n;
  labels_ = labels;
  data_ = data;
}
// Changes the batch size and resizes the internal blobs to match. Refused
// while previously added data is still waiting to be consumed.
template <typename Dtype>
void MemoryDataLayer<Dtype>::set_batch_size(int new_size) {
  CHECK(!has_new_data_) <<
      "Can't change batch_size until current data has been consumed.";

  batch_size_ = new_size;

  // Data: batch_size_ x channels_ x height_ x width_; labels: one per item.
  added_data_.Reshape(batch_size_, channels_, height_, width_);
  added_label_.Reshape(batch_size_, 1, 1, 1);
}
// Exposes the next batch by pointing the top blobs at the current position
// in the attached arrays, then advances the position, wrapping at n_.
template <typename Dtype>
void MemoryDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  CHECK(data_) << "MemoryDataLayer needs to be initalized by calling Reset";

  Blob<Dtype>* const data_top = top[0];
  Blob<Dtype>* const label_top = top[1];

  data_top->Reshape(batch_size_, channels_, height_, width_);
  label_top->Reshape(batch_size_, 1, 1, 1);

  // No copy is made: the tops alias the attached arrays.
  data_top->set_cpu_data(data_ + pos_ * size_);
  label_top->set_cpu_data(labels_ + pos_);

  pos_ = (pos_ + batch_size_) % n_;
  // A wrap back to the start means all added data has been consumed.
  if (pos_ == 0) {
    has_new_data_ = false;
  }
}
INSTANTIATE_CLASS(MemoryDataLayer);
REGISTER_LAYER_CLASS(MemoryData);
} // namespace caffe
6.修改convert_imageset.cpp
// This program converts a set of images to a lmdb/leveldb by storing them
// as Datum proto buffers.
// Usage:
// convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
//
// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
// should be a list of files as well as their labels, in the format as
// subfolder1/file1.JPEG 7
// ....
#include <algorithm>
#include <fstream> // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>
#include "boost/scoped_ptr.hpp"
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"
using namespace caffe; // NOLINT(build/namespaces)
using std::pair;
using boost::scoped_ptr;
// Command-line flags (gflags) read by main() below through the FLAGS_*
// variables.
// --gray: main() passes !FLAGS_gray as is_color when reading images.
DEFINE_bool(gray, false,
"When this option is on, treat images as grayscale ones");
// --shuffle: main() shuffles the parsed list entries when set.
DEFINE_bool(shuffle, false,
"Randomly shuffle the order of images and their labels");
// --backend: passed to db::GetDB() to pick the database implementation.
DEFINE_string(backend, "lmdb",
"The backend {lmdb, leveldb} for storing the result");
// --resize_width / --resize_height: clamped to >= 0 in main() and passed to
// ReadImageToDatum.
DEFINE_int32(resize_width, 0, "Width images are resized to");
DEFINE_int32(resize_height, 0, "Height images are resized to");
// --check_size: main() compares each datum's data size with the first one.
DEFINE_bool(check_size, false,
"When this option is on, check that all the datum have the same size");
// --encoded / --encode_type: control whether and how the image bytes are
// stored in encoded form.
DEFINE_bool(encoded, false,
"When this option is on, the encoded image will be save in datum");
DEFINE_string(encode_type, "",
"Optional: What type should we encode the image as ('png','jpg',...).");
// Converts a list of images with float label vectors into a database of
// serialized Datum records.
//
// Usage: convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
//   argv[1]  root folder prepended to every file name in the list
//   argv[2]  list file; each line is "<filename> <x1> <y1> <x2> <y2>"
//   argv[3]  name of the database to create
// Returns 1 when too few arguments are given, 0 otherwise. Images that
// cannot be read are skipped.
int main(int argc, char** argv) {
#ifdef USE_OPENCV
  //::google::InitGoogleLogging(argv[0]);
  // Print output to stderr (while still logging)
  FLAGS_alsologtostderr = 1;

#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n"
        "format used as input for Caffe.\n"
        "Usage:\n"
        " convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
        "The ImageNet dataset for the training demo is at\n"
        " http://www.image-net.org/download-images\n");
  caffe::GlobalInit(&argc, &argv);

  if (argc < 4) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
    return 1;
  }

  const bool is_color = !FLAGS_gray;
  const bool check_size = FLAGS_check_size;
  const bool encoded = FLAGS_encoded;
  const string encode_type = FLAGS_encode_type;

  // Parse the list file: a file name followed by four float label values.
  std::ifstream infile(argv[2]);
  CHECK(infile.is_open()) << "Could not open list file " << argv[2];
  std::vector<std::pair<std::string, std::vector<float> > > lines;
  std::string filename;
  float x1, y1, x2, y2;
  while (infile >> filename >> x1 >> y1 >> x2 >> y2) {
    std::vector<float> vec_label;
    vec_label.push_back(x1);
    vec_label.push_back(y1);
    vec_label.push_back(x2);
    vec_label.push_back(y2);
    lines.push_back(std::make_pair(filename, vec_label));
  }
  if (FLAGS_shuffle) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    shuffle(lines.begin(), lines.end());
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  if (encode_type.size() && !encoded)
    LOG(INFO) << "encode_type specified, assuming encoded=true.";

  int resize_height = std::max<int>(0, FLAGS_resize_height);
  int resize_width = std::max<int>(0, FLAGS_resize_width);

  // Create new DB
  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(argv[3], db::NEW);
  scoped_ptr<db::Transaction> txn(db->NewTransaction());

  // Storing to db
  std::string root_folder(argv[1]);
  Datum datum;
  int count = 0;
  const int kMaxKeyLength = 256;
  char key_cstr[kMaxKeyLength];
  int data_size = 0;
  bool data_size_initialized = false;

  const int num_lines = static_cast<int>(lines.size());
  for (int line_id = 0; line_id < num_lines; ++line_id) {
    bool status;
    std::string enc = encode_type;
    if (encoded && !enc.size()) {
      // Guess the encoding type from the file name
      string fn = lines[line_id].first;
      size_t p = fn.rfind('.');
      if (p == fn.npos) {
        // No extension: leave enc empty. Calling substr(npos) here would
        // throw std::out_of_range.
        LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
      } else {
        // NOTE(review): enc keeps the leading '.', while ReadImageToDatum
        // prepends another '.' before calling imencode -- confirm intended.
        enc = fn.substr(p);
        std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
      }
    }
    status = ReadImageToDatum(root_folder + lines[line_id].first,
        lines[line_id].second, resize_height, resize_width, is_color,
        enc, &datum);
    if (status == false) continue;
    if (check_size) {
      if (!data_size_initialized) {
        // The first datum defines the reference size.
        data_size = datum.channels() * datum.height() * datum.width();
        data_size_initialized = true;
      } else {
        const std::string& data = datum.data();
        CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
            << data.size();
      }
    }
    // sequential
    int length = sprintf_s(key_cstr, kMaxKeyLength, "%08d_%s", line_id,
        lines[line_id].first.c_str());

    // Put in db
    string out;
    CHECK(datum.SerializeToString(&out));
    txn->Put(string(key_cstr, length), out);

    if (++count % 1000 == 0) {
      // Commit db
      txn->Commit();
      txn.reset(db->NewTransaction());
      LOG(INFO) << "Processed " << count << " files.";
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
    txn->Commit();
    LOG(INFO) << "Processed " << count << " files.";
  }
#else
  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}
7.修改io.cpp (只贴了部分需要修改的程序)
bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
const int height, const int width, const bool is_color,
const std::string & encoding, Datum* datum) {
cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color);
if (cv_img.data) {
if (encoding.size()) {
if ( (cv_img.channels() == 3) == is_color && !height && !width &&
matchExt(filename, encoding) )
return ReadFileToDatum(filename, labels, datum);
std::vector<uchar> buf;
cv::imencode("."+encoding, cv_img, buf);
datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]),
buf.size()));
// datum->set_label(label);
datum->mutable_label()->Clear();
for (int label_i = 0; label_i < labels.size(); label_i++){
datum->add_label(labels[label_i]);
}
datum->set_encoded(true);
return true;
}
CVMatToDatum(cv_img, datum);
// datum->set_label(label);
datum->mutable_label()->Clear();
for (int label_i = 0; label_i < labels.size(); label_i++){
datum->add_label(labels[label_i]);
}
return true;
} else {
return false;
}
}
#endif // USE_OPENCV
/**
 * @brief Stores the raw bytes of a file in a Datum, marks the Datum as
 *        encoded and attaches a multi-value (regression) label.
 *
 * @param filename  Path of the file to read in binary mode.
 * @param labels    Values written, in order, to the Datum's repeated label
 *                  field; any labels already present are discarded first.
 * @param datum     Output Datum; must not be null.
 * @return true if the whole file was read, false if it could not be opened,
 *         its size could not be determined, or the read came up short.
 */
bool ReadFileToDatum(const string& filename, const std::vector<float> labels,
    Datum* datum) {
  // Open positioned at the end so tellg() yields the file size.
  fstream file(filename.c_str(), ios::in|ios::binary|ios::ate);
  if (!file.is_open()) {
    return false;
  }
  // tellg() reports failure as -1; using that as a string length would
  // request an absurd allocation.
  const std::streamsize size = static_cast<std::streamsize>(file.tellg());
  if (size < 0) {
    return false;
  }
  std::string buffer(static_cast<size_t>(size), ' ');
  file.seekg(0, ios::beg);
  // A failed or short read leaves the buffer partly filled with padding;
  // report it instead of storing corrupt data.
  if (size > 0 && !file.read(&buffer[0], size)) {
    return false;
  }
  file.close();
  datum->set_data(buffer);
  // The label is a repeated float field: reset it, then append every value.
  datum->mutable_label()->Clear();
  for (size_t label_i = 0; label_i < labels.size(); ++label_i) {
    datum->add_label(labels[label_i]);
  }
  datum->set_encoded(true);
  return true;
}
8.修改io.hpp (只贴了部分需要修改的程序)
/**
 * @brief Stores the raw bytes of a file in a Datum together with a
 *        multi-value label.
 *
 * @param filename  Path of the file to read.
 * @param labels    Values for the Datum's repeated label field.
 * @param datum     Output Datum.
 * @return true on success, false otherwise.
 */
bool ReadFileToDatum(const string& filename, const std::vector<float> labels, Datum* datum);

/**
 * @brief Label-free overload: stores the raw bytes of a file in a Datum
 *        without attaching any label values.
 *
 * @param filename  Path of the file to read.
 * @param datum     Output Datum.
 * @return Whatever the labelled overload returns.
 */
inline bool ReadFileToDatum(const string& filename, Datum* datum) {
  // The single-label version forwarded -1 here. With a repeated float label
  // an empty list is forwarded instead, so the file is actually read rather
  // than the call unconditionally reporting failure.
  return ReadFileToDatum(filename, std::vector<float>(), datum);
}
/**
 * @brief Loads an image file into a Datum and attaches a multi-value label.
 *
 * This is the full-parameter overload that the shorter ReadImageToDatum
 * overloads forward to.
 *
 * @param filename  Path of the image file.
 * @param labels    Values for the Datum's repeated label field.
 * @param height    Requested image height.
 * @param width     Requested image width.
 * @param is_color  Whether the image is loaded as a colour image.
 * @param encoding  Image format extension used when storing compressed
 *                  bytes; an empty string stores decoded pixels.
 * @param datum     Output Datum.
 * @return true on success, false if the image could not be loaded.
 */
// NOTE(review): labels is taken by value, so each call copies the vector;
// switching to a const reference would require changing the definition too.
bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
const int height, const int width, const bool is_color,
const std::string & encoding, Datum* datum);
inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
const int height, const int width, const bool is_color, Datum* datum) {
return ReadImageToDatum(filename, labels, height, width, is_color,
"", datum);
}
inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
const int height, const int width, Datum* datum) {
return ReadImageToDatum(filename, labels, height, width, true, datum);
}
inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
const bool is_color, Datum* datum) {
return ReadImageToDatum(filename, labels, 0, 0, is_color, datum);
}
inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
Datum* datum) {
return ReadImageToDatum(filename, labels, 0, 0, true, datum);
}
inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
const std::string & encoding, Datum* datum) {
return ReadImageToDatum(filename, labels, 0, 0, true, encoding, datum);
}
完成上述修改之后即可进行编译得到新的convert_image_set等可执行程序。
二.将自己的数据集转成leveldb格式
基本跟 http://blog.csdn.net/messiran10/article/details/49159559 的流程一样,主要是以下两点需要变化:
1.样本说明文件
train_samples/10007.jpg 0.491667 0.529412 0.450000 0.352941 (即需要把一维的 label 转成 4 维的 label:每行为图片路径,后跟 4 个以空格分隔的浮点数)
2.模型配置文件
需要把 softmax loss 层换成平方损失层(即 Caffe 的 EuclideanLoss 层)
需要去掉accuracy层(否则会出错)