使用caffe实现多标签输入

最近需要使用caffe训练一个回归网络,这个网络的输入label是多维的float型数据,在搜索了大量资料后,整理了一份比较简洁的操作流程,这里记录下来。

本篇博客主要参照了这篇博客,并进行了一些小幅度的修改和完善:

http://blog.csdn.net/qq295456059/article/details/53142574

首先,我们用于做回归的训练文件txt的格式为:


cat_1.jpg 0.03 0.45 0.55 0.66  
cat_2.jpg 0.44 0.31 0.05 0.34  
dog_1.jpg 0.67 0.25 0.79 0.56  
dog_2.jpg 0.89 0.46 0.91 0.38 

那么我们一共需要对以下文件进行修改:

1.tools/convert_imageset.cpp

这里建议复制convert_imageset.cpp文件,并将其命名为convert_imageset_regression.cpp。依然将其放在tools文件夹下面。

首先我们添加一些头文件

#include <boost/tokenizer.hpp> //### To use tokenizer  
#include <iostream> //###  

using namespace std;  //###  

我们将int类型的label改成float,如下:

 // Original single-int-label container, kept for reference:
 //std::vector<std::pair<std::string, int> > lines;  //###  
  // Each entry pairs an image path with every float label parsed from its row.
  std::vector<std::pair<std::string, std::vector<float> > > lines;  
  // Holds the list-file row currently being read.
  std::string line;  
  //size_t pos;  
  //int label;  //###  
  // Scratch buffer for the labels of the row currently being parsed.
  std::vector<float> labels;  

在while函数中,将float型的label放入lines中

  // Parse the list file row by row: the first token is the image path and
  // every remaining token is one float label for that image.
  // Tokens are split on spaces, tabs and carriage returns so that
  // tab-separated files and files with Windows line endings also parse.
  while (std::getline(infile, line)) {
    std::vector<std::string> tokens;
    boost::char_separator<char> sep(" \t\r");
    boost::tokenizer<boost::char_separator<char> > tok(line, sep);
    std::copy(tok.begin(), tok.end(), std::back_inserter(tokens));

    // A blank row yields no tokens; indexing token 0 would throw, so skip it.
    if (tokens.empty()) {
      continue;
    }

    // labels is reused across rows, so start each row from an empty buffer.
    labels.clear();
    for (size_t i = 1; i < tokens.size(); ++i) {
      labels.push_back(atof(tokens[i].c_str()));
    }

    lines.push_back(std::make_pair(tokens[0], labels));
  }

修改完之后的整个文件如下:

// This program converts a set of images to a lmdb/leveldb by storing them
// as Datum proto buffers.
// Usage:
//   convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
//
// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
// should be a list of files followed by their space-separated float labels,
// in the format as
//   subfolder1/file1.JPEG 0.03 0.45 0.55 0.66
//   ....

#include <algorithm>
#include <fstream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "boost/scoped_ptr.hpp"
#include "gflags/gflags.h"
#include "glog/logging.h"

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"
#include <boost/tokenizer.hpp> //### To use tokenizer  
#include <iostream>

using namespace caffe;  // NOLINT(build/namespaces)
using std::pair;
using boost::scoped_ptr;

// Command-line flags of the converter (defined through gflags).
// NOTE(review): the float-label ReadImageToDatum overload shown in this
// article has its encoding branch commented out, so --encoded and
// --encode_type appear to have no effect on what is stored -- confirm.
DEFINE_bool(gray, false,
    "When this option is on, treat images as grayscale ones");
DEFINE_bool(shuffle, false,
    "Randomly shuffle the order of images and their labels");
DEFINE_string(backend, "lmdb",
        "The backend {lmdb, leveldb} for storing the result");
// A value of 0 is the default for both resize flags; main() clamps negative
// values to 0 before passing them on.
DEFINE_int32(resize_width, 0, "Width images are resized to");
DEFINE_int32(resize_height, 0, "Height images are resized to");
DEFINE_bool(check_size, false,
    "When this option is on, check that all the datum have the same size");
DEFINE_bool(encoded, false,
    "When this option is on, the encoded image will be save in datum");
DEFINE_string(encode_type, "",
    "Optional: What type should we encode the image as ('png','jpg',...).");

// Entry point of the regression variant of convert_imageset.
//
// Reads LISTFILE (argv[2]), where every row is an image path followed by one
// or more float labels, loads each image relative to ROOTFOLDER (argv[1]) and
// writes one Datum per image into the database DB_NAME (argv[3]).
// The labels are handed to the float-label ReadImageToDatum overload.
//
// Returns 1 when fewer than three positional arguments are given, 0 otherwise.
// Malformed input (unreadable list file, rows with inconsistent label counts)
// aborts through glog CHECKs.
int main(int argc, char** argv) {
#ifdef USE_OPENCV
  ::google::InitGoogleLogging(argv[0]);
  // Print output to stderr (while still logging)
  FLAGS_alsologtostderr = 1;

#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Convert a set of images with float labels to the\n"
        "leveldb/lmdb format used as input for Caffe.\n"
        "Usage:\n"
        "    convert_imageset_regression [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
        "Each LISTFILE row is: IMAGE_PATH LABEL_1 LABEL_2 ... LABEL_N\n");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  if (argc < 4) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
    return 1;
  }

  const bool is_color = !FLAGS_gray;
  const bool check_size = FLAGS_check_size;
  const bool encoded = FLAGS_encoded;
  const string encode_type = FLAGS_encode_type;

  std::ifstream infile(argv[2]);
  // Without this check a mistyped list path silently produces an empty DB.
  CHECK(infile.is_open()) << "Failed to open list file " << argv[2];

  // Each entry: image path (relative to ROOTFOLDER) plus all of its labels.
  std::vector<std::pair<std::string, std::vector<float> > > lines;
  std::string line;
  std::vector<float> labels;
  // Split on spaces, tabs and carriage returns so tab-separated files and
  // files with Windows line endings parse as well.
  const boost::char_separator<char> sep(" \t\r");

  while (std::getline(infile, line)) {
    std::vector<std::string> tokens;
    boost::tokenizer<boost::char_separator<char> > tok(line, sep);
    std::copy(tok.begin(), tok.end(), std::back_inserter(tokens));

    // Blank rows yield no tokens; skip them instead of throwing on token 0.
    if (tokens.empty()) {
      continue;
    }

    labels.clear();
    for (size_t i = 1; i < tokens.size(); ++i) {
      labels.push_back(atof(tokens[i].c_str()));
    }

    // The data layer reads a fixed number of labels per datum, so a row with
    // a different label count would corrupt training later on.
    if (!lines.empty()) {
      CHECK_EQ(labels.size(), lines.front().second.size())
          << "Row '" << line << "' has a different number of labels than "
          << "the first row";
    }

    lines.push_back(std::make_pair(tokens[0], labels));
  }
  if (FLAGS_shuffle) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    shuffle(lines.begin(), lines.end());
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  if (encode_type.size() && !encoded)
    LOG(INFO) << "encode_type specified, assuming encoded=true.";
  if (encoded || encode_type.size())
    LOG(WARNING) << "The float-label ReadImageToDatum overload stores raw "
                 << "pixel data; --encoded/--encode_type are ignored.";

  int resize_height = std::max<int>(0, FLAGS_resize_height);
  int resize_width = std::max<int>(0, FLAGS_resize_width);

  // Create new DB
  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(argv[3], db::NEW);
  scoped_ptr<db::Transaction> txn(db->NewTransaction());

  // Storing to db
  std::string root_folder(argv[1]);
  Datum datum;
  int count = 0;
  int data_size = 0;
  bool data_size_initialized = false;

  const int num_lines = static_cast<int>(lines.size());
  for (int line_id = 0; line_id < num_lines; ++line_id) {
    bool status;
    std::string enc = encode_type;
    if (encoded && !enc.size()) {
      // Guess the encoding type from the file name
      string fn = lines[line_id].first;
      size_t p = fn.rfind('.');
      if (p == fn.npos) {
        // substr(npos) would throw; leave enc empty when there is no extension.
        LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
      } else {
        enc = fn.substr(p);
        std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
      }
    }
    // The image path is a plain concatenation, so ROOTFOLDER must end in '/'.
    status = ReadImageToDatum(root_folder + lines[line_id].first,
        lines[line_id].second, resize_height, resize_width, is_color,
        enc, &datum);
    if (status == false) continue;
    if (check_size) {
      if (!data_size_initialized) {
        data_size = datum.channels() * datum.height() * datum.width();
        data_size_initialized = true;
      } else {
        const std::string& data = datum.data();
        CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
            << data.size();
      }
    }
    // sequential
    string key_str = caffe::format_int(line_id, 8) + "_" + lines[line_id].first;

    // Put in db
    string out;
    CHECK(datum.SerializeToString(&out));
    txn->Put(key_str, out);

    if (++count % 1000 == 0) {
      // Commit db
      txn->Commit();
      txn.reset(db->NewTransaction());
      LOG(INFO) << "Processed " << count << " files.";
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
    txn->Commit();
    LOG(INFO) << "Processed " << count << " files.";
  }
#else
  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}

2. include/caffe/util/io.hpp

修改io.hpp文件,在这个文件中添加一个函数定义:

// Float-label overload of ReadImageToDatum: loads the image at `filename`
// (using `height`, `width` and `is_color` as image-loading options) into
// `datum` and appends every value of `labels` to the datum's float_data.
// Returns false when the image cannot be read.
// NOTE(review): the implementation shown for io.cpp does not use `encoding`.
bool ReadImageToDatum(const string& filename, const vector<float> labels,  
    const int height, const int width, const bool is_color,  
    const std::string & encoding, Datum* datum);  

3. src/caffe/util/io.cpp

在io.cpp文件中写上之前io.hpp中添加的函数的实现:

bool ReadImageToDatum(const string& filename, const vector<float> labels,  
    const int height, const int width, const bool is_color,  
    const std::string & encoding, Datum* datum) {  
  cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color);  
  if (cv_img.data) {  
    // if (encoding.size()) {  
    //   if ( (cv_img.channels() == 3) == is_color && !height && !width &&  
    //       matchExt(filename, encoding) )  
    //     return ReadFileToDatum(filename, label, datum);  
    //   std::vector<uchar> buf;  
    //   cv::imencode("."+encoding, cv_img, buf);  
    //   datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]),  
    //                   buf.size()));  
    //   datum->set_label(label);  
    //   datum->set_encoded(true);  
    //   return true;  
    // }  
                      
    CVMatToDatum(cv_img, datum);  
    //datum->set_label(label);  
  
    //###  
    for (int i = 0; i < labels.size(); ++i)  
    {  
      datum->add_float_data(labels.at(i));  
    }  
  
    return true;  
  } else {  
    return false;  
  }  
}  

4. src/caffe/proto/caffe.proto

修改caffe.proto文件,在message Datum中,将

optional uint32 label = 5;

改成

optional float label = 5;

同时,将

optional float float_data = 6;

改成

repeated float float_data = 6;

修改完后的message Datum如下:

// One stored sample: image dimensions, raw image bytes and its labels.
message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;
  // the actual image data, in bytes
  optional bytes data = 4;
  // Scalar label, changed to float by this tutorial. The multi-label
  // converter shown in this article does not write it; the labels go into
  // float_data instead.
  optional float label = 5;
  // Optionally, the datum could also hold float data.
  // In this tutorial it carries the per-image float labels, in list-file order.
  repeated float float_data = 6;
  // If true data contains an encoded image that need to be decoded
  optional bool encoded = 7 [default = false];
}

在DataParameter中,在最下面添加一行

  optional uint32 label_num = 11 [default = 4];

修改完后的DataParameter如下:

// Parameters of the Data layer.
message DataParameter {
  enum DB {
    LEVELDB = 0;
    LMDB = 1;
  }
  // Specify the data source.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  // DEPRECATED. Each solver accesses a different subset of the database.
  optional uint32 rand_skip = 7 [default = 0];
  optional DB backend = 8 [default = LEVELDB];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  // Force the encoded image to have 3 color channels
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue (Increase if data feeding bandwidth varies, within the
  // limit of device memory for GPU training)
  optional uint32 prefetch = 10 [default = 4];
  // Added by this tutorial: number of float labels per datum. The modified
  // DataLayer reads this many float_data values per item and shapes the label
  // blob as (batch_size, label_num, 1, 1).
  optional uint32 label_num = 11 [default = 4];
}


5.src/caffe/layers/data_layer.cpp

首先是DataLayerSetup函数,

1.	// label
2.	  //###  
3.	  // if (this->output_labels_) {  
4.	  //   vector<int> label_shape(1, batch_size);  
5.	  //   top[1]->Reshape(label_shape);  
6.	  //   for (int i = 0; i < this->PREFETCH_COUNT; ++i) {  
7.	  //     this->prefetch_[i].label_.Reshape(label_shape);  
8.	  //   }  
9.	  // }  
10.	  
11.	  //###  
12.	  int labelNum = this->layer_param_.data_param().label_num();     //#####
13.	
14.	  if (this->output_labels_) {  
15.	  
16.	    vector<int> label_shape;  
17.	    label_shape.push_back(batch_size);  
18.	    label_shape.push_back(labelNum);  
19.	    label_shape.push_back(1);  
20.	    label_shape.push_back(1);  
21.	    top[1]->Reshape(label_shape);
22.	    for (int i = 0; i < this->prefetch_.size(); ++i) {
23.	      this->prefetch_[i]->label_.Reshape(label_shape);
24.	    }
25.	  }
26.	

第二处修改在load_batch函数

1.	// Copy label.
2.	    // ###
3.	    // if (this->output_labels_) {
4.	    //  Dtype* top_label = batch->label_.mutable_cpu_data();
5.	    //  top_label[item_id] = datum.label();
6.	    // }
7.	
8.	    // ###
9.	    int labelNum = this->layer_param_.data_param().label_num();       //####
10.	    if (this->output_labels_) { 
11.	      Dtype* top_label = batch->label_.mutable_cpu_data(); 
12.	      for(int i=0;i<labelNum;i++){  
13.	        top_label[item_id*labelNum+i] = datum.float_data(i); //read float labels  
14.	      }  
15.	    }  

修改完后的data_layer.cpp文件如下:

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif  // USE_OPENCV
#include <stdint.h>

#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/data_layer.hpp"
#include "caffe/util/benchmark.hpp"

namespace caffe {

// Constructs the layer: creates the database object for the backend named in
// data_param, opens data_param().source() in READ mode and creates a cursor
// over it. offset_ is value-initialized.
template <typename Dtype>
DataLayer<Dtype>::DataLayer(const LayerParameter& param)
  : BasePrefetchingDataLayer<Dtype>(param),
    offset_() {
  db_.reset(db::GetDB(param.data_param().backend()));
  db_->Open(param.data_param().source(), db::READ);
  // The cursor is created only after the database has been opened.
  cursor_.reset(db_->NewCursor());
}

// Stops the layer's internal thread on destruction.
template <typename Dtype>
DataLayer<Dtype>::~DataLayer() {
  this->StopInternalThread();
}

// Shapes the top blobs and the prefetch buffers.
//
// top[0] (data) is shaped from the datum under the cursor, with the first
// dimension replaced by batch_size. When labels are output, top[1] is shaped
// as (batch_size, label_num, 1, 1), where label_num comes from data_param.
// Setup aborts if that datum stores fewer than label_num float values, since
// load_batch would otherwise read past the end of float_data.
template <typename Dtype>
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int batch_size = this->layer_param_.data_param().batch_size();

  // Read a data point, and use it to initialize the top blob.
  Datum datum;
  datum.ParseFromString(cursor_->value());
  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape top[0] and prefetch_data according to the batch_size.
  top_shape[0] = batch_size;
  top[0]->Reshape(top_shape);
  for (size_t i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->data_.Reshape(top_shape);
  }
  LOG_IF(INFO, Caffe::root_solver())
      << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();

  // label: one row of label_num floats per item instead of a single scalar.
  if (this->output_labels_) {
    const int labelNum =
        static_cast<int>(this->layer_param_.data_param().label_num());
    // Fail fast on a label_num / database mismatch.
    CHECK_GE(datum.float_data_size(), labelNum)
        << "label_num is " << labelNum << " but the datum only stores "
        << datum.float_data_size() << " float labels";

    vector<int> label_shape(4, 1);
    label_shape[0] = batch_size;
    label_shape[1] = labelNum;
    top[1]->Reshape(label_shape);
    for (size_t i = 0; i < this->prefetch_.size(); ++i) {
      this->prefetch_[i]->label_.Reshape(label_shape);
    }
  }
}

// Reports whether the record at the current offset should be skipped.
// A record is kept when offset_ modulo the solver count equals this solver's
// rank, or when the layer is in the TEST phase; otherwise it is skipped.
template <typename Dtype>
bool DataLayer<Dtype>::Skip() {
  const int solver_total = Caffe::solver_count();
  const int my_rank = Caffe::solver_rank();
  if ((offset_ % solver_total) == my_rank) {
    return false;
  }
  // In test mode, only rank 0 runs, so avoid skipping
  return this->layer_param_.phase() != TEST;
}

// Advances the cursor by one record, rewinding to the first record when the
// cursor becomes invalid, and increments offset_.
template<typename Dtype>
void DataLayer<Dtype>::Next() {
  cursor_->Next();
  const bool exhausted = !cursor_->valid();
  if (exhausted) {
    LOG_IF(INFO, Caffe::root_solver())
        << "Restarting data prefetching from start.";
    cursor_->SeekToFirst();
  }
  ++offset_;
}

// This function is called on prefetch thread
//
// Fills `batch` with batch_size items read from the database cursor.
// For each item the datum is parsed, transformed into the data blob and, when
// labels are output, its first label_num float_data values are copied into
// row item_id of the label blob.
// Aborts if a datum stores fewer than label_num float values.
// Note that the data transformations are applied to the image only; the
// label values are copied unchanged.
template<typename Dtype>
void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  const int batch_size = this->layer_param_.data_param().batch_size();
  // Loop-invariant, so read it from the layer parameter once per batch.
  const int labelNum =
      static_cast<int>(this->layer_param_.data_param().label_num());

  Datum datum;
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    timer.Start();
    while (Skip()) {
      Next();
    }
    datum.ParseFromString(cursor_->value());
    read_time += timer.MicroSeconds();
    if (item_id == 0) {
      // Reshape according to the first datum of each batch
      // on single input batches allows for inputs of varying dimension.
      // Use data_transformer to infer the expected blob shape from datum.
      vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
      this->transformed_data_.Reshape(top_shape);
      // Reshape batch according to the batch_size.
      top_shape[0] = batch_size;
      batch->data_.Reshape(top_shape);
    }

    // Apply data transformations (mirror, scale, crop...)
    timer.Start();
    int offset = batch->data_.offset(item_id);
    Dtype* top_data = batch->data_.mutable_cpu_data();
    this->transformed_data_.set_cpu_data(top_data + offset);
    this->data_transformer_->Transform(datum, &(this->transformed_data_));

    // Copy the float labels of this item into its row of the label blob.
    if (this->output_labels_) {
      // Guard the indexed reads below against a label_num / database mismatch.
      CHECK_GE(datum.float_data_size(), labelNum)
          << "label_num is " << labelNum << " but the datum only stores "
          << datum.float_data_size() << " float labels";
      Dtype* top_label = batch->label_.mutable_cpu_data();
      for (int i = 0; i < labelNum; ++i) {
        top_label[item_id * labelNum + i] = datum.float_data(i);
      }
    }

    trans_time += timer.MicroSeconds();
    Next();
  }
  timer.Stop();
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(DataLayer);
REGISTER_LAYER_CLASS(Data);

}  // namespace caffe

至此,已经完成了所有源码的修改。重新编译caffe会在build/tools文件夹下生成一个convert_imageset_regression的可执行文件,这个文件就是我们用来生成LMDB的文件。之后我们就可以使用这个版本的caffe来完成多标签训练的任务,这里需要注意几点;

1.需要在网络结构文件中写明label的个数,就是在data层中添加一个参数label_num: 4。如果是4个参数就写4,   8个参数就写8.

2.做回归任务一般不使用accuracy层。

3.做回归任务的时候如果要对数据做crop和resize等操作的时候,也要对label数据进行相应的映射变换。


我们可以使用convert_imageset_regression文件生成用于训练和测试的lmdb文件(注意:程序会把图片根目录和txt中的文件名直接拼接成图片路径,所以根目录必须以“/”结尾):

convert_imageset_regression IMGROOT/ train.txt train_lmdb

生成好lmdb文件后,加上我们的网络,就可以使用caffe进行训练

./caffe train --solver=regression_solver.prototxt

最后附上我使用vgg16训练的回归网络的网络结构文件(prototxt)

name: "VGG16"  
layer {  
  name: "data"  
  type: "Data"  
  top: "data"  
  top: "label"  
  include {  
    phase: TRAIN  
  }  
  transform_param {  
    mirror: true  
    crop_size: 224  
    mean_value: 103.939  
    mean_value: 116.779  
    mean_value: 123.68  
  }  
  data_param {  
    source: "data/ilsvrc12_shrt_256/ilsvrc12_train_leveldb"  
    batch_size: 64  
    label_num: 4
    backend: LEVELDB  
  }  
}  
layer {  
  name: "data"  
  type: "Data"  
  top: "data"  
  top: "label"  
  include {  
    phase: TEST  
  }  
  transform_param {  
    mirror: false  
    crop_size: 224  
    mean_value: 103.939  
    mean_value: 116.779  
    mean_value: 123.68  
  }  
  data_param {  
    source: "data/ilsvrc12_shrt_256/ilsvrc12_val_leveldb"  
    batch_size: 50  
    label_num: 4
    backend: LEVELDB  
  }  
}  
layer {  
  bottom: "data"  
  top: "conv1_1"  
  name: "conv1_1"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 64  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv1_1"  
  top: "conv1_1"  
  name: "relu1_1"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv1_1"  
  top: "conv1_2"  
  name: "conv1_2"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 64  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv1_2"  
  top: "conv1_2"  
  name: "relu1_2"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv1_2"  
  top: "pool1"  
  name: "pool1"  
  type: "Pooling"  
  pooling_param {  
    pool: MAX  
    kernel_size: 2  
    stride: 2  
  }  
}  
layer {  
  bottom: "pool1"  
  top: "conv2_1"  
  name: "conv2_1"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 128  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv2_1"  
  top: "conv2_1"  
  name: "relu2_1"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv2_1"  
  top: "conv2_2"  
  name: "conv2_2"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 128  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv2_2"  
  top: "conv2_2"  
  name: "relu2_2"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv2_2"  
  top: "pool2"  
  name: "pool2"  
  type: "Pooling"  
  pooling_param {  
    pool: MAX  
    kernel_size: 2  
    stride: 2  
  }  
}  
layer {  
  bottom: "pool2"  
  top: "conv3_1"  
  name: "conv3_1"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 256  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv3_1"  
  top: "conv3_1"  
  name: "relu3_1"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv3_1"  
  top: "conv3_2"  
  name: "conv3_2"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 256  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv3_2"  
  top: "conv3_2"  
  name: "relu3_2"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv3_2"  
  top: "conv3_3"  
  name: "conv3_3"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 256  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv3_3"  
  top: "conv3_3"  
  name: "relu3_3"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv3_3"  
  top: "pool3"  
  name: "pool3"  
  type: "Pooling"  
  pooling_param {  
    pool: MAX  
    kernel_size: 2  
    stride: 2  
  }  
}  
layer {  
  bottom: "pool3"  
  top: "conv4_1"  
  name: "conv4_1"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 512  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv4_1"  
  top: "conv4_1"  
  name: "relu4_1"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv4_1"  
  top: "conv4_2"  
  name: "conv4_2"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 512  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv4_2"  
  top: "conv4_2"  
  name: "relu4_2"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv4_2"  
  top: "conv4_3"  
  name: "conv4_3"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 512  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv4_3"  
  top: "conv4_3"  
  name: "relu4_3"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv4_3"  
  top: "pool4"  
  name: "pool4"  
  type: "Pooling"  
  pooling_param {  
    pool: MAX  
    kernel_size: 2  
    stride: 2  
  }  
}  
layer {  
  bottom: "pool4"  
  top: "conv5_1"  
  name: "conv5_1"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 512  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv5_1"  
  top: "conv5_1"  
  name: "relu5_1"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv5_1"  
  top: "conv5_2"  
  name: "conv5_2"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 512  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv5_2"  
  top: "conv5_2"  
  name: "relu5_2"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv5_2"  
  top: "conv5_3"  
  name: "conv5_3"  
  type: "Convolution"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  convolution_param {  
    num_output: 512  
    pad: 1  
    kernel_size: 3  
    weight_filler {  
      type: "gaussian"  
      std: 0.01  
    }  
    bias_filler {  
      type: "constant"  
      value: 0  
    }  
  }  
}  
layer {  
  bottom: "conv5_3"  
  top: "conv5_3"  
  name: "relu5_3"  
  type: "ReLU"  
}  
layer {  
  bottom: "conv5_3"  
  top: "pool5"  
  name: "pool5"  
  type: "Pooling"  
  pooling_param {  
    pool: MAX  
    kernel_size: 2  
    stride: 2  
  }  
}  
layer {  
  bottom: "pool5"  
  top: "fc6"  
  name: "fc6"  
  type: "InnerProduct"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  inner_product_param {  
    num_output: 4096  
    weight_filler {  
      type: "gaussian"  
      std: 0.005  
    }  
    bias_filler {  
      type: "constant"  
      value: 0.1  
    }  
  }  
}  
layer {  
  bottom: "fc6"  
  top: "fc6"  
  name: "relu6"  
  type: "ReLU"  
}  
layer {  
  bottom: "fc6"  
  top: "fc6"  
  name: "drop6"  
  type: "Dropout"  
  dropout_param {  
    dropout_ratio: 0.5  
  }  
}  
layer {  
  bottom: "fc6"  
  top: "fc7"  
  name: "fc7"  
  type: "InnerProduct"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  inner_product_param {  
    num_output: 4096  
    weight_filler {  
      type: "gaussian"  
      std: 0.005  
    }  
    bias_filler {  
      type: "constant"  
      value: 0.1  
    }  
  }  
}  
layer {  
  bottom: "fc7"  
  top: "fc7"  
  name: "relu7"  
  type: "ReLU"  
}  
layer {  
  bottom: "fc7"  
  top: "fc7"  
  name: "drop7"  
  type: "Dropout"  
  dropout_param {  
    dropout_ratio: 0.5  
  }  
}  
layer {  
  bottom: "fc7"  
  top: "fc8"  
  name: "fc8"  
  type: "InnerProduct"  
  param {  
    lr_mult: 1  
    decay_mult: 1  
  }  
  param {  
    lr_mult: 2  
    decay_mult: 0  
  }  
  inner_product_param {  
    num_output: 4  
    weight_filler {  
      type: "gaussian"  
      std: 0.005  
    }  
    bias_filler {  
      type: "constant"  
      value: 0.1  
    }  
  }  
}    
layer {
  name: "EuclideanLoss"
  type: "EuclideanLoss"
  bottom: "fc8"
  bottom: "label" 
  top: "loss"
} 

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

训练好网络之后,我们需要测试网络的表现。这里附上我写的调用接口

CRegression.h

#include <caffe/caffe.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using namespace caffe;
using namespace cv;
using std::string;

// Small wrapper around a caffe Net<float> for running the trained regression
// network on single images.
class Regression
{
public:
	Regression();
	~Regression();

	// Switches Caffe to GPU mode, builds the net from model_file in TEST phase
	// and loads the weights from trained_file. Returns 0.
	int Initial(const string &model_file, const string &trained_file);
	// Runs a forward pass on Img, which must already match the network input
	// size, and prints the first four output values to stdout.
	// The implementation shown returns a default-constructed Rect.
	Rect FillBuf(Mat Img);
	// NOTE(review): no definition of this overload appears in the
	// CRegression.cpp listing shown with this header -- confirm it exists.
	int FillBuf(Mat Img, int tag);


private:
	// The network; assigned in Initial().
	shared_ptr<Net<float> > net_;
};

CRegression.cpp

#include "CRegression.h"

// Nothing to set up here; the network is created later by Initial().
Regression::Regression() {}

// No explicit cleanup; net_ is a shared_ptr member.
Regression::~Regression() {}

int Regression::Initial(const string &model_file, const string &trained_file)
{
	Caffe::set_mode(Caffe::GPU);
	/* Load the network. */
	net_.reset(new Net<float>(model_file, TEST));
	net_->CopyTrainedLayersFrom(trained_file);
	
	return 0;
}
// Runs one forward pass of the regression network on Img.
//
// Img must be a 3-channel image whose size already equals the network input
// size; no resizing is done here. The per-channel means
// (103.939, 116.779, 123.68) are subtracted, the pixels are copied into the
// input blob in channel-major order, and the first four output values are
// printed to stdout.
// Returns a default-constructed Rect; the predicted values are only printed.
// Aborts (glog CHECK) if the net has not been loaded, or if the image or the
// network output does not have the expected layout.
Rect Regression::FillBuf(Mat Img)
{
	CHECK(net_.get() != NULL) << "Initial() must be called before FillBuf()";
	// The copy loop below reads Vec3f pixels, so a non-3-channel image would
	// be read out of bounds. convertTo changes depth only, not channel count.
	CHECK_EQ(Img.channels(), 3) << "FillBuf expects a 3-channel image";

	// convertTo allocates its own destination, so no clone() is needed.
	Mat sample;
	Img.convertTo(sample, CV_32FC3);

	Blob<float>* inputBlob = net_->input_blobs()[0];
	const int width = inputBlob->width();
	const int height = inputBlob->height();
	CHECK_EQ(inputBlob->channels(), 3) << "network input must have 3 channels";
	Size inputSize = Size(width, height);
	CHECK(sample.size() == inputSize)
		<< "sample size is not equal to inputSize";

	Mat mean(sample.size(), CV_32FC3, Scalar(103.939, 116.779, 123.68));
	subtract(sample, mean, sample);

	// Copy the pixels into the input blob: interleaved rows in, one plane per
	// channel out.
	float* data = inputBlob->mutable_cpu_data();
	for (int i = 0; i < height; ++i){
		const Vec3f* row = sample.ptr<Vec3f>(i);
		for (int j = 0; j < width; ++j){
			for (int k = 0; k < 3; ++k){
				data[(k*height + i)*width + j] = row[j][k];
			}
		}
	}

	// Forward pass; the returned vector holds the network's output blobs.
	vector<Blob<float>* > inputs(1, inputBlob);
	const vector<Blob<float>* >& outputBlobs = net_->Forward(inputs);
	CHECK(!outputBlobs.empty()) << "network produced no output blob";
	Blob<float>* outputBlob = outputBlobs[0];
	// Four values are printed below, so the output must hold at least four.
	CHECK_GE(outputBlob->count(), 4) << "expected at least 4 output values";
	const float* value = outputBlob->cpu_data();

	std::cout << value[0] << "  " << value[1] << "  " << value[2] << " " << value[3] << std::endl;

	return Rect();
}







评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值