I recently needed to use Caffe to train a regression network whose input labels are multi-dimensional float data. After going through a lot of material, I put together a fairly concise workflow, which I'm recording here.
This post largely follows the blog below, with some small modifications and refinements:
http://blog.csdn.net/qq295456059/article/details/53142574
First, the training list txt file we use for regression has the following format (an image name followed by its float labels):
cat_1.jpg 0.03 0.45 0.55 0.66
cat_2.jpg 0.44 0.31 0.05 0.34
dog_1.jpg 0.67 0.25 0.79 0.56
dog_2.jpg 0.89 0.46 0.91 0.38
In total, we need to modify the following files:
1. tools/convert_imageset.cpp
I recommend copying convert_imageset.cpp to a new file named convert_imageset_regression.cpp, still under the tools folder.
First, add a few headers:
#include <boost/tokenizer.hpp> //### To use tokenizer
#include <iostream> //###
using namespace std; //###
Then change the label from a single int to a vector of floats:
//std::vector<std::pair<std::string, int> > lines; //###
std::vector<std::pair<std::string, std::vector<float> > > lines;
std::string line;
//size_t pos;
//int label; //###
std::vector<float> labels;
In the while loop, parse the float labels from each line and push them into lines:
while (std::getline(infile, line)) {
// pos = line.find_last_of(' ');
// label = atoi(line.substr(pos + 1).c_str());
// lines.push_back(std::make_pair(line.substr(0, pos), label));
//###
std::vector<std::string> tokens;
boost::char_separator<char> sep(" ");
boost::tokenizer<boost::char_separator<char> > tok(line, sep);
tokens.clear();
std::copy(tok.begin(), tok.end(), std::back_inserter(tokens));
for (int i = 1; i < tokens.size(); ++i)
{
labels.push_back(atof(tokens.at(i).c_str()));
}
lines.push_back(std::make_pair(tokens.at(0), labels));
//###To clear the vector labels
labels.clear();
}
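If you want to sanity-check this parsing logic in isolation, here is a minimal standalone sketch (it only needs Boost, and hard-codes one line of the example list above):
#include <boost/tokenizer.hpp>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
int main() {
  // One line of the training list, parsed exactly as in the loop above.
  std::string line = "cat_1.jpg 0.03 0.45 0.55 0.66";
  boost::char_separator<char> sep(" ");
  boost::tokenizer<boost::char_separator<char> > tok(line, sep);
  std::vector<std::string> tokens(tok.begin(), tok.end());
  std::vector<float> labels;
  for (size_t i = 1; i < tokens.size(); ++i)
    labels.push_back(atof(tokens.at(i).c_str()));
  std::cout << tokens.at(0) << " -> " << labels.size() << " labels" << std::endl;
  return 0;
}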
The complete modified file looks like this:
// This program converts a set of images to a lmdb/leveldb by storing them
// as Datum proto buffers.
// Usage:
// convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
//
// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
// should be a list of files as well as their float labels, in the format as
// subfolder1/file1.JPEG 0.03 0.45 0.55 0.66
// ....
#include <algorithm>
#include <fstream> // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>
#include "boost/scoped_ptr.hpp"
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"
#include <boost/tokenizer.hpp> //### To use tokenizer
#include <iostream>
using namespace caffe; // NOLINT(build/namespaces)
using std::pair;
using boost::scoped_ptr;
DEFINE_bool(gray, false,
"When this option is on, treat images as grayscale ones");
DEFINE_bool(shuffle, false,
"Randomly shuffle the order of images and their labels");
DEFINE_string(backend, "lmdb",
"The backend {lmdb, leveldb} for storing the result");
DEFINE_int32(resize_width, 0, "Width images are resized to");
DEFINE_int32(resize_height, 0, "Height images are resized to");
DEFINE_bool(check_size, false,
"When this option is on, check that all the datum have the same size");
DEFINE_bool(encoded, false,
"When this option is on, the encoded image will be save in datum");
DEFINE_string(encode_type, "",
"Optional: What type should we encode the image as ('png','jpg',...).");
int main(int argc, char** argv) {
#ifdef USE_OPENCV
::google::InitGoogleLogging(argv[0]);
// Print output to stderr (while still logging)
FLAGS_alsologtostderr = 1;
#ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
#endif
gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n"
"format used as input for Caffe.\n"
"Usage:\n"
" convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
"The ImageNet dataset for the training demo is at\n"
" http://www.image-net.org/download-images\n");
gflags::ParseCommandLineFlags(&argc, &argv, true);
if (argc < 4) {
gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
return 1;
}
const bool is_color = !FLAGS_gray;
const bool check_size = FLAGS_check_size;
const bool encoded = FLAGS_encoded;
const string encode_type = FLAGS_encode_type;
std::ifstream infile(argv[2]);
//std::vector<std::pair<std::string, int> > lines;
std::vector<std::pair<std::string, std::vector<float> > > lines;
std::string line;
//size_t pos;
//int label;
std::vector<float> labels;
while (std::getline(infile, line)) {
// pos = line.find_last_of(' ');
// label = atoi(line.substr(pos + 1).c_str());
// lines.push_back(std::make_pair(line.substr(0, pos), label));
//###
std::vector<std::string> tokens;
boost::char_separator<char> sep(" ");
boost::tokenizer<boost::char_separator<char> > tok(line, sep);
tokens.clear();
std::copy(tok.begin(), tok.end(), std::back_inserter(tokens));
for (int i = 1; i < tokens.size(); ++i)
{
labels.push_back(atof(tokens.at(i).c_str()));
}
lines.push_back(std::make_pair(tokens.at(0), labels));
//###To clear the vector labels
labels.clear();
}
if (FLAGS_shuffle) {
// randomly shuffle data
LOG(INFO) << "Shuffling data";
shuffle(lines.begin(), lines.end());
}
LOG(INFO) << "A total of " << lines.size() << " images.";
if (encode_type.size() && !encoded)
LOG(INFO) << "encode_type specified, assuming encoded=true.";
int resize_height = std::max<int>(0, FLAGS_resize_height);
int resize_width = std::max<int>(0, FLAGS_resize_width);
// Create new DB
scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
db->Open(argv[3], db::NEW);
scoped_ptr<db::Transaction> txn(db->NewTransaction());
// Storing to db
std::string root_folder(argv[1]);
Datum datum;
int count = 0;
int data_size = 0;
bool data_size_initialized = false;
for (int line_id = 0; line_id < lines.size(); ++line_id) {
bool status;
std::string enc = encode_type;
if (encoded && !enc.size()) {
// Guess the encoding type from the file name
string fn = lines[line_id].first;
size_t p = fn.rfind('.');
if ( p == fn.npos )
LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
enc = fn.substr(p);
std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
}
status = ReadImageToDatum(root_folder + lines[line_id].first,
lines[line_id].second, resize_height, resize_width, is_color,
enc, &datum);
if (status == false) continue;
if (check_size) {
if (!data_size_initialized) {
data_size = datum.channels() * datum.height() * datum.width();
data_size_initialized = true;
} else {
const std::string& data = datum.data();
CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
<< data.size();
}
}
// sequential
string key_str = caffe::format_int(line_id, 8) + "_" + lines[line_id].first;
// Put in db
string out;
CHECK(datum.SerializeToString(&out));
txn->Put(key_str, out);
if (++count % 1000 == 0) {
// Commit db
txn->Commit();
txn.reset(db->NewTransaction());
LOG(INFO) << "Processed " << count << " files.";
}
}
// write the last batch
if (count % 1000 != 0) {
txn->Commit();
LOG(INFO) << "Processed " << count << " files.";
}
#else
LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif // USE_OPENCV
return 0;
}
2. include/caffe/util/io.hpp
Add the following function declaration to io.hpp:
bool ReadImageToDatum(const string& filename, const vector<float> labels,
const int height, const int width, const bool is_color,
const std::string & encoding, Datum* datum);
3. src/caffe/util/io.cpp
Implement in io.cpp the function we just declared in io.hpp. Note that the encoding branch is commented out below, so don't pass -encoded to the regression tool:
bool ReadImageToDatum(const string& filename, const vector<float> labels,
const int height, const int width, const bool is_color,
const std::string & encoding, Datum* datum) {
cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color);
if (cv_img.data) {
// if (encoding.size()) {
// if ( (cv_img.channels() == 3) == is_color && !height && !width &&
// matchExt(filename, encoding) )
// return ReadFileToDatum(filename, label, datum);
// std::vector<uchar> buf;
// cv::imencode("."+encoding, cv_img, buf);
// datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]),
// buf.size()));
// datum->set_label(label);
// datum->set_encoded(true);
// return true;
// }
CVMatToDatum(cv_img, datum);
//datum->set_label(label);
//###
for (int i = 0; i < labels.size(); ++i)
{
datum->add_float_data(labels.at(i));
}
return true;
} else {
return false;
}
}
4. src/caffe/proto/caffe.proto
Modify caffe.proto. In message Datum, change
optional int32 label = 5;
to
optional float label = 5;
(the stock field is an int32), and make sure float_data is declared repeated:
repeated float float_data = 6;
In recent Caffe versions float_data is already repeated; if your copy still reads optional float float_data = 6;, change it as above.
The modified message Datum looks like this:
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
optional int32 width = 3;
// the actual image data, in bytes
optional bytes data = 4;
optional float label = 5;
// Optionally, the datum could also hold float data.
repeated float float_data = 6;
// If true data contains an encoded image that need to be decoded
optional bool encoded = 7 [default = false];
}
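To make the effect of these proto changes concrete, here is a minimal sketch (not one of the required modifications; the image size is made up) of how one line of our training list ends up inside a Datum:
#include <iostream>
#include "caffe/proto/caffe.pb.h"
int main() {
  // Hypothetical illustration: what ReadImageToDatum now stores for the
  // line "cat_1.jpg 0.03 0.45 0.55 0.66".
  caffe::Datum datum;
  datum.set_channels(3);      // BGR image
  datum.set_height(256);      // made-up resize_height
  datum.set_width(256);       // made-up resize_width
  // datum.set_data(...) would carry the raw pixel bytes (via CVMatToDatum).
  const float labels[] = {0.03f, 0.45f, 0.55f, 0.66f};
  for (int i = 0; i < 4; ++i)
    datum.add_float_data(labels[i]);  // labels go into repeated float_data
  std::cout << "stored " << datum.float_data_size() << " labels" << std::endl;
  return 0;
}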
In DataParameter, add one line at the bottom:
optional uint32 label_num = 11 [default = 4];
The modified DataParameter looks like this:
message DataParameter {
enum DB {
LEVELDB = 0;
LMDB = 1;
}
// Specify the data source.
optional string source = 1;
// Specify the batch size.
optional uint32 batch_size = 4;
// The rand_skip variable is for the data layer to skip a few data points
// to avoid all asynchronous sgd clients to start at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the database.
// DEPRECATED. Each solver accesses a different subset of the database.
optional uint32 rand_skip = 7 [default = 0];
optional DB backend = 8 [default = LEVELDB];
// DEPRECATED. See TransformationParameter. For data pre-processing, we can do
// simple scaling and subtracting the data mean, if provided. Note that the
// mean subtraction is always carried out before scaling.
optional float scale = 2 [default = 1];
optional string mean_file = 3;
// DEPRECATED. See TransformationParameter. Specify if we would like to randomly
// crop an image.
optional uint32 crop_size = 5 [default = 0];
// DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
// data.
optional bool mirror = 6 [default = false];
// Force the encoded image to have 3 color channels
optional bool force_encoded_color = 9 [default = false];
// Prefetch queue (Increase if data feeding bandwidth varies, within the
// limit of device memory for GPU training)
optional uint32 prefetch = 10 [default = 4];
optional uint32 label_num = 11 [default = 4];
}
5. src/caffe/layers/data_layer.cpp
The first change is in the DataLayerSetUp function: the label blob is reshaped from one value per image to labelNum values per image, i.e. batch_size x labelNum x 1 x 1:
// label
//###
// if (this->output_labels_) {
//   vector<int> label_shape(1, batch_size);
//   top[1]->Reshape(label_shape);
//   for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
//     this->prefetch_[i].label_.Reshape(label_shape);
//   }
// }

//###
int labelNum = this->layer_param_.data_param().label_num(); //#####

if (this->output_labels_) {
  vector<int> label_shape;
  label_shape.push_back(batch_size);
  label_shape.push_back(labelNum);
  label_shape.push_back(1);
  label_shape.push_back(1);
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->label_.Reshape(label_shape);
  }
}
The second change is in the load_batch function:
// Copy label.
// ###
// if (this->output_labels_) {
//   Dtype* top_label = batch->label_.mutable_cpu_data();
//   top_label[item_id] = datum.label();
// }

// ###
int labelNum = this->layer_param_.data_param().label_num(); //####
if (this->output_labels_) {
  Dtype* top_label = batch->label_.mutable_cpu_data();
  for (int i = 0; i < labelNum; i++) {
    top_label[item_id*labelNum + i] = datum.float_data(i);  // read float labels
  }
}
The complete modified data_layer.cpp looks like this:
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif // USE_OPENCV
#include <stdint.h>
#include <vector>
#include "caffe/data_transformer.hpp"
#include "caffe/layers/data_layer.hpp"
#include "caffe/util/benchmark.hpp"
namespace caffe {
template <typename Dtype>
DataLayer<Dtype>::DataLayer(const LayerParameter& param)
: BasePrefetchingDataLayer<Dtype>(param),
offset_() {
db_.reset(db::GetDB(param.data_param().backend()));
db_->Open(param.data_param().source(), db::READ);
cursor_.reset(db_->NewCursor());
}
template <typename Dtype>
DataLayer<Dtype>::~DataLayer() {
this->StopInternalThread();
}
template <typename Dtype>
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const int batch_size = this->layer_param_.data_param().batch_size();
// Read a data point, and use it to initialize the top blob.
Datum datum;
datum.ParseFromString(cursor_->value());
// Use data_transformer to infer the expected blob shape from datum.
vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
this->transformed_data_.Reshape(top_shape);
// Reshape top[0] and prefetch_data according to the batch_size.
top_shape[0] = batch_size;
top[0]->Reshape(top_shape);
for (int i = 0; i < this->prefetch_.size(); ++i) {
this->prefetch_[i]->data_.Reshape(top_shape);
}
LOG_IF(INFO, Caffe::root_solver())
<< "output data size: " << top[0]->num() << ","
<< top[0]->channels() << "," << top[0]->height() << ","
<< top[0]->width();
// label
//###
// if (this->output_labels_) {
// vector<int> label_shape(1, batch_size);
// top[1]->Reshape(label_shape);
// for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
// this->prefetch_[i].label_.Reshape(label_shape);
// }
// }
//###
int labelNum = this->layer_param_.data_param().label_num(); //#####
if (this->output_labels_) {
vector<int> label_shape;
label_shape.push_back(batch_size);
label_shape.push_back(labelNum);
label_shape.push_back(1);
label_shape.push_back(1);
top[1]->Reshape(label_shape);
for (int i = 0; i < this->prefetch_.size(); ++i) {
this->prefetch_[i]->label_.Reshape(label_shape);
}
}
}
template <typename Dtype>
bool DataLayer<Dtype>::Skip() {
int size = Caffe::solver_count();
int rank = Caffe::solver_rank();
bool keep = (offset_ % size) == rank ||
// In test mode, only rank 0 runs, so avoid skipping
this->layer_param_.phase() == TEST;
return !keep;
}
template<typename Dtype>
void DataLayer<Dtype>::Next() {
cursor_->Next();
if (!cursor_->valid()) {
LOG_IF(INFO, Caffe::root_solver())
<< "Restarting data prefetching from start.";
cursor_->SeekToFirst();
}
offset_++;
}
// This function is called on prefetch thread
template<typename Dtype>
void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
CPUTimer batch_timer;
batch_timer.Start();
double read_time = 0;
double trans_time = 0;
CPUTimer timer;
CHECK(batch->data_.count());
CHECK(this->transformed_data_.count());
const int batch_size = this->layer_param_.data_param().batch_size();
Datum datum;
for (int item_id = 0; item_id < batch_size; ++item_id) {
timer.Start();
while (Skip()) {
Next();
}
datum.ParseFromString(cursor_->value());
read_time += timer.MicroSeconds();
if (item_id == 0) {
// Reshape according to the first datum of each batch
// on single input batches allows for inputs of varying dimension.
// Use data_transformer to infer the expected blob shape from datum.
vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
this->transformed_data_.Reshape(top_shape);
// Reshape batch according to the batch_size.
top_shape[0] = batch_size;
batch->data_.Reshape(top_shape);
}
// Apply data transformations (mirror, scale, crop...)
timer.Start();
int offset = batch->data_.offset(item_id);
Dtype* top_data = batch->data_.mutable_cpu_data();
this->transformed_data_.set_cpu_data(top_data + offset);
this->data_transformer_->Transform(datum, &(this->transformed_data_));
// Copy label.
// ###
// if (this->output_labels_) {
// Dtype* top_label = batch->label_.mutable_cpu_data();
// top_label[item_id] = datum.label();
// }
// ###
int labelNum = this->layer_param_.data_param().label_num(); //####
if (this->output_labels_) {
Dtype* top_label = batch->label_.mutable_cpu_data();
for(int i=0;i<labelNum;i++){
top_label[item_id*labelNum+i] = datum.float_data(i); //read float labels
}
}
trans_time += timer.MicroSeconds();
Next();
}
timer.Stop();
batch_timer.Stop();
DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}
INSTANTIATE_CLASS(DataLayer);
REGISTER_LAYER_CLASS(Data);
} // namespace caffe
That completes all of the source changes. After rebuilding Caffe, an executable named convert_imageset_regression appears under build/tools; this is the tool we use to generate the LMDB. We can then use this build of Caffe for multi-label regression training. A few points to note:
1. The number of labels must be declared in the network definition file by adding label_num: 4 to the data layer's data_param (write 4 for four labels, 8 for eight, and so on).
2. Regression tasks generally do not use an accuracy layer.
3. If you crop, resize, or otherwise transform the input images, the labels must be mapped accordingly (see the sketch below).
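For example, here is a minimal sketch of such a mapping, assuming the four labels are a box in normalized (x1, y1, x2, y2) coordinates (that is my use case; nothing in the code above enforces it). Caffe's transform_param crop is applied to the image only, so a center crop has to be undone in the labels by hand, and mirroring would similarly require flipping the x coordinates:
#include <algorithm>
#include <iostream>
// Remap one normalized coordinate after a center crop of size crop
// taken from an image of size full.
float remap_center_crop(float v, int full, int crop) {
  int offset = (full - crop) / 2;                  // crop origin in pixels
  float pixel = v * full;                          // normalized -> pixels
  float shifted = (pixel - offset) / crop;         // re-normalize to the crop
  return std::min(1.0f, std::max(0.0f, shifted));  // clamp into [0, 1]
}
int main() {
  // e.g. a 256x256 image center-cropped to 224x224 (the VGG16 setup below)
  std::cout << remap_center_crop(0.45f, 256, 224) << std::endl;  // ~0.443
  return 0;
}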
We can now use convert_imageset_regression to generate the lmdb files for training and testing:
convert_imageset_regression IMGROOT train.txt train_lmdb
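Before training, it is worth verifying that the labels actually made it into the LMDB. Here is a minimal sketch using Caffe's db API, reading back the first datum of the train_lmdb generated above:
#include <iostream>
#include "boost/scoped_ptr.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
using namespace caffe;
using boost::scoped_ptr;
int main() {
  scoped_ptr<db::DB> lmdb(db::GetDB("lmdb"));
  lmdb->Open("train_lmdb", db::READ);
  scoped_ptr<db::Cursor> cursor(lmdb->NewCursor());
  Datum datum;
  datum.ParseFromString(cursor->value());
  std::cout << datum.channels() << "x" << datum.height() << "x"
            << datum.width() << ", labels:";
  for (int i = 0; i < datum.float_data_size(); ++i)
    std::cout << " " << datum.float_data(i);  // should match the txt line
  std::cout << std::endl;
  return 0;
}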
Once the lmdb files are ready, we add our network definition and train with caffe:
./caffe train --solver=regression_solver.prototxt
Finally, here is the network definition of the regression network I trained based on VGG16:
name: "VGG16"
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
crop_size: 224
mean_value: 103.939
mean_value: 116.779
mean_value: 123.68
}
data_param {
source: "data/ilsvrc12_shrt_256/ilsvrc12_train_leveldb"
batch_size: 64
label_num: 4
backend: LEVELDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mirror: false
crop_size: 224
mean_value: 103.939
mean_value: 116.779
mean_value: 123.68
}
data_param {
source: "data/ilsvrc12_shrt_256/ilsvrc12_val_leveldb"
batch_size: 50
label_num: 4
backend: LEVELDB
}
}
layer {
bottom: "data"
top: "conv1_1"
name: "conv1_1"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv1_1"
top: "conv1_1"
name: "relu1_1"
type: "ReLU"
}
layer {
bottom: "conv1_1"
top: "conv1_2"
name: "conv1_2"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv1_2"
top: "conv1_2"
name: "relu1_2"
type: "ReLU"
}
layer {
bottom: "conv1_2"
top: "pool1"
name: "pool1"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
bottom: "pool1"
top: "conv2_1"
name: "conv2_1"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv2_1"
top: "conv2_1"
name: "relu2_1"
type: "ReLU"
}
layer {
bottom: "conv2_1"
top: "conv2_2"
name: "conv2_2"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv2_2"
top: "conv2_2"
name: "relu2_2"
type: "ReLU"
}
layer {
bottom: "conv2_2"
top: "pool2"
name: "pool2"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
bottom: "pool2"
top: "conv3_1"
name: "conv3_1"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv3_1"
top: "conv3_1"
name: "relu3_1"
type: "ReLU"
}
layer {
bottom: "conv3_1"
top: "conv3_2"
name: "conv3_2"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv3_2"
top: "conv3_2"
name: "relu3_2"
type: "ReLU"
}
layer {
bottom: "conv3_2"
top: "conv3_3"
name: "conv3_3"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv3_3"
top: "conv3_3"
name: "relu3_3"
type: "ReLU"
}
layer {
bottom: "conv3_3"
top: "pool3"
name: "pool3"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
bottom: "pool3"
top: "conv4_1"
name: "conv4_1"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv4_1"
top: "conv4_1"
name: "relu4_1"
type: "ReLU"
}
layer {
bottom: "conv4_1"
top: "conv4_2"
name: "conv4_2"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv4_2"
top: "conv4_2"
name: "relu4_2"
type: "ReLU"
}
layer {
bottom: "conv4_2"
top: "conv4_3"
name: "conv4_3"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv4_3"
top: "conv4_3"
name: "relu4_3"
type: "ReLU"
}
layer {
bottom: "conv4_3"
top: "pool4"
name: "pool4"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
bottom: "pool4"
top: "conv5_1"
name: "conv5_1"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv5_1"
top: "conv5_1"
name: "relu5_1"
type: "ReLU"
}
layer {
bottom: "conv5_1"
top: "conv5_2"
name: "conv5_2"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv5_2"
top: "conv5_2"
name: "relu5_2"
type: "ReLU"
}
layer {
bottom: "conv5_2"
top: "conv5_3"
name: "conv5_3"
type: "Convolution"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
bottom: "conv5_3"
top: "conv5_3"
name: "relu5_3"
type: "ReLU"
}
layer {
bottom: "conv5_3"
top: "pool5"
name: "pool5"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
bottom: "pool5"
top: "fc6"
name: "fc6"
type: "InnerProduct"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
bottom: "fc6"
top: "fc6"
name: "relu6"
type: "ReLU"
}
layer {
bottom: "fc6"
top: "fc6"
name: "drop6"
type: "Dropout"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
bottom: "fc6"
top: "fc7"
name: "fc7"
type: "InnerProduct"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
bottom: "fc7"
top: "fc7"
name: "relu7"
type: "ReLU"
}
layer {
bottom: "fc7"
top: "fc7"
name: "drop7"
type: "Dropout"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
bottom: "fc7"
top: "fc8"
name: "fc8"
type: "InnerProduct"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "EuclideanLoss"
type: "EuclideanLoss"
bottom: "fc8"
bottom: "label"
top: "loss"
}
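The final EuclideanLoss layer is what makes this a regression network: with fc8 producing 4 outputs per image, it computes

E = \frac{1}{2N} \sum_{n=1}^{N} \| \hat{y}_n - y_n \|_2^2

over a batch of N images, where \hat{y}_n is the fc8 output and y_n the 4-dimensional label. This is also why note 2 above applies: there is no class prediction for an accuracy layer to score.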
After training the network, we need to test how it performs. Here is the inference interface I wrote:
CRegression.h
#include <caffe/caffe.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
using namespace caffe;
using namespace cv;
using std::string;
class Regression
{
public:
Regression();
~Regression();
int Initial(const string &model_file, const string &trained_file);
Rect FillBuf(Mat Img);
int FillBuf(Mat Img, int tag);
private:
shared_ptr<Net<float> > net_;
};
CRegression.cpp
#include "CRegression.h"
Regression::Regression()
{
}
Regression::~Regression()
{
}
int Regression::Initial(const string &model_file, const string &trained_file)
{
Caffe::set_mode(Caffe::GPU);
/* Load the network. */
net_.reset(new Net<float>(model_file, TEST));
net_->CopyTrainedLayersFrom(trained_file);
return 0;
}
Rect Regression::FillBuf(Mat Img)
{
Mat sample = Img.clone();
sample.convertTo(sample, CV_32FC3);
Blob<float>* inputBlob = net_->input_blobs()[0];
int width = inputBlob->width();
int height = inputBlob->height();
Size inputSize = Size(width, height);
CHECK(sample.size() == inputSize)
<< "sample size is not equal to inputSize";
Mat mean(sample.size(), CV_32FC3, Scalar(103.939, 116.779, 123.68));
subtract(sample, mean, sample);
float* data = inputBlob->mutable_cpu_data(); // copy the image pixels into the network's input blob
for (int k = 0; k < 3; ++k){
for (int i = 0; i < height; ++i){
for (int j = 0; j < width; ++j){
int index = (k*height + i)*width + j; // offset in channel-major (CHW) layout
data[index] = sample.at<Vec3f>(i, j)[k];
}
}
}
vector<Blob<float>* > inputs(1, inputBlob);
const vector<Blob<float>* >& outputBlobs = net_->Forward(inputs); // forward pass; returns the net's output blobs
Blob<float>* outputBlob = outputBlobs[0]; // the output blob
const float* value = outputBlob->cpu_data();
std::cout << value[0] << " " << value[1] << " " << value[2] << " " << value[3] << std::endl;
return Rect(); // the predictions are only printed here; build a Rect from them as needed
}
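And a minimal usage sketch (the file names are placeholders for your own deploy prototxt, trained weights, and test image; the resize assumes the deploy net takes 224x224 input like the training net above):
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "CRegression.h"
int main() {
  Regression reg;
  // Placeholder paths: substitute your own deploy net and weights.
  reg.Initial("regression_deploy.prototxt", "regression_iter_10000.caffemodel");
  cv::Mat img = cv::imread("test.jpg");
  cv::resize(img, img, cv::Size(224, 224));  // match the input blob size
  reg.FillBuf(img);  // prints the four predicted values
  return 0;
}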