caffe导读（1） caffe源码修改：抽取任意一张图片的特征

最新推荐文章于 2019-01-21 23:08:38 发布

lien0906

最新推荐文章于 2019-01-21 23:08:38 发布

阅读量922

点赞数

分类专栏：深度学习

深度学习专栏收录该内容

84 篇文章 2 订阅

订阅专栏

caffe源码修改：抽取任意一张图片的特征

目前caffe不是很完善，输入的图片数据需要在prototxt指定路径。但是我们往往有这么一个需求：训练后得到一个模型文件，我们想拿这个模型文件来对一张图片抽取特征或者预测分类等。如果非得在prototxt指定路径，就很不方便。因此，这样的工具才是我们需要的：给一个可执行文件通过命令行来传递图片路径，然后caffe读入图片数据，进行一次正向传播。

因此我做了这么一个工具，用来抽取任意一张图片的特征。

这工具的使用方法如下：

extract_one_feature.bin ./model/caffe_reference_imagenet_model ./examples/_temp/imagenet_val.prototxt fc7 ./examples/_temp/features /media/G/imageset/clothing/针织衫/针织衫_426.jpg CPU

参数1：./model/caffe_reference_imagenet_model是训练后的模型文件

参数2：./examples/_temp/imagenet_val.prototxt 网络配置文件

参数3：fc7是blob的名字

参数4：./examples/_temp/features 将该图片的特征保存在该文件

参数5：图片路径

参数6：GPU或者CPU模式

（其实我还想到更好的工具，如果该可执行文件是监听模式的，就是通过一定的方式，给该进程传递图片路径，进程接到任务就执行。

这样子的话，就不需要每次抽一张图片都要申请内存空间。(*^__^*) 嘻嘻……）

下面给出初步修改方法，大家可以根据自己需求再修改。

extract_one_feature.cpp（该文件参考过源码中extract_features.cpp修改）

[cpp]view plaincopy 
   
 #include <stdio.h>  // for snprintf  
 #include <string>  
 #include <vector>  
 #include <iostream>  
 #include <fstream>  
   
 #include "boost/algorithm/string.hpp"  
 #include "google/protobuf/text_format.h"  
 #include "leveldb/db.h"  
 #include "leveldb/write_batch.h"  
   
 #include "caffe/blob.hpp"  
 #include "caffe/common.hpp"  
 #include "caffe/net.hpp"  
 #include "caffe/proto/caffe.pb.h"  
 #include "caffe/util/io.hpp"  
 #include "caffe/vision_layers.hpp"  
   
 using namespace caffe;  // NOLINT(build/namespaces)  
   
 template<typename Dtype>  
 int feature_extraction_pipeline(int argc, char** argv);  
   
 int main(int argc, char** argv) {  
   return feature_extraction_pipeline<float>(argc, argv);  
 //  return feature_extraction_pipeline<double>(argc, argv);  
 }  
   
 template<typename Dtype>  
 class writeDb  
 {  
 public:  
     void open(string dbName)  
     {  
         db.open(dbName.c_str());  
     }  
     void write(const Dtype &data)  
     {  
         db<<data;  
     }  
     void write(const string &str)  
     {  
         db<<str;  
     }  
     virtual ~writeDb()  
     {  
         db.close();  
     }  
 private:  
     std::ofstream db;  
 };  
   
 template<typename Dtype>  
 int feature_extraction_pipeline(int argc, char** argv) {  
   ::google::InitGoogleLogging(argv[0]);  
   const int num_required_args = 6;  
   if (argc < num_required_args) {  
     LOG(ERROR)<<  
     "This program takes in a trained network and an input data layer, and then"  
     " extract features of the input data produced by the net.\n"  
     "Usage: extract_features  pretrained_net_param"  
     "  feature_extraction_proto_file  extract_feature_blob_name1[,name2,...]"  
     "  save_feature_leveldb_name1[,name2,...]  img_path  [CPU/GPU]"  
     "  [DEVICE_ID=0]\n"  
     "Note: you can extract multiple features in one pass by specifying"  
     " multiple feature blob names and leveldb names seperated by ','."  
     " The names cannot contain white space characters and the number of blobs"  
     " and leveldbs must be equal.";  
     return 1;  
   }  
   int arg_pos = num_required_args;  
   
   arg_pos = num_required_args;  
   if (argc > arg_pos && strcmp(argv[arg_pos], "GPU") == 0) {  
     LOG(ERROR)<< "Using GPU";  
     uint device_id = 0;  
     if (argc > arg_pos + 1) {  
       device_id = atoi(argv[arg_pos + 1]);  
       CHECK_GE(device_id, 0);  
     }  
     LOG(ERROR) << "Using Device_id=" << device_id;  
     Caffe::SetDevice(device_id);  
     Caffe::set_mode(Caffe::GPU);  
   } else {  
     LOG(ERROR) << "Using CPU";  
     Caffe::set_mode(Caffe::CPU);  
   }  
   Caffe::set_phase(Caffe::TEST);  
   
   arg_pos = 0;  // the name of the executable  
   string pretrained_binary_proto(argv[++arg_pos]);//网络模型参数文件  
   
   string feature_extraction_proto(argv[++arg_pos]);  
   
   shared_ptr<Net<Dtype> > feature_extraction_net(  
       new Net<Dtype>(feature_extraction_proto));  
   
   feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto);//将网络参数load进内存  
   
   
   string extract_feature_blob_names(argv[++arg_pos]);  
   vector<string> blob_names;//要抽取特征的layer的名字，可以是多个  
   boost::split(blob_names, extract_feature_blob_names, boost::is_any_of(","));  
   
   string save_feature_leveldb_names(argv[++arg_pos]);  
   vector<string> leveldb_names;// 这里我改写成一个levedb为一个文件，数据格式不使用真正的levedb，而是自定义  
   boost::split(leveldb_names, save_feature_leveldb_names,  
                boost::is_any_of(","));  
   CHECK_EQ(blob_names.size(), leveldb_names.size()) <<  
       " the number of blob names and leveldb names must be equal";  
   size_t num_features = blob_names.size();  
   
   for (size_t i = 0; i < num_features; i++) {  
     CHECK(feature_extraction_net->has_blob(blob_names[i]))  //检测blob的名字在网络中是否存在  
         << "Unknown feature blob name " << blob_names[i]  
         << " in the network " << feature_extraction_proto;  
   }  
   
   
   vector<shared_ptr<writeDb<Dtype> > > feature_dbs;  
   for (size_t i = 0; i < num_features; ++i) //打开db，准备写入数据  
   {  
     LOG(INFO)<< "Opening db " << leveldb_names[i];  
     writeDb<Dtype>* db = new writeDb<Dtype>();  
     db->open(leveldb_names[i]);  
     feature_dbs.push_back(shared_ptr<writeDb<Dtype> >(db));  
   }  
   
   
   
   LOG(ERROR)<< "Extacting Features";  
   
   const shared_ptr<Layer<Dtype> > layer = feature_extraction_net->layer_by_name("data");//获取第一层  
   MyImageDataLayer<Dtype>* my_layer = (MyImageDataLayer<Dtype>*)layer.get();  
   my_layer->setImgPath(argv[++arg_pos],1);//"/media/G/imageset/clothing/针织衫/针织衫_1.jpg"  
   //设置图片路径  
   
   vector<Blob<float>*> input_vec;  
   vector<int> image_indices(num_features, 0);  
   int num_mini_batches = 1;//atoi(argv[++arg_pos]);//共多少次迭代。  每次迭代的数量在prototxt用batchsize指定  
   for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) //共num_mini_batches次迭代  
   {  
     feature_extraction_net->Forward(input_vec);//一次正向传播  
     for (int i = 0; i < num_features; ++i) //多层特征  
     {  
       const shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net  
           ->blob_by_name(blob_names[i]);  
       int batch_size = feature_blob->num();  
       int dim_features = feature_blob->count() / batch_size;  
   
       Dtype* feature_blob_data;  
   
       for (int n = 0; n < batch_size; ++n)  
       {  
         feature_blob_data = feature_blob->mutable_cpu_data() +  
             feature_blob->offset(n);  
         feature_dbs[i]->write("3 ");  
         for (int d = 0; d < dim_features; ++d)  
         {  
           feature_dbs[i]->write((Dtype)(d+1));  
           feature_dbs[i]->write(":");  
           feature_dbs[i]->write(feature_blob_data[d]);  
           feature_dbs[i]->write(" ");  
         }  
         feature_dbs[i]->write("\n");  
   
       }  // for (int n = 0; n < batch_size; ++n)  
     }  // for (int i = 0; i < num_features; ++i)  
   }  // for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index)  
   
   
   LOG(ERROR)<< "Successfully extracted the features!";  
   return 0;  
 }  

my_data_layer.cpp（参考image_data_layer修改）

[cpp]view plaincopy 
   
 #include <fstream>  // NOLINT(readability/streams)  
 #include <iostream>  // NOLINT(readability/streams)  
 #include <string>  
 #include <utility>  
 #include <vector>  
   
 #include "caffe/layer.hpp"  
 #include "caffe/util/io.hpp"  
 #include "caffe/util/math_functions.hpp"  
 #include "caffe/util/rng.hpp"  
 #include "caffe/vision_layers.hpp"  
   
 namespace caffe {  
   
   
 template <typename Dtype>  
 MyImageDataLayer<Dtype>::~MyImageDataLayer<Dtype>() {  
 }  
   
   
 template <typename Dtype>  
 void MyImageDataLayer<Dtype>::setImgPath(string path,int label)  
 {  
     lines_.clear();  
     lines_.push_back(std::make_pair(path, label));  
 }  
   
   
 template <typename Dtype>  
 void MyImageDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,  
       vector<Blob<Dtype>*>* top) {  
   Layer<Dtype>::SetUp(bottom, top);  
   const int new_height  = this->layer_param_.image_data_param().new_height();  
   const int new_width  = this->layer_param_.image_data_param().new_width();  
   CHECK((new_height == 0 && new_width == 0) ||  
       (new_height > 0 && new_width > 0)) << "Current implementation requires "  
       "new_height and new_width to be set at the same time.";  
   
   /* 
    * 因为下面需要随便拿一张图片来初始化blob。 
    * 因此需要硬盘上有一张图片。 
    * 1 从prototxt读取一张图片的路径， 
    * 2 其实也可以在这里将用于初始化的图片路径写死 
   */  
   
   /*1*/  
   /* 
   const string& source = this->layer_param_.image_data_param().source(); 
   LOG(INFO) << "Opening file " << source; 
   std::ifstream infile(source.c_str()); 
   string filename; 
   int label; 
   while (infile >> filename >> label) { 
     lines_.push_back(std::make_pair(filename, label)); 
   } 
   */  
   
   /*2*/  
   lines_.push_back(std::make_pair("/home/linger/init.jpg",1));  
   
   //上面1和2代码可以任意用一段  
   
   lines_id_ = 0;  
   // Read a data point, and use it to initialize the top blob. (随便)读取一张图片，来初始化blob  
   Datum datum;  
   CHECK(ReadImageToDatum(lines_[lines_id_].first, lines_[lines_id_].second,  
                          new_height, new_width, &datum));  
   // image  
   const int crop_size = this->layer_param_.image_data_param().crop_size();  
   const int batch_size = 1;//this->layer_param_.image_data_param().batch_size();  
   const string& mean_file = this->layer_param_.image_data_param().mean_file();  
   if (crop_size > 0) {  
     (*top)[0]->Reshape(batch_size, datum.channels(), crop_size, crop_size);  
     prefetch_data_.Reshape(batch_size, datum.channels(), crop_size, crop_size);  
   } else {  
     (*top)[0]->Reshape(batch_size, datum.channels(), datum.height(),  
                        datum.width());  
     prefetch_data_.Reshape(batch_size, datum.channels(), datum.height(),  
         datum.width());  
   }  
   LOG(INFO) << "output data size: " << (*top)[0]->num() << ","  
       << (*top)[0]->channels() << "," << (*top)[0]->height() << ","  
       << (*top)[0]->width();  
   // label  
   (*top)[1]->Reshape(batch_size, 1, 1, 1);  
   prefetch_label_.Reshape(batch_size, 1, 1, 1);  
   // datum size  
   datum_channels_ = datum.channels();  
   datum_height_ = datum.height();  
   datum_width_ = datum.width();  
   datum_size_ = datum.channels() * datum.height() * datum.width();  
   CHECK_GT(datum_height_, crop_size);  
   CHECK_GT(datum_width_, crop_size);  
   // check if we want to have mean  
   if (this->layer_param_.image_data_param().has_mean_file()) {  
     BlobProto blob_proto;  
     LOG(INFO) << "Loading mean file from" << mean_file;  
     ReadProtoFromBinaryFile(mean_file.c_str(), &blob_proto);  
     data_mean_.FromProto(blob_proto);  
     CHECK_EQ(data_mean_.num(), 1);  
     CHECK_EQ(data_mean_.channels(), datum_channels_);  
     CHECK_EQ(data_mean_.height(), datum_height_);  
     CHECK_EQ(data_mean_.width(), datum_width_);  
   } else {  
     // Simply initialize an all-empty mean.  
     data_mean_.Reshape(1, datum_channels_, datum_height_, datum_width_);  
   }  
   // Now, start the prefetch thread. Before calling prefetch, we make two  
   // cpu_data calls so that the prefetch thread does not accidentally make  
   // simultaneous cudaMalloc calls when the main thread is running. In some  
   // GPUs this seems to cause failures if we do not so.  
   prefetch_data_.mutable_cpu_data();  
   prefetch_label_.mutable_cpu_data();  
   data_mean_.cpu_data();  
   
   
 }  
   
 //--------------------------------下面是读取一张图片数据-----------------------------------------------  
 template <typename Dtype>  
 void MyImageDataLayer<Dtype>::fetchData() {  
       Datum datum;  
       CHECK(prefetch_data_.count());  
       Dtype* top_data = prefetch_data_.mutable_cpu_data();  
       Dtype* top_label = prefetch_label_.mutable_cpu_data();  
       ImageDataParameter image_data_param = this->layer_param_.image_data_param();  
       const Dtype scale = image_data_param.scale();//image_data_layer相关参数  
       const int batch_size = 1;//image_data_param.batch_size(); 这里我们只需要一张图片  
   
       const int crop_size = image_data_param.crop_size();  
       const bool mirror = image_data_param.mirror();  
       const int new_height = image_data_param.new_height();  
       const int new_width = image_data_param.new_width();  
   
       if (mirror && crop_size == 0) {  
         LOG(FATAL) << "Current implementation requires mirror and crop_size to be "  
             << "set at the same time.";  
       }  
       // datum scales  
       const int channels = datum_channels_;  
       const int height = datum_height_;  
       const int width = datum_width_;  
       const int size = datum_size_;  
       const int lines_size = lines_.size();  
       const Dtype* mean = data_mean_.cpu_data();  
   
       for (int item_id = 0; item_id < batch_size; ++item_id) {//读取一图片  
         // get a blob  
         CHECK_GT(lines_size, lines_id_);  
         if (!ReadImageToDatum(lines_[lines_id_].first,  
               lines_[lines_id_].second,  
               new_height, new_width, &datum)) {  
           continue;  
         }  
         const string& data = datum.data();  
         if (crop_size) {  
           CHECK(data.size()) << "Image cropping only support uint8 data";  
           int h_off, w_off;  
           // We only do random crop when we do training.  
             h_off = (height - crop_size) / 2;  
             w_off = (width - crop_size) / 2;  
   
             // Normal copy 正常读取，把裁剪后的图片数据读给top_data  
             for (int c = 0; c < channels; ++c) {  
               for (int h = 0; h < crop_size; ++h) {  
                 for (int w = 0; w < crop_size; ++w) {  
                   int top_index = ((item_id * channels + c) * crop_size + h)  
                                   * crop_size + w;  
                   int data_index = (c * height + h + h_off) * width + w + w_off;  
                   Dtype datum_element =  
                       static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));  
                   top_data[top_index] = (datum_element - mean[data_index]) * scale;  
                 }  
               }  
             }  
   
         } else {  
           // Just copy the whole data 正常读取，把图片数据读给top_data  
           if (data.size()) {  
             for (int j = 0; j < size; ++j) {  
               Dtype datum_element =  
                   static_cast<Dtype>(static_cast<uint8_t>(data[j]));  
               top_data[item_id * size + j] = (datum_element - mean[j]) * scale;  
             }  
           } else {  
             for (int j = 0; j < size; ++j) {  
               top_data[item_id * size + j] =  
                   (datum.float_data(j) - mean[j]) * scale;  
             }  
           }  
         }  
         top_label[item_id] = datum.label();//读取该图片的标签  
   
       }  
 }  
   
 template <typename Dtype>  
 Dtype MyImageDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,  
       vector<Blob<Dtype>*>* top) {  
   
   //更新input  
     fetchData();  
   
   // Copy the data  
   caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),  
              (*top)[0]->mutable_cpu_data());  
   caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),  
              (*top)[1]->mutable_cpu_data());  
   
   return Dtype(0.);  
 }  
   
 #ifdef CPU_ONLY  
 STUB_GPU_FORWARD(ImageDataLayer, Forward);  
 #endif  
   
 INSTANTIATE_CLASS(MyImageDataLayer);  
   
 }  // namespace caffe  

在data_layers.hpp添加一下代码，参考ImageDataLayer写的。

[cpp]view plaincopy 
   
 template <typename Dtype>  
 class MyImageDataLayer : public Layer<Dtype>  {  
  public:  
   explicit MyImageDataLayer(const LayerParameter& param)  
       : Layer<Dtype>(param) {}  
   virtual ~MyImageDataLayer();  
   virtual void SetUp(const vector<Blob<Dtype>*>& bottom,  
       vector<Blob<Dtype>*>* top);  
   
   virtual inline LayerParameter_LayerType type() const {  
     return LayerParameter_LayerType_MY_IMAGE_DATA;  
   }  
   virtual inline int ExactNumBottomBlobs() const { return 0; }  
   virtual inline int ExactNumTopBlobs() const { return 2; }  
   void fetchData();  
   void setImgPath(string path,int label);  
  protected:  
   virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,  
       vector<Blob<Dtype>*>* top);  
   
   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,  
       const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {}  
   
   
   vector<std::pair<std::string, int> > lines_;  
   int lines_id_;  
   int datum_channels_;  
   int datum_height_;  
   int datum_width_;  
   int datum_size_;  
   Blob<Dtype> prefetch_data_;  
   Blob<Dtype> prefetch_label_;  
   Blob<Dtype> data_mean_;  
   Caffe::Phase phase_;  
 };  

修改caffe.proto,在适当的位置添加下面信息，也是参考image_data写的。

MY_IMAGE_DATA = 36;

optional MyImageDataParameter my_image_data_param = 36;

// Message that stores parameters used by MyImageDataLayer
message MyImageDataParameter {
// Specify the data source.
optional string source = 1;
// For data pre-processing, we can do simple scaling and subtracting the
// data mean, if provided. Note that the mean subtraction is always carried
// out before scaling.
optional float scale = 2 [default = 1];
optional string mean_file = 3;
// Specify the batch size.
optional uint32 batch_size = 4;
// Specify if we would like to randomly crop an image.
optional uint32 crop_size = 5 [default = 0];
// Specify if we want to randomly mirror data.
optional bool mirror = 6 [default = false];
// The rand_skip variable is for the data layer to skip a few data points
// to avoid all asynchronous sgd clients to start at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the leveldb.
optional uint32 rand_skip = 7 [default = 0];
// Whether or not ImageLayer should shuffle the list of files at every epoch.
optional bool shuffle = 8 [default = false];
// It will also resize images if new_height or new_width are not zero.
optional uint32 new_height = 9 [default = 0];
optional uint32 new_width = 10 [default = 0];
}

以上每行位置不在一起，可以参考读一个image_data对应的位置。

本文作者：linger

本文链接：http://blog.csdn.net/lingerlanlan/article/details/39400375

lien0906

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
1
评论
caffe导读（1） caffe源码修改：抽取任意一张图片的特征

caffe源码修改：抽取任意一张图片的特征目前caffe不是很完善，输入的图片数据需要在prototxt指定路径。但是我们往往有这么一个需求：训练后得到一个模型文件，我们想拿这个模型文件来对一张图片抽取特征或者预测分类等。如果非得在prototxt指定路径，就很不方便。因此，这样的工具才是我们需要的：给一个可执行文件通过命令行来传递图片路径，然后caffe读入图片数据，进行一次正向传播
复制链接

扫一扫