caffe学习：通过研读classification.cpp了解如何使用caffe模型

最新推荐文章于 2024-08-13 08:27:57 发布

持久决心

最新推荐文章于 2024-08-13 08:27:57 发布

阅读量9.9k

点赞数 3

分类专栏： caffe 文章标签： caffe C++接口 caffe模型调用

本文链接：https://blog.csdn.net/u013832707/article/details/52960039

版权

caffe 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

在学习caffe的过程中，安装caffe训练和测试手写字体识别例子成功时以为caffe不过如此；再用编译好的classification.bin分类小猫的图片时，发现自己能做的又非常的少。每次分类我都要提供这些文件？我能随便给一张图片吗？我能更加随心所欲的用这个模型吗？模型的输出应该怎么看？这些问题都可以通过研读classification.cpp得到一定的启发。所以我决定参照classification.cpp改一个自己写的程序。

一、工程建立

如我的一篇博客Caffe + ROS + OpenCV + Qt creator所说，我利用Cmake管理工程，利用qt creator编写程序。目录结构如下：

classify文件夹
- src文件夹
  - classify.cpp
- CMakeLists.txt

CMakeLists.txt内容如下：

cmake_minimum_required(VERSION 2.8.3)
project(classify)

# Root directory of Caffe; change this to your own path.
set(Caffe_DIR /home/gph/Desktop/caffe_cmake/caffe-master)
# Root directory of OpenCV; change this to your own path.
set(OpenCV_DIR /home/gph/opencv/opencv-2.4.11/build)

# Both packages are mandatory: stop at configure time if either is missing
# instead of failing later with missing headers or unresolved symbols.
find_package(Caffe REQUIRED)
find_package(OpenCV REQUIRED)

# Forward the compile definitions Caffe was built with (e.g. -DCPU_ONLY) so the
# Caffe headers are compiled consistently with the installed library.
add_definitions(${Caffe_DEFINITIONS})

# Header search paths: the project's own include/ directory plus both libraries.
include_directories(include
    ${OpenCV_INCLUDE_DIRS}
    ${Caffe_INCLUDE_DIRS}
)

set(CPP_SOURCES src/classify.cpp)

add_executable(classify ${CPP_SOURCES})
target_link_libraries(classify
    ${OpenCV_LIBS}
    ${Caffe_LIBRARIES}
)

打开qt creator，在Welcome页点击Open Project打开相应的CMakeLists.txt，再点击Run cmake即可。

二、改写程序

1.原程序解析

在classification.cpp中包含一个Classifier类，该类中包含：

Classifier函数：根据模型的配置文件.prototxt，训练好的模型文件.caffemodel，建立模型，得到net_；处理均值文件，得到mean_；读入labels文件，得到labels_。

Classify函数：调用Predict函数对图像img进行分类，返回std::pair< std::string, float >形式的预测结果。

私有函数：仅供Classifier函数和Classify函数使用，包括

SetMean函数：将均值文件读入，转化为一张均值图像mean_。
Predict函数：调用Preprocess函数将图像输入到网络中，使用net_->Forward()函数进行预测；将输出层的输出保存到vector容器中返回。
Preprocess函数：对图像的通道数、大小、数据形式进行改变，减去均值mean_，再写入到net_的输入层中。

私有变量：

net_：模型变量；
input_geometry_：输入层的图像的大小；
num_channels_：输入层的通道数；
mean_：均值文件处理得到的均值图像；
labels_：标签文件，输出的结果表示的含义；

2.改写程序

改写后的程序如下：

#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <opencv2/opencv.hpp>

#include "caffe/caffe.hpp"

using namespace std;
using namespace cv;
using namespace caffe;

// One classification result: the label text (e.g. "cat") paired with its probability.
typedef std::pair<std::string, float> Prediction;

// Ordering predicate used by Argmax(): true when lhs has the strictly larger
// score, which yields a descending sort on the first member of each pair.
static bool PairCompare(const std::pair<float, int>& lhs,
                        const std::pair<float, int>& rhs) {
  return rhs.first < lhs.first;
}

// Returns the indices of the N largest values of v, ordered from the largest
// value to the smallest.
//
// N is clamped to the range [0, v.size()], so asking for more entries than
// exist (or for a negative count) returns fewer indices instead of running
// std::partial_sort past the end of the buffer. The relative order of equal
// values is unspecified.
static std::vector<int> Argmax(const std::vector<float>& v, int N) {
  const int available = static_cast<int>(v.size());
  const int count = std::max(0, std::min(N, available));

  // Pair every score with its original position so the position survives sorting.
  std::vector<std::pair<float, int> > pairs;
  pairs.reserve(v.size());
  for (size_t i = 0; i < v.size(); ++i)
    pairs.push_back(std::make_pair(v[i], static_cast<int>(i)));

  // Only the leading `count` entries need to be in order.
  std::partial_sort(pairs.begin(), pairs.begin() + count, pairs.end(), PairCompare);

  std::vector<int> result;
  result.reserve(count);
  for (int i = 0; i < count; ++i)
    result.push_back(pairs[i].second);
  return result;
}

int main(int argc, char** argv)
{
    // 定义模型配置文件，模型文件，均值文件，标签文件以及带分类的图像
    string model_file   = "/home/gph/Desktop/caffe_cmake/caffe-master/models/bvlc_reference_caffenet/deploy.prototxt";
    string trained_file = "/home/gph/Desktop/caffe_cmake/caffe-master/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel";
    string label_file   = "/home/gph/Desktop/caffe_cmake/caffe-master/data/ilsvrc12/synset_words.txt";
    string img_file     = "/home/gph/Desktop/caffe_cmake/caffe-master/examples/images/cat.jpg";
    string mean_file    = "/home/gph/Desktop/caffe_cmake/caffe-master/data/ilsvrc12/imagenet_mean.binaryproto";
    Mat img = imread(img_file);

    // 定义变量
    shared_ptr<Net<float> > net_;// 保存模型
    Size input_geometry_; // 模型输入图像的尺寸
    int num_channels_; // 图像的通道数
    Mat mean_; // 根据均值文件计算得到的均值图像
    vector<string> labels_; // 标签向量

    Caffe::set_mode(Caffe::GPU); // 是否使用GPU
    net_.reset(new Net<float>(model_file, TEST)); // 加载配置文件，设定模式为分类
    net_->CopyTrainedLayersFrom(trained_file); // 根据训练好的模型修改模型参数

    Blob<float>* input_layer = net_->input_blobs()[0]; // 定义输入层变量
    num_channels_ = input_layer->channels(); // 得到输入层的通道数
    LOG(INFO) << "num_channels_:" << num_channels_; // 输出通道数
    input_geometry_ = Size(input_layer->width(), input_layer->height()); // 得到输入层的图像大小

    // 处理均值文件，得到均值图像
    BlobProto blob_proto;
    ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); // mean_file.c_str()将string类型转化为字符型
    Blob<float> mean_blob;
    mean_blob.FromProto(blob_proto);
    vector<Mat> channels;
    float* data = mean_blob.mutable_cpu_data();// data指针
    for (int i = 0; i < num_channels_; i++)
    {
        Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);//将一副单通道图像的数据记录再channel中
        channels.push_back(channel);
        data += mean_blob.height() * mean_blob.width();// data指向下一个通道的开始
    }
    Mat mean;
    merge(channels, mean); //分离的通道融合，查看cv：：merge的作用
    Scalar channel_mean = cv::mean(mean);
    mean_ = Mat(input_geometry_, mean.type(), channel_mean);//得到均值图像

    // 得到标签
    ifstream labels(label_file.c_str());
    string line;
    while (getline(labels, line))
        labels_.push_back(string(line));
    //判断标签的类数和模型输出的类数是否相同
    Blob<float>* output_layer = net_->output_blobs()[0];
    LOG(INFO) << "output_layer dimension: " << output_layer->channels()
              << "; labels number: " << labels_.size();


    // 预测图像
    input_layer->Reshape(1, num_channels_, input_geometry_.height, input_geometry_.width);
    net_->Reshape(); 调整模型

    //将input_channels指向模型的输入层相关位置（大概是这样的）
    vector<Mat> input_channels;
    int width = input_layer->width();
    int height = input_layer->height();
    float* input_data = input_layer->mutable_cpu_data();
    for (int i = 0; i < input_layer->channels(); i++)
    {
        Mat channel(height, width, CV_32FC1, input_data);
        input_channels.push_back(channel);
        input_data += width * height;
    }

    //改变图像的大小等
    Mat sample;
    if (img.channels() == 3 && num_channels_ == 1)
      cv::cvtColor(img, sample, cv::COLOR_BGR2GRAY);
    else if (img.channels() == 4 && num_channels_ == 1)
      cv::cvtColor(img, sample, cv::COLOR_BGRA2GRAY);
    else if (img.channels() == 4 && num_channels_ == 3)
      cv::cvtColor(img, sample, cv::COLOR_BGRA2BGR);
    else if (img.channels() == 1 && num_channels_ == 3)
      cv::cvtColor(img, sample, cv::COLOR_GRAY2BGR);
    else
      sample = img;
    // change img size
    cv::Mat sample_resized;
    if (sample.size() != input_geometry_)
      cv::resize(sample, sample_resized, input_geometry_);
    else
      sample_resized = sample;
    // change img to float
    cv::Mat sample_float;
    if (num_channels_ == 3)
      sample_resized.convertTo(sample_float, CV_32FC3);
    else
      sample_resized.convertTo(sample_float, CV_32FC1);
    // img normalize
    cv::Mat sample_normalized;
    cv::subtract(sample_float, mean_, sample_normalized);

    //将图像通过input_channels变量传递给模型
    /* This operation will write the separate BGR planes directly to the
     * input layer of the network because it is wrapped by the cv::Mat
     * objects in input_channels. */
    cv::split(sample_normalized, input_channels);
    // 调用模型进行预测
    net_->Forward();

    // 得到输出
    const float* begin = output_layer->cpu_data();
    const float* end = begin + output_layer->channels();
    //将输出给vector容器
    vector<float> output = vector<float>(begin, end);
    //显示概率前N大的结果
    int N = 10;
    N = std::min<int>(labels_.size(), N);
    std::vector<int> maxN = Argmax(output, N);
    std::vector<Prediction> predictions;
    for (int i = 0; i < N; ++i) {
      int idx = maxN[i];
      predictions.push_back(std::make_pair(labels_[idx], output[idx]));
    }
    for (size_t i = 0; i < predictions.size(); ++i) {
      Prediction p = predictions[i];
      std::cout << std::fixed << std::setprecision(4) << p.second << " - \""
                << p.first << "\"" << std::endl;
    }

    return 0;
}// end for main

3.编译并运行

在classify文件夹路径下输入以下命令：

mkdir build
cd build
cmake ..
make

编译成功后运行程序：

./classify

结果：

I1028 21:36:29.941694 13761 classify.cpp:53] num_channels_:3
I1028 21:36:29.944052 13761 classify.cpp:80] output_layer dimension: 1000; labels number: 1000
0.3134 - "n02123045 tabby, tabby cat"
0.2380 - "n02123159 tiger cat"
0.1235 - "n02124075 Egyptian cat"
0.1003 - "n02119022 red fox, Vulpes vulpes"
0.0715 - "n02127052 lynx, catamount"
0.0539 - "n02119789 kit fox, Vulpes macrotis"
0.0144 - "n02123394 Persian cat"
0.0113 - "n04493381 tub, vat"
0.0063 - "n02120505 grey fox, gray fox, Urocyon cinereoargenteus"
0.0062 - "n02112018 Pomeranian"