【Caffe的C++接口使用说明(六)】caffe中分类接口C++源代码的再次解读以及测试文件的完善

最新推荐文章于 2021-04-21 11:14:59 发布
马卫飞
最新推荐文章于 2021-04-21 11:14:59 发布
阅读量1.9k
点赞数 3
分类专栏： CNN--ANN--Deep Learning CV_DL_ML_Programming
本文链接：https://blog.csdn.net/maweifei/article/details/76158295
版权
CNN--ANN--Deep Learning 同时被 2 个专栏收录
97 篇文章 3 订阅
订阅专栏
CV_DL_ML_Programming
87 篇文章 0 订阅
订阅专栏
/************************************************************************************************************************
文件说明:
        1)caffe深度学习框架中C++分类接口的说明
        2)基于C++和caffe的分类程序
测试文件:
        https://pan.baidu.com/disk/home#list/vmode=list&path=%2Fcaffe%E6%BA%90%E7%A0%81%E8%A7%A3%E8%AF%BB%E7%AC%94%E8%AE%B0
使用流程:
        1)libcaffe项目编译成功之后，将classification项目设为启动项
		2)根据main函数中的路径，下载测试此程序需要的文件，并且将文件添加到相应的目录下
		3)将caffe源代码中的代码修改为此代码
		4)运行程序
开发环境:
        Caffe+NVIDIA CUDA7.5+OpenCV+windows+VS2013+STL
时间地点:
        陕西师范大学 文津楼 2017.7.25
作    者:
        九 月
*************************************************************************************************************************/
#include <caffe/caffe.hpp>                    // [1] Caffe deep-learning framework header

#ifdef USE_OPENCV                             // [2] Preprocessor switch: only pull in OpenCV when it is enabled
#include <opencv2/core/core.hpp>              // [3] OpenCV core module
#include <opencv2/highgui/highgui.hpp>        // [4] OpenCV high-level GUI / image I/O module
#include <opencv2/imgproc/imgproc.hpp>        // [5] OpenCV image-processing module
#endif  // USE_OPENCV


#include <algorithm>                          // [6] STL algorithms (std::partial_sort, std::min)
#include <cstdlib>                            // [7] std::system
#include <fstream>                            // [8] std::ifstream (used to read the label file)
#include <iomanip>                            // [9] std::setprecision
#include <iosfwd>                             // [10] Forward declarations of the I/O stream types
#include <iostream>                           // [11] std::cout
#include <memory>                             // [12] Memory-management utilities
#include <string>                             // [13] std::string
#include <utility>                            // [14] std::pair
#include <vector>                             // [15] std::vector
/*************************************************************************************************************************
**************************************************************************************************************************/

using namespace caffe;
using std::string;


typedef std::pair<string, float> Prediction;          // [1] One classification result: label text paired with the network's score for it
/*************************************************************************************************************************
Module description:
         Declaration of the Classifier class, the classification interface from the Caffe C++ sources
**************************************************************************************************************************/
class Classifier
{
public:                                               // [0] Constructor; takes the paths of the four files the classifier needs
	Classifier(const string& model_file,              // [1] Network description file used at prediction time
		       const string& trained_file,            // [2] Path of the already-trained caffemodel file
		       const string& mean_file,               // [3] Path of the mean file
		       const string& label_file);             // [4] Path of the class-label file
	                                                  // [5] Classify runs the forward pass on img and returns up to N (label, score)
	                                                  //     pairs, highest score first; N defaults to 5
	std::vector<Prediction> Classify(const cv::Mat& img, int N = 5);

private:
	void SetMean(const string& mean_file);            // [1] SetMean loads the mean file and builds mean_, which Preprocess subtracts
	                                                  //     from every image before it is written into the network input
	std::vector<float> Predict(const cv::Mat& img);   // [2] Predict is the core of Classifier: it feeds img through the network
	                                                  //     (forward pass) and returns the values of the output blob
	                                                  // [3] WrapInputLayer appends one cv::Mat per input-blob channel to input_channels;
	                                                  //     each Mat is a header over the blob's own memory
	void WrapInputLayer(std::vector<cv::Mat>* input_channels);
	                                                  // [4] Preprocess adapts img to the network input and splits it into input_channels
	void Preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels);

private:
	shared_ptr<Net<float> > net_;                     // [1] net_ is the Caffe network
	cv::Size                input_geometry_;          // [2] input_geometry_ holds the width and height of the network's input blob
	int                     num_channels_;            // [3] num_channels_ is the channel count of the network's input blob
	cv::Mat                 mean_;                    // [4] mean_ is the mean image subtracted from each input, stored as a cv::Mat
	std::vector<string>     labels_;                  // [5] labels_ holds the label text of every class, in label-file order
};
/***********************************************************************************************************************************
Description:
        Constructor of Classifier. Selects the compute mode, loads the network and its trained weights, records the
        geometry of the input blob, builds the mean image and reads the label list.
Parameters:
        1)const string& model_file--------------network description file used at prediction time
        2)const string& trained_file------------path of the already-trained caffemodel file
        3)const string& mean_file---------------path of the mean file
        4)const string& label_file--------------path of the class-label file, one label per line
Notes:
        The CHECK / CHECK_EQ lines are glog assertions; a failed check is reported with the message streamed into it.
************************************************************************************************************************************/
Classifier::Classifier(const string& model_file, const string& trained_file, const string& mean_file, const string& label_file)
{
#ifdef CPU_ONLY
	Caffe::set_mode(Caffe::CPU);
#else
	Caffe::set_mode(Caffe::GPU);
#endif

	// [1] Load the network: structure from model_file, trained parameters from trained_file
	net_.reset(new Net<float>(model_file, TEST));
	net_->CopyTrainedLayersFrom(trained_file);
	// The classifier only knows how to fill one input blob and read one output blob
	CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
	CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";

	// [2] Record the channel count and spatial size of the input blob
	Blob<float>* input_layer = net_->input_blobs()[0];
	num_channels_ = input_layer->channels();
	// Preprocess only handles grayscale (1 channel) and BGR (3 channel) inputs
	CHECK(num_channels_ == 3 || num_channels_ == 1) << "Input layer should have 1 or 3 channels.";
	input_geometry_ = cv::Size(input_layer->width(), input_layer->height());

	// [3] Load the binaryproto mean file (needs num_channels_ and input_geometry_, so it must come after step [2])
	SetMean(mean_file);

	// [4] Load labels, one per line
	std::ifstream labels(label_file.c_str());
	CHECK(labels) << "Unable to open labels file " << label_file;
	string line;
	while (std::getline(labels, line))
	{
		// A label file with CRLF line endings, read where the runtime does not translate them,
		// leaves a trailing '\r' on every line; drop it so it does not end up in the label text.
		if (!line.empty() && line[line.size() - 1] == '\r')
		{
			line.erase(line.size() - 1);
		}
		labels_.push_back(line);
	}

	// [5] There must be exactly one label per channel of the output blob.
	//     channels() returns int; cast it so both sides of the comparison are unsigned.
	Blob<float>* output_layer = net_->output_blobs()[0];
	CHECK_EQ(labels_.size(), static_cast<size_t>(output_layer->channels())) << "Number of labels is different from the output layer dimension.";
}
/*************************************************************************************************************************
Description:
          PairCompare orders (score, index) pairs by descending score: it returns true when the score of lhs is
          strictly greater than the score of rhs, and false otherwise.
Parameters:
         1)const std::pair<float, int>& lhs
         2)const std::pair<float, int>& rhs
**************************************************************************************************************************/
static bool PairCompare(const std::pair<float, int>& lhs, const std::pair<float, int>& rhs)
{
	return lhs.first > rhs.first;
}
/*************************************************************************************************************************
Description:
         1)Return the indices of the top N values of vector v, ordered from the largest value to the smallest
         2)N is clamped to the range [0, v.size()], so asking for more entries than v holds returns all of them and
           a negative N returns an empty vector
Parameters:
         1)const std::vector<float>& v
         2)int N
**************************************************************************************************************************/
static std::vector<int> Argmax(const std::vector<float>& v, int N)
{
	// [1] Pair every value with its position so the position survives the sort
	std::vector<std::pair<float, int> > pairs;
	pairs.reserve(v.size());
	for (size_t i = 0; i < v.size(); ++i)
	{
		pairs.push_back(std::make_pair(v[i], static_cast<int>(i)));
	}

	// [2] pairs.begin() + count must stay inside [begin, end]; an unclamped N would hand
	//     std::partial_sort an invalid iterator
	const int available = static_cast<int>(pairs.size());
	const int count = std::max(0, std::min(N, available));

	// [3] Move the count largest pairs to the front, in descending order of value
	std::partial_sort(pairs.begin(), pairs.begin() + count, pairs.end(), PairCompare);

	// [4] Keep only the original positions of those pairs
	std::vector<int> result;
	result.reserve(count);
	for (int i = 0; i < count; ++i)
	{
		result.push_back(pairs[i].second);
	}
	return result;
}

/*************************************************************************************************************************
Description:
         1)Return the top N predictions for img as (label, score) pairs, highest score first
         2)N is clamped to [0, number of classes]; a non-positive N yields an empty result
Parameters:
         1)const cv::Mat& img
         2)int N
**************************************************************************************************************************/
std::vector<Prediction> Classifier::Classify(const cv::Mat& img, int N)
{
	// [1] Forward pass: one score per class
	std::vector<float> output = Predict(img);

	// [2] Never ask for more entries than there are labels/scores, and never for a negative
	//     amount: Argmax turns N into an iterator offset, which must stay inside the score vector
	const int available = static_cast<int>(std::min(labels_.size(), output.size()));
	N = std::max(0, std::min(N, available));

	// [3] Indices of the N largest scores, in descending score order
	std::vector<int> maxN = Argmax(output, N);

	// [4] Translate every index into its label text and attach the score
	std::vector<Prediction> predictions;
	predictions.reserve(maxN.size());
	for (size_t i = 0; i < maxN.size(); ++i)
	{
		const int idx = maxN[i];
		predictions.push_back(std::make_pair(labels_[idx], output[idx]));
	}
	return predictions;
}

/*************************************************************************************************************************
函数说明:
		1)加载一个binaryproto格式的均值文件
		2)Load the mean file in binaryproto format
		3)设置数据集的平均值
函数参数:
        const string& mean_file:均值文件的存储路径
**************************************************************************************************************************/
void Classifier::SetMean(const string& mean_file)
{
	BlobProto blob_proto;                            //【1】用定义的均值文件路径将均值文件读入proto中  
	ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
	                                                 //【2】Convert from BlobProto to Blob<float>  
	Blob<float> mean_blob;
	mean_blob.FromProto(blob_proto);                 //【3】将proto中存储的均值文件转移到blob中  
	                                                 //【4】核验均值的通道数是否等于输入图像的通道数，如果不相等的话则为异常  
	CHECK_EQ(mean_blob.channels(), num_channels_) << "Number of channels of mean file doesn't match input layer.";

													 //【5】The format of the mean file is planar 32-bit float BGR or grayscale  
													 //【6】均值文件的格式为32位的浮点型的BGR图像或者灰度图像  
	std::vector<cv::Mat> channels;                   //【7】将mean_blob中的数据转化为Mat时的存储向量   
	float* data = mean_blob.mutable_cpu_data();      //【8】指向均值blob的指针  

	for (int i = 0; i < num_channels_; ++i)
	{
													  //【1】提取一个单独的通道---Extract an individual channel  
													  //【2】存储均值文件的每一个通道转化得到的Mat   
		cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
		channels.push_back(channel);                  //【3】将均值文件的所有通道转化成的Mat一个一个地存储到channels中  
		                                              //【4】在均值文件上移动一个通道    
		data += mean_blob.height() * mean_blob.width();
	}

	cv::Mat mean;                                     //【5】将分离的通道合并成一个单独的图像  
	cv::merge(channels, mean);                        //【6】将得到的所有通道合成为一张图   

	/* Compute the global mean pixel value and create a mean image filled with this value. */
	cv::Scalar channel_mean = cv::mean(mean);         //【7】求得均值文件的每个通道的平均值，记录在channel_mean中  
	                                                  //【8】用上面求得的各个通道的平均值初始化mean_，作为数据集图像的均值  
	mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean);
}
/*************************************************************************************************************************
Description:
        Forward pass of the network: writes img into the input blob, runs the network and returns a copy of the
        output blob's values, one per output channel
Parameters:
        const cv::Mat& img
**************************************************************************************************************************/
std::vector<float> Classifier::Predict(const cv::Mat& img)
{
	// [1] Size the input blob for a single image (1 x channels x height x width) and let the
	//     network propagate that shape to its layers
	Blob<float>* net_input = net_->input_blobs()[0];
	net_input->Reshape(1, num_channels_, input_geometry_.height, input_geometry_.width);
	net_->Reshape();

	// [2] Wrap the input blob first, then preprocess: Preprocess writes its result through
	//     these cv::Mat headers, so the order of the two calls matters
	std::vector<cv::Mat> planes;
	WrapInputLayer(&planes);
	Preprocess(img, &planes);

	// [3] Run the network on the data now sitting in the input blob
	net_->ForwardPrefilled();

	// [4] Copy the output layer to a std::vector
	Blob<float>* net_output = net_->output_blobs()[0];
	const float* scores = net_output->cpu_data();
	return std::vector<float>(scores, scores + net_output->channels());
}
/*************************************************************************************************************************
Description:
        1)Wrap the input layer of the network in separate cv::Mat objects (one per channel). This saves one memcpy
          operation and there is no need to rely on cudaMemcpy2D
        2)The last preprocessing operation then writes the separate channels directly into the input layer
Parameters:
        std::vector<cv::Mat>* input_channels: receives one cv::Mat header per input channel (appended)
**************************************************************************************************************************/
void Classifier::WrapInputLayer(std::vector<cv::Mat>* input_channels)
{
	Blob<float>* blob = net_->input_blobs()[0];

	// Shape of the input blob as currently configured
	const int cols = blob->width();
	const int rows = blob->height();
	const int channel_count = blob->channels();
	const int plane_size = cols * rows;

	// Walk the blob's buffer one channel plane at a time; each Mat only points at the
	// blob's memory, nothing is copied
	float* plane = blob->mutable_cpu_data();
	for (int c = 0; c < channel_count; ++c)
	{
		input_channels->push_back(cv::Mat(rows, cols, CV_32FC1, plane));
		plane += plane_size;
	}
}
/*************************************************************************************************************************
Description:
        Convert the input image to the input image format of the network: match the channel count, resize to the
        input geometry, convert to 32-bit float, subtract the mean and write the channels into input_channels
Parameters:
        1)const cv::Mat& img
        2)std::vector<cv::Mat>* input_channels: the cv::Mat headers that wrap the network's input blob
**************************************************************************************************************************/
void Classifier::Preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels)
{
	// [1] Pick the colour conversion, if any, that brings img to the network's channel count
	const int src_channels = img.channels();
	bool needs_conversion = false;
	int conversion_code = 0;
	if (num_channels_ == 1)
	{
		if (src_channels == 3)
		{
			needs_conversion = true;
			conversion_code = CV_BGR2GRAY;
		}
		else if (src_channels == 4)
		{
			needs_conversion = true;
			conversion_code = CV_BGRA2GRAY;
		}
	}
	else if (num_channels_ == 3)
	{
		if (src_channels == 4)
		{
			needs_conversion = true;
			conversion_code = CV_BGRA2BGR;
		}
		else if (src_channels == 1)
		{
			needs_conversion = true;
			conversion_code = CV_GRAY2BGR;
		}
	}

	cv::Mat sample;
	if (needs_conversion)
	{
		cv::cvtColor(img, sample, conversion_code);
	}
	else
	{
		sample = img;
	}

	// [2] Force the image to the size the network expects
	cv::Mat resized;
	if (sample.size() == input_geometry_)
	{
		resized = sample;
	}
	else
	{
		cv::resize(sample, resized, input_geometry_);
	}

	// [3] Convert to the 32-bit float layout used by the input blob
	const int float_type = (num_channels_ == 3) ? CV_32FC3 : CV_32FC1;
	cv::Mat as_float;
	resized.convertTo(as_float, float_type);

	// [4] Subtract the mean image
	cv::Mat centered;
	cv::subtract(as_float, mean_, centered);

	// [5] Split into the per-channel Mats. These are expected to wrap the network's input blob,
	//     so the split writes the image directly into the network input.
	cv::split(centered, *input_channels);

	// [6] Verify that the first channel still points at the input blob's memory
	CHECK(reinterpret_cast<float*>(input_channels->at(0).data) == net_->input_blobs()[0]->cpu_data())
		<< "Input channels are not wrapping the input layer of the network.";
}

int main(int argc, char** argv) 
{

	::google::InitGoogleLogging(argv[0]);

	string model_file   = "F:\\caffeInstall2013\\caffe-master\\models\\bvlc_reference_caffenet\\deploy.prototxt";
	string trained_file = "F:\\caffeInstall2013\\caffe-master\\models\\bvlc_reference_caffenet\\bvlc_reference_caffenet.caffemodel";
	string mean_file    = "F:\\caffeInstall2013\\caffe-master\\models\\bvlc_reference_caffenet\\imagenet_mean.binaryproto";
	string label_file   = "F:\\caffeInstall2013\\caffe-master\\models\\bvlc_reference_caffenet\\synset_words.txt";
	string file         = "F:\\caffeInstall2013\\caffe-master\\examples\\images\\cat.jpg";
	Classifier classifier(model_file, trained_file, mean_file, label_file);

	std::cout << "---------- Prediction for "<< file << " ----------" << std::endl;

	cv::Mat img = cv::imread(file, -1);
	CHECK(!img.empty()) << "Unable to decode image " << file;
	std::vector<Prediction> predictions = classifier.Classify(img);

	/* Print the top N predictions. */
	for (size_t i = 0; i < predictions.size(); ++i) 
	{
		Prediction p = predictions[i];
		std::cout << std::fixed << std::setprecision(4) << p.second << " - \"" << p.first << "\"" << std::endl;
	}
	std::system("pause");
}