本文主要解析caffe源码中的tools/caffe.cpp文件。该文件包含caffe程序的入口main函数,并实现了train、test、device_query、time等命令行命令。
caffe结构请参考:
https://blog.csdn.net/c20081052/article/details/80585888
caffe命令行参数请参考:
https://blog.csdn.net/c20081052/article/details/80596572
代码注释如下:
#ifdef WITH_PYTHON_LAYER
#include "boost/python.hpp" //下面两个是Boost.Python实现Python C/C++混合编程(具体参考boost.python)
namespace bp = boost::python;
#endif
#include <gflags/gflags.h> //下面俩是google开源的命令行参数解析工具
#include <glog/logging.h>
#include <cstring>
#include <map>
#include <string>
#include <vector>
#include "boost/algorithm/string.hpp"
#include "caffe/caffe.hpp"
#include "caffe/util/signal_handler.h" //这个我暂且解析为信息处理头文件,会对作者输入的命令行参数做处理,比如stop,snapshot,none
using caffe::Blob;
using caffe::Caffe;
using caffe::Net;
using caffe::Layer;
using caffe::Solver;
using caffe::shared_ptr;
using caffe::string;
using caffe::Timer;
using caffe::vector;
using std::ostringstream;
/*gflags是google开源的命令行参数处理库。
在使用命令行参数的文件中(源文件或头文件),首先使用以下定义语句定义变量:
DEFINE_int32,DEFINE_int64,DEFINE_bool,DEFINE_double,DEFINE_string等,
语法为:DEFINE_int32(name, default_value, "description")。
之后就可以使用FLAGS_name变量了,它的值由命令行参数传入,未传入时为默认值。
在其他代码文件中若想使用该命令行参数,可以用DECLARE_int32(name)声明(name为int32类型,其他类型使用对应的DECLARE_宏)。
caffe.cpp中有很多这样的定义,如DEFINE_string(gpu,"","some description"),命令行传入-gpu 0时,FLAGS_gpu为字符串"0",默认值为空字符串。*/
// Command-line flags (gflags). Each DEFINE_<type>(name, default, help) below
// creates a FLAGS_<name> variable that the commands in this file read.
DEFINE_string(gpu, "",
"Optional; run in GPU mode on given device IDs separated by ','." // several GPU ids are given comma-separated
// NOTE(review): the literal above has no trailing space, so after the adjacent
// literals are concatenated the help text reads "...','.Use '-gpu all'...".
"Use '-gpu all' to run on all available GPUs. The effective training " // "all" selects every available GPU
"batch size is multiplied by the number of devices."); // effective batch = batch size * number of GPUs
DEFINE_string(solver, "",
"The solver definition protocol buffer text file."); // a protobuf text file, e.g. a .prototxt
DEFINE_string(model, "",
"The model definition protocol buffer text file.");
DEFINE_string(phase, "", // network phase; per the help text only the 'time' command uses it
"Optional; network phase (TRAIN or TEST). Only used for 'time'.");
DEFINE_int32(level, 0,
"Optional; network level.");
DEFINE_string(stage, "",
"Optional; network stages (not to be confused with phase), "
"separated by ','.");
DEFINE_string(snapshot, "", // solver state to resume training from
"Optional; the snapshot solver state to resume training.");
DEFINE_string(weights, "",
"Optional; the pretrained weights to initialize finetuning, " // mutually exclusive with -snapshot (checked in train())
"separated by ','. Cannot be set simultaneously with snapshot.");
DEFINE_int32(iterations, 50, // defaults to 50 iterations
"The number of iterations to run.");
DEFINE_string(sigint_effect, "stop", // action taken on SIGINT; defaults to "stop"
"Optional; action to take when a SIGINT signal is received: "
"snapshot, stop or none.");
DEFINE_string(sighup_effect, "snapshot", // action taken on SIGHUP; defaults to "snapshot"
"Optional; action to take when a SIGHUP signal is received: "
"snapshot, stop or none.");
// A simple registry for caffe commands.
typedef int (*BrewFunction)(); /* pointer to a command entry point: takes no arguments, returns int */
typedef std::map<caffe::string, BrewFunction> BrewMap; /* maps a command name to its entry point */
BrewMap g_brew_map; /* the global registry; entries are added by RegisterBrewFunction below */
/* RegisterBrewFunction(func) defines, inside an anonymous namespace, a class
   __Registerer_<func> whose constructor stores &func in g_brew_map under the
   key "<func>", plus one global instance g_registerer_<func> of that class.
   Constructing that global instance is what performs the registration. */
/* The key to reading this is seeing how the preprocessor expands the macro. */
#define RegisterBrewFunction(func) \
namespace { \
class __Registerer_##func { \
public: /* NOLINT */ \
__Registerer_##func() { \
g_brew_map[#func] = &func; \
} \
}; \
__Registerer_##func g_registerer_##func; \
}
// In a macro, "#" stringizes the parameter that follows it: the argument text
// becomes a string literal.
// "##" is the token-pasting operator: it joins two tokens into one token. The
// operands only need to be tokens, not necessarily macro parameters.
// Looks up the command registered under `name` in g_brew_map and returns its
// entry point. For an unknown name it logs every registered command name at
// ERROR level and then reports the unknown action through LOG(FATAL).
static BrewFunction GetBrewFunction(const caffe::string& name) {
  const BrewMap::iterator match = g_brew_map.find(name);
  if (match != g_brew_map.end()) {
    return match->second;
  }

  LOG(ERROR) << "Available caffe actions:";
  BrewMap::iterator entry = g_brew_map.begin();
  while (entry != g_brew_map.end()) {
    LOG(ERROR) << "\t" << entry->first;
    ++entry;
  }
  LOG(FATAL) << "Unknown action: " << name;
  return NULL;  // not reachable, just to suppress old compiler warnings.
}
// Parse GPU ids or use all available devices.
//
// Fills *gpus from the -gpu flag:
//   - "all": appends 0 .. count-1, where count comes from cudaGetDeviceCount
//     (in a CPU_ONLY build the NO_GPU macro is invoked instead and count
//     stays 0);
//   - a non-empty list: appends each comma-separated id, in order;
//   - empty: appends nothing and checks that *gpus is already empty.
// Whitespace around an id is ignored, and an id that is not an integer is
// reported through LOG(FATAL) instead of escaping as an uncaught
// boost::bad_lexical_cast.
static void get_gpus(vector<int>* gpus) {
  if (FLAGS_gpu == "all") {
    int count = 0;
#ifndef CPU_ONLY
    CUDA_CHECK(cudaGetDeviceCount(&count));
#else
    NO_GPU;
#endif
    for (int i = 0; i < count; ++i) {
      gpus->push_back(i);
    }
  } else if (FLAGS_gpu.size()) {
    vector<string> strings;
    boost::split(strings, FLAGS_gpu, boost::is_any_of(","));
    for (size_t i = 0; i < strings.size(); ++i) {
      // Accept "0, 1": lexical_cast rejects surrounding whitespace.
      const string id = boost::trim_copy(strings[i]);
      try {
        gpus->push_back(boost::lexical_cast<int>(id));
      } catch (const boost::bad_lexical_cast&) {
        LOG(FATAL) << "Invalid GPU id \"" << id << "\" in -gpu \""
                   << FLAGS_gpu << "\"";
      }
    }
  } else {
    CHECK_EQ(gpus->size(), 0);
  }
}
// Parse phase from flags.
// Maps the -phase flag to a caffe::Phase: an empty flag yields `default_value`,
// "TRAIN" and "TEST" yield the matching enumerator, and any other value is
// reported through LOG(FATAL).
caffe::Phase get_phase_from_flags(caffe::Phase default_value) {
  if (FLAGS_phase.empty()) {
    return default_value;
  } else if (FLAGS_phase == "TRAIN") {
    return caffe::TRAIN;
  } else if (FLAGS_phase == "TEST") {
    return caffe::TEST;
  }
  LOG(FATAL) << "phase must be \"TRAIN\" or \"TEST\"";
  return caffe::TRAIN;  // Avoid warning
}
// Parse stages from flags.
// Splits the -stage flag on ',' and returns the pieces in order.
// NOTE(review): an empty flag is not special-cased here; boost::split
// presumably then yields one empty string rather than an empty vector —
// confirm.
vector<string> get_stages_from_flags() {
  const string separators(",");
  vector<string> stage_names;
  boost::split(stage_names, FLAGS_stage, boost::is_any_of(separators));
  return stage_names;
}
// caffe commands to call by
// caffe <command> <args>
//
// To add a command, define a function "int command()" and register it with
// RegisterBrewFunction(action);
// Device Query: show diagnostic information for a GPU device.
int device_query() { /*这里定义device_query函数*/ //显示gpu设备的诊断信息
LOG(INFO) << "Querying GPUs " << FLAGS_gpu;
vector<int> gpus;
get_gpus(&gpus);/*获得有几个GPU*/
for (int i = 0; i < gpus.size(); ++i) { /*依次查询每个GPU信息*/
caffe::Caffe::SetDevice(gpus[i]);
caffe::Caffe::DeviceQuery();
}
return 0;
}
RegisterBrewFunction(device_query); /*这里通过预编译阶段的宏替换,将定义的device_query函数指针赋值到map容器中*/
// Load the weights from the specified caffemodel(s) into the train and
// test nets.
// `model_list` is a ','-separated list of model files; each file is copied, in
// list order, into the solver's net and then into every one of its test nets.
void CopyLayers(caffe::Solver<float>* solver, const std::string& model_list) {
  typedef std::vector<std::string>::const_iterator FileIter;

  std::vector<std::string> weight_files;
  boost::split(weight_files, model_list, boost::is_any_of(","));
  for (FileIter file = weight_files.begin(); file != weight_files.end();
       ++file) {
    LOG(INFO) << "Finetuning from " << *file;
    solver->net()->CopyTrainedLayersFrom(*file);
    for (int t = 0; t < solver->test_nets().size(); ++t) {
      solver->test_nets()[t]->CopyTrainedLayersFrom(*file);
    }
  }
}
// Translate the signal effect the user specified on the command-line to the
// corresponding enumeration.
// Accepts "stop", "snapshot" and "none"; any other value is reported through
// LOG(FATAL).
caffe::SolverAction::Enum GetRequestedAction(
    const std::string& flag_value) {
  if (flag_value == "stop") {
    return caffe::SolverAction::STOP;
  }
  if (flag_value == "snapshot") {
    return caffe::SolverAction::SNAPSHOT;
  }
  if (flag_value == "none") {
    return caffe::SolverAction::NONE;
  }
  LOG(FATAL) << "Invalid signal effect \""<< flag_value << "\" was specified";
  // Not reached. Keeps every path of this non-void function returning a value,
  // as GetBrewFunction and get_phase_from_flags do after their LOG(FATAL).
  return caffe::SolverAction::NONE;
}
// Train / Finetune a model.
/* Both training from scratch and finetuning go through this command. */
int train() { /* the "train" command */
// Validate the flags: -solver is required, and -snapshot / -weights must not both be given.
CHECK_GT(FLAGS_solver.size(), 0) << "Need a solver definition to train."; // a solver file is mandatory
CHECK(!FLAGS_snapshot.size() || !FLAGS_weights.size())
<< "Give a snapshot to resume training or weights to finetune "
"but not both."; // -snapshot and -weights are mutually exclusive
vector<string> stages = get_stages_from_flags();
/* NOTE(review): SolverParameter is presumably the protobuf-generated solver message class -- confirm. */
caffe::SolverParameter solver_param; /* receives the solver settings read on the next line */
caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param);// parse the file named by -solver into solver_param
solver_param.mutable_train_state()->set_level(FLAGS_level); // copy -level, then each -stage entry, into the train state
for (int i = 0; i < stages.size(); i++) {
solver_param.mutable_train_state()->add_stage(stages[i]);
}
// If the gpus flag is not provided, allow the mode and device to be set
// in the solver prototxt.
if (FLAGS_gpu.size() == 0 // no -gpu flag, but the solver file selects GPU mode
&& solver_param.solver_mode() == caffe::SolverParameter_SolverMode_GPU) {
if (solver_param.has_device_id()) { // use the device id from the solver file when it has one
FLAGS_gpu = "" +
boost::lexical_cast<string>(solver_param.device_id());
} else { // Set default GPU if unspecified
FLAGS_gpu = "" + boost::lexical_cast<string>(0);// lexical_cast turns the number 0 into the string "0"
}
}
/* Summary of the block above: when -gpu was not given on the command line but
the solver file selects GPU mode, FLAGS_gpu is filled in from the solver file's
device_id, or with "0" when the solver file does not name a device. */
// Resolve FLAGS_gpu into the list of device ids (see get_gpus()).
vector<int> gpus;
get_gpus(&gpus);
if (gpus.size() == 0) {
LOG(INFO) << "Use CPU.";
Caffe::set_mode(Caffe::CPU);
} else {
// Log the device list as "id, id, ..."; the separator is skipped before the first id.
ostringstream s;
for (int i = 0; i < gpus.size(); ++i) {
s << (i ? ", " : "") << gpus[i];
}
LOG(INFO) << "Using GPUs " << s.str();
#ifndef CPU_ONLY
// Log the name of every listed device. The return value of
// cudaGetDeviceProperties is not checked here.
cudaDeviceProp device_prop;
for (int i = 0; i < gpus.size(); ++i) {
cudaGetDeviceProperties(&device_prop, gpus[i]);
LOG(INFO) << "GPU " << gpus[i] << ": " << device_prop.name;
}
#endif
// The first listed device becomes the solver's device and the current device;
// the solver count is set to the number of listed devices.
solver_param.set_device_id(gpus[0]);
Caffe::SetDevice(gpus[0]);
Caffe::set_mode(Caffe::GPU);
Caffe::set_solver_count(gpus.size());
}
// Build the handler for SIGINT / SIGHUP (presumably declared in caffe/util/signal_handler.h).
// GetRequestedAction, defined earlier in this file, maps the flag strings "stop", "snapshot" and "none" to SolverAction values.
caffe::SignalHandler signal_handler(
GetRequestedAction(FLAGS_sigint_effect),
GetRequestedAction(FLAGS_sighup_effect));
// Create the solver described by solver_param through the solver registry and hold it in a shared_ptr.
shared_ptr<caffe::Solver<float> >
solver(caffe::SolverRegistry<float>::CreateSolver(solver_param)); /* NOTE(review): presumably this also builds the net(s) -- confirm */
/* Hand the signal handler's action function to the solver.
   NOTE(review): presumably the solver calls it during training to learn
   whether a stop or snapshot was requested -- confirm. */
solver->SetActionFunction(signal_handler.GetActionFunction());
// Resume from -snapshot when it is given; otherwise, when -weights is given,
// copy the pretrained weights into the solver's nets. With neither flag the
// solver is left as created.
if (FLAGS_snapshot.size()) {
LOG(INFO) << "Resuming from " << FLAGS_snapshot;
solver->Restore(FLAGS_snapshot.c_str());
} else if (FLAGS_weights.size()) {
CopyLayers(solver.get(), FLAGS_weights);
}
/* With more than one GPU, run through P2PSync over the listed devices; otherwise call Solve() directly. */
if (gpus.size() > 1) {
caffe::P2PSync<float> sync(solver, NULL, solver->param());
sync.Run(gpus);
} else {
LOG(INFO) << "Starting Optimization";
solver->Solve(); // run the optimization
}
LOG(INFO) << "Optimization Done.";
return 0;
}
RegisterBrewFunction(train); /* registers train in g_brew_map under the name "train" */
// Test: score a model.
int test() { /*定义测试函数*/
CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to score."; /*判断是否传入网络*/
CHECK_GT(FLAGS_weights.size(), 0) << "Need model weights to score."; /*判断是否传入模型*/
vector<string> stages = get_stages_from_flags();
// Set device id and mode 设置设备id(如gpu)以及训练是否用gpu
vector<int> gpus;
get_gpus(&gpus); /*获得GPU信息*/
if (gpus.size() != 0) {
LOG(INFO) << "Use GPU with device ID " << gpus[0];
#ifndef CPU_ONLY /*如果没只用CPU那么就要获得GPU信息*/
cudaDeviceProp device_prop;
cudaGetDeviceProperties(&device_prop, gpus[0]);
LOG(INFO) << "GPU device name: " << device_prop.name;
#endif
Caffe::SetDevice(gpus[0]);
Caffe::set_mode(Caffe::GPU);
} else { /*如果没有GPU就在CPU上处理*/
LOG(INFO) << "Use CPU.";
Caffe::set_mode(Caffe::CPU);
}
// Instantiate the caffe net.
Net<float> caffe_net(FLAGS_model, caffe::TEST, FLAGS_level, &stages); /*创建一个网络对象*/
caffe_net.CopyTrainedLayersFrom(FLAGS_weights); /*加载模型*/
LOG(INFO) << "Running for " << FLAGS_iterations << " iterations.";
vector<int> test_score_output_id;
vector<float> test_score;
float loss = 0;
for (int i = 0; i < FLAGS_iterations; ++i) { /*FLAGS_iterations:::传入的迭代次数,一次迭代一个batch*/
float iter_loss; //每次迭代的损失
const vector<Blob<float>*>& result =
caffe_net.Forward(&iter_loss); /*按网络做前身传播*/
loss += iter_loss; /*累加每次的损失量,得到总迭代次数下的总损失*/
int idx = 0;
for (int j = 0; j < result.size(); ++j) { /*打印每次迭代的accuracy与loss信息*/
const float* result_vec = result[j]->cpu_data();
for (int k = 0; k < result[j]->count(); ++k, ++idx) {
const float score = result_vec[k];
if (i == 0) {
test_score.push_back(score);
test_score_output_id.push_back(j);
} else {
test_score[idx] += score;
}
const std::string& output_name = caffe_net.blob_names()[
caffe_net.output_blob_indices()[j]];
LOG(INFO) << "Batch " << i << ", " << output_name << " = " << score;
}
}
}
loss /= FLAGS_iterations; //所有迭代次数的总损失/迭代次数=平均每次迭代的损失
LOG(INFO) << "Loss: " << loss; /*打印测试的结果总的平均的accuracy与loss信息*/
for (int i = 0; i < test_score.size(); ++i) {
const std::string& output_name = caffe_net.blob_names()[
caffe_net.output_blob_indices()[test_score_output_id[i]]];
const float loss_weight = caffe_net.blob_loss_weights()[
caffe_net.output_blob_indices()[test_score_output_id[i]]];
std::ostringstream loss_msg_stream;
const float mean_score = test_score[i] / FLAGS_iterations;
if (loss_weight) {
loss_msg_stream << " (* " << loss_weight
<< " = " << loss_weight * mean_score << " loss)";
}
LOG(INFO) << output_name << " = " << mean_score << loss_msg_stream.str();
}
return 0;
}
RegisterBrewFunction(test); /*将test函数指针,指到map容器中*/
// Time: benchmark the execution time of a model.
// Builds the net named by -model, runs one warm-up forward and backward pass,
// then times -iterations layer-by-layer forward/backward passes and logs the
// per-layer and overall averages. Always returns 0.
int time() { /* the "time" command */
CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to time."; // a model file is mandatory
caffe::Phase phase = get_phase_from_flags(caffe::TRAIN); // phase falls back to TRAIN when -phase is empty
vector<string> stages = get_stages_from_flags();
// Set device id and mode
vector<int> gpus;
get_gpus(&gpus); /* resolve the -gpu flag into device ids */
if (gpus.size() != 0) { /* at least one device listed: use the first one */
LOG(INFO) << "Use GPU with device ID " << gpus[0];
Caffe::SetDevice(gpus[0]);
Caffe::set_mode(Caffe::GPU);
} else {
LOG(INFO) << "Use CPU."; /* no device listed: run on the CPU */
Caffe::set_mode(Caffe::CPU);
}
// Instantiate the caffe net.
Net<float> caffe_net(FLAGS_model, phase, FLAGS_level, &stages); /* net built from -model, the phase, -level and the stages */
// Do a clean forward and backward pass, so that memory allocation are done
// and future iterations will be more stable.
LOG(INFO) << "Performing Forward";
// Note that for the speed benchmark, we will assume that the network does
// not take any input blobs.
float initial_loss;
caffe_net.Forward(&initial_loss); /* one untimed forward pass */
LOG(INFO) << "Initial loss: " << initial_loss;
LOG(INFO) << "Performing Backward";
caffe_net.Backward(); /* one untimed backward pass */
// Fetch the per-layer structures once so the timed loops can call each layer directly.
const vector<shared_ptr<Layer<float> > >& layers = caffe_net.layers();
const vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
const vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
const vector<vector<bool> >& bottom_need_backward =
caffe_net.bottom_need_backward();
LOG(INFO) << "*** Benchmark begins ***";
LOG(INFO) << "Testing for " << FLAGS_iterations << " iterations.";
Timer total_timer;
total_timer.Start();
Timer forward_timer; // times the whole forward sweep of one iteration
Timer backward_timer; // times the whole backward sweep of one iteration
Timer timer; // times a single layer call
// Accumulated times; values are added from MicroSeconds() and divided by 1000 when printed as ms.
std::vector<double> forward_time_per_layer(layers.size(), 0.0);
std::vector<double> backward_time_per_layer(layers.size(), 0.0);
double forward_time = 0.0;
double backward_time = 0.0;
for (int j = 0; j < FLAGS_iterations; ++j) {
Timer iter_timer;
iter_timer.Start(); // times this whole iteration
forward_timer.Start(); // start of this iteration's forward sweep
for (int i = 0; i < layers.size(); ++i) {
timer.Start();
layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
forward_time_per_layer[i] += timer.MicroSeconds(); /* accumulate this layer's forward time */
}
forward_time += forward_timer.MicroSeconds();
backward_timer.Start(); // start of this iteration's backward sweep
// Backward visits the layers in reverse order, last layer first.
for (int i = layers.size() - 1; i >= 0; --i) {
timer.Start();
layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
bottom_vecs[i]);
backward_time_per_layer[i] += timer.MicroSeconds(); /* accumulate this layer's backward time */
}
backward_time += backward_timer.MicroSeconds();
LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: " // combined forward + backward time of this iteration
<< iter_timer.MilliSeconds() << " ms.";
}
LOG(INFO) << "Average time per layer: ";
for (int i = 0; i < layers.size(); ++i) { /* log each layer's average forward and backward time in ms */
const caffe::string& layername = layers[i]->layer_param().name();
LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
"\tforward: " << forward_time_per_layer[i] / 1000 /
FLAGS_iterations << " ms.";
LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
"\tbackward: " << backward_time_per_layer[i] / 1000 /
FLAGS_iterations << " ms.";
}
// NOTE(review): total_timer is stopped only after the per-layer report above,
// so the totals below presumably include that logging time -- confirm.
total_timer.Stop(); /* then log the overall averages and the total time */
LOG(INFO) << "Average Forward pass: " << forward_time / 1000 /
FLAGS_iterations << " ms.";
LOG(INFO) << "Average Backward pass: " << backward_time / 1000 /
FLAGS_iterations << " ms.";
LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() /
FLAGS_iterations << " ms.";
LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms.";
LOG(INFO) << "*** Benchmark ends ***";
return 0;
}
RegisterBrewFunction(time);/* registers time in g_brew_map under the name "time" */
// Program entry point.
// Sets up logging, the version string and the usage message, lets
// caffe::GlobalInit process the command line, and then expects exactly one
// remaining argument: the command name. That command is looked up through
// GetBrewFunction and run, and its result becomes the exit status. With any
// other argument count the usage text is shown and 0 is returned.
int main(int argc, char** argv) {
  // Print output to stderr (while still logging).
  FLAGS_alsologtostderr = 1;
  // Set version
  gflags::SetVersionString(AS_STRING(CAFFE_VERSION));
  // Usage message.
  gflags::SetUsageMessage("command line brew\n"
      "usage: caffe <command> <args>\n\n"
      "commands:\n"
      " train train or finetune a model\n"
      " test score a model\n"
      " device_query show GPU diagnostic information\n"
      " time benchmark model execution time");
  // Run tool or show usage.
  caffe::GlobalInit(&argc, &argv);

  if (argc != 2) {
    // NOTE(review): the "tools/caffe" argument presumably restricts the flags
    // shown to those defined in matching source files — confirm.
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/caffe");
    return 0;
  }

  // argv[1] is the command name: train, test, device_query or time.
#ifdef WITH_PYTHON_LAYER
  try {
#endif
    return GetBrewFunction(caffe::string(argv[1]))();
#ifdef WITH_PYTHON_LAYER
  } catch (bp::error_already_set) {
    // A Python error raised while running the command: print it and fail.
    PyErr_Print();
    return 1;
  }
#endif
}
参考:https://blog.csdn.net/lanxuecc/article/details/52934133