This is my first real blog post, and it's pretty fun!
First, the entry point in caffe.cpp:
int main(int argc, char** argv) {
  ......
  if (argc == 2) {
#ifdef WITH_PYTHON_LAYER
    try {
#endif
      // Dispatch on the first argument (train/test/device_query/time),
      // implemented as a string-to-function-pointer map; quite elegant.
      return GetBrewFunction(caffe::string(argv[1]))();
#ifdef WITH_PYTHON_LAYER
    } catch (bp::error_already_set) {
      PyErr_Print();
      return 1;
    }
#endif
  } else {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/caffe");
  }
}
In main, GetBrewFunction consults a map from string to function pointer, populated in factory-pattern style:
typedef int (*BrewFunction)();
typedef std::map<caffe::string, BrewFunction> BrewMap;
BrewMap g_brew_map;
Right after the definitions of train, test, device_query, and time you can see each of them being registered, which means their function pointers are already in the map by the time main runs:
RegisterBrewFunction(train);
RegisterBrewFunction(test);
RegisterBrewFunction(device_query);
RegisterBrewFunction(time);
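To see how those registrations land in the map before main even starts, here is a minimal, self-contained sketch of the mechanism (simplified from the idea in tools/caffe.cpp, not the verbatim source):

#include <iostream>
#include <map>
#include <string>

typedef int (*BrewFunction)();
typedef std::map<std::string, BrewFunction> BrewMap;
BrewMap g_brew_map;

// The macro defines a tiny class whose constructor inserts the function
// into the map, plus one static instance so that constructor runs at
// static-initialization time, i.e. before main().
#define RegisterBrewFunction(func)                        \
  namespace {                                             \
  class Registerer_##func {                               \
   public:                                                \
    Registerer_##func() { g_brew_map[#func] = &func; }    \
  };                                                      \
  Registerer_##func g_registerer_##func;                  \
  }

int train() { std::cout << "train() called\n"; return 0; }
RegisterBrewFunction(train);

int main() {
  // Equivalent of GetBrewFunction("train")(): look up by name, then call.
  return g_brew_map["train"]();
}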
Next, the train procedure:
// Train / Finetune a model.
int train() {
  ......
  caffe::SolverParameter solver_param;
  // Read the solver parameters from the file given by the -solver flag.
  caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param);
  ......
  // Create the solver from the parameters; again a string-to-function-pointer
  // map is used, i.e. the factory pattern.
  shared_ptr<caffe::Solver<float> >
      solver(caffe::SolverRegistry<float>::CreateSolver(solver_param));
  if (FLAGS_snapshot.size()) {
    // Resume training from a previously saved snapshot.
    LOG(INFO) << "Resuming from " << FLAGS_snapshot;
    solver->Restore(FLAGS_snapshot.c_str());
  } else if (FLAGS_weights.size()) {
    // For finetuning, copy the weights from the given pretrained model(s).
    CopyLayers(solver.get(), FLAGS_weights);
  }
  // gpus was filled in from the -gpu flag in the elided code above.
  if (gpus.size() > 1) {
    caffe::P2PSync<float> sync(solver, NULL, solver->param());
    sync.Run(gpus);
  } else {
    LOG(INFO) << "Starting Optimization";
    solver->Solve();  // start training the network
  }
  LOG(INFO) << "Optimization Done.";
  return 0;
}
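For context, the FLAGS_solver, FLAGS_snapshot, and FLAGS_weights used above are gflags command-line flags defined near the top of tools/caffe.cpp, along these lines (descriptions paraphrased, not verbatim):

#include <gflags/gflags.h>

DEFINE_string(solver, "",
    "The solver definition protocol buffer text file.");
DEFINE_string(snapshot, "",
    "Optional; the snapshot solver state to resume training from.");
DEFINE_string(weights, "",
    "Optional; the pretrained weights to initialize finetuning.");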
Let's first look at how CreateSolver builds the solver (and, through it, the nets). CreateSolver is defined in solver_factory.hpp. The key point is that Solver is a base class with subclasses such as SGDSolver; the implementation below constructs, from param.type(), a pointer to the matching concrete solver, e.g. SGD.
static Solver<Dtype>* CreateSolver(const SolverParameter& param) {
  const string& type = param.type();
  CreatorRegistry& registry = Registry();
  CHECK_EQ(registry.count(type), 1) << "Unknown solver type: " << type
      << " (known types: " << SolverTypeListString() << ")";
  return registry[type](param);
}
The crucial line is the last one: it creates the Solver object that matches the configuration file (an SGDSolver subclass object by default). The factory pattern and a key macro, REGISTER_SOLVER_CLASS(SGD), do the heavy lifting here.
#define REGISTER_SOLVER_CLASS(type)                \
  template <typename Dtype>                        \
  Solver<Dtype>* Creator_##type##Solver(           \
      const SolverParameter& param)                \
  {                                                \
    return new type##Solver<Dtype>(param);         \
  }                                                \
  REGISTER_SOLVER_CREATOR(type, Creator_##type##Solver)
In this way an SGDSolver object gets constructed through its constructor:
explicit SGDSolver(const SolverParameter& param)
    : Solver<Dtype>(param) { PreSolve(); }
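To see the whole registration-and-creation chain in one place, here is a minimal, self-contained sketch of the pattern (simplified names, no Dtype templating, so it is not Caffe's exact code):

#include <iostream>
#include <map>
#include <string>

struct SolverParameter { std::string type; };

class Solver {
 public:
  explicit Solver(const SolverParameter&) {}
  virtual ~Solver() {}
};

class SGDSolver : public Solver {
 public:
  explicit SGDSolver(const SolverParameter& p) : Solver(p) {
    std::cout << "SGDSolver constructed\n";
  }
};

typedef Solver* (*Creator)(const SolverParameter&);
typedef std::map<std::string, Creator> CreatorRegistry;

CreatorRegistry& Registry() {
  static CreatorRegistry registry;  // constructed on first use
  return registry;
}

// Registration helper: its constructor runs at static-init time,
// inserting the creator into the map before main() starts.
struct Registerer {
  Registerer(const std::string& type, Creator c) { Registry()[type] = c; }
};

Solver* Creator_SGDSolver(const SolverParameter& p) { return new SGDSolver(p); }
static Registerer g_sgd("SGD", Creator_SGDSolver);

int main() {
  SolverParameter param;
  param.type = "SGD";
  Solver* s = Registry()[param.type](param);  // same idea as registry[type](param)
  delete s;
}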
At the same time, the Solver base class constructor runs as well; it is declared in solver.hpp:
explicit Solver(const SolverParameter& param,
    const Solver* root_solver = NULL);
The Solver constructor in turn calls Init to set up the training and test networks. Init is not declared virtual, so it cannot be overridden; in other words, every solver goes through this same function for initialization.
template <typename Dtype>
void Solver<Dtype>::Init(const SolverParameter& param) {
  ......
  // Scaffolding code
  InitTrainNet();  // initialize the training net
  if (Caffe::root_solver()) {
    InitTestNets();  // initialize the test net(s)
    LOG(INFO) << "Solver scaffolding done.";
  }
  iter_ = 0;  // reset the iteration counter
  current_step_ = 0;
}
Next, the training-net initialization function InitTrainNet:
template <typename Dtype>
void Solver<Dtype>::InitTrainNet() {
  ......
  NetParameter net_param;
  if (param_.has_train_net_param()) {
    LOG_IF(INFO, Caffe::root_solver())
        << "Creating training net specified in train_net_param.";
    net_param.CopyFrom(param_.train_net_param());
  } else if (param_.has_train_net()) {
    LOG_IF(INFO, Caffe::root_solver())
        << "Creating training net from train_net file: " << param_.train_net();
    // Read the net parameters from the prototxt file.
    ReadNetParamsFromTextFileOrDie(param_.train_net(), &net_param);
  }
  ......
  // Construct the net.
  if (Caffe::root_solver()) {
    net_.reset(new Net<Dtype>(net_param));
  } else {
    net_.reset(new Net<Dtype>(net_param, root_solver_->net_.get()));
  }
}
The net is built with the constructor that takes a NetParameter, whose implementation lives in net.cpp:
template <typename Dtype>
Net<Dtype>::Net(const NetParameter& param, const Net* root_net)
    : root_net_(root_net) {
  Init(param);  // delegate initialization to Init
}
The implementation of Init:
template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {
  ......
  NetParameter filtered_param;
  // Filter out layers that are not used in the current phase
  // (e.g. test-only layers during training).
  FilterNet(in_param, &filtered_param);
  ......
  NetParameter param;
  // Insert split layers wherever one top blob feeds several layers, so the
  // gradients from all consumers can be accumulated in backward.
  InsertSplits(filtered_param, &param);
  // Basically, build all the layers and set up their connections.
  name_ = param.name();
  map<string, int> blob_name_to_idx;
  set<string> available_blobs;
  memory_used_ = 0;
  // For each layer, set up its input and output (bottom/top initialization).
  bottom_vecs_.resize(param.layer_size());
  top_vecs_.resize(param.layer_size());
  bottom_id_vecs_.resize(param.layer_size());
  param_id_vecs_.resize(param.layer_size());
  top_id_vecs_.resize(param.layer_size());
  bottom_need_backward_.resize(param.layer_size());
  // Iterate over the layers in param.
  for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) {
    ......
    // Setup layer.
    const LayerParameter& layer_param = param.layer(layer_id);  // current layer's parameters
    ......
    if (share_from_root) {
      LOG(INFO) << "Sharing layer " << layer_param.name() << " from root net";
      layers_.push_back(root_net_->layers_[layer_id]);
      layers_[layer_id]->SetShared(true);
    } else {
      // Construct a different layer type depending on the parameters;
      // again the factory pattern at work.
      layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));
    }
    layer_names_.push_back(layer_param.name());
    LOG_IF(INFO, Caffe::root_solver())
        << "Creating Layer " << layer_param.name();
    // By default this layer does not need backward.
    bool need_backward = false;
    // Figure out this layer's input and output.
    // Append bottom blobs for the current layer.
    for (int bottom_id = 0; bottom_id < layer_param.bottom_size();
         ++bottom_id) {
      const int blob_id = AppendBottom(param, layer_id, bottom_id,
                                       &available_blobs, &blob_name_to_idx);
      // If a blob needs backward, this layer should provide it.
      need_backward |= blob_need_backward_[blob_id];
    }
    // Append top blobs for the current layer.
    int num_top = layer_param.top_size();
    for (int top_id = 0; top_id < num_top; ++top_id) {
      AppendTop(param, layer_id, top_id, &available_blobs, &blob_name_to_idx);
      // Collect Input layer tops as Net inputs.
      if (layer_param.type() == "Input") {
        const int blob_id = blobs_.size() - 1;
        net_input_blob_indices_.push_back(blob_id);
        net_input_blobs_.push_back(blobs_[blob_id].get());
      }
    }
    ......
    // After this layer is connected, set it up.
    if (share_from_root) {
      // Set up size of top blobs using root_net_
      const vector<Blob<Dtype>*>& base_top = root_net_->top_vecs_[layer_id];
      const vector<Blob<Dtype>*>& this_top = this->top_vecs_[layer_id];
      for (int top_id = 0; top_id < base_top.size(); ++top_id) {
        this_top[top_id]->ReshapeLike(*base_top[top_id]);
        LOG(INFO) << "Created top blob " << top_id << " (shape: "
                  << this_top[top_id]->shape_string() << ") for shared layer "
                  << layer_param.name();
      }
    } else {
      // SetUp, defined on the Layer base class, builds each layer.
      layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);
    }
  ......
}
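Before moving on to SetUp, here is a minimal sketch (not Caffe's code, just the idea) of the bookkeeping AppendTop and AppendBottom perform: each top blob is recorded under its name, and a later layer naming that blob as a bottom finds it by lookup in blob_name_to_idx:

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> blobs;             // stands in for blobs_
  std::map<std::string, int> blob_name_to_idx;
  std::set<std::string> available_blobs;

  // "AppendTop" for a data layer producing a blob named "data".
  blobs.push_back("data");
  blob_name_to_idx["data"] = 0;
  available_blobs.insert("data");

  // "AppendBottom" for a conv layer consuming "data".
  const std::string bottom_name = "data";
  int blob_id = blob_name_to_idx[bottom_name];
  available_blobs.erase(bottom_name);         // each blob is consumed once
  std::cout << "conv1 reads blob #" << blob_id << "\n";
}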
Now let's see how SetUp builds a layer. SetUp calls the virtual function LayerSetUp, which is overridden by all of its subclasses; that is what lets each layer type set itself up differently:
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {}

void SetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  InitMutex();
  CheckBlobCounts(bottom, top);
  LayerSetUp(bottom, top);
  Reshape(bottom, top);
  SetLossWeights(top);
}
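This is the classic template-method pattern: the non-virtual SetUp fixes the setup sequence, while virtual hooks let each layer customize the steps. A minimal sketch of the pattern (not Caffe's actual class, which also handles mutexes, blob-count checks, and loss weights):

#include <iostream>

class Layer {
 public:
  virtual ~Layer() {}
  // Non-virtual: every layer runs the same setup sequence.
  void SetUp() {
    LayerSetUp();   // virtual hook, overridden per layer type
    Reshape();      // another virtual hook
  }
 protected:
  virtual void LayerSetUp() {}   // default: do nothing
  virtual void Reshape() = 0;
};

class ConvolutionLayer : public Layer {
 protected:
  void LayerSetUp() { std::cout << "conv-specific setup\n"; }
  void Reshape() { std::cout << "conv reshape\n"; }
};

int main() {
  ConvolutionLayer conv;
  conv.SetUp();  // prints conv-specific setup, then conv reshape
}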
Take DataLayer as an example: its LayerSetUp is implemented in its parent class BasePrefetchingDataLayer (base_data_layer.cpp), and by the source's own convention it is not meant to be overridden any further.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // First call the parent class's LayerSetUp.
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);
  // Before starting the prefetch thread, allocate host (and, below, device)
  // memory, to avoid errors on some GPUs.
  for (int i = 0; i < PREFETCH_COUNT; ++i) {
    prefetch_[i].data_.mutable_cpu_data();
    if (this->output_labels_) {
      prefetch_[i].label_.mutable_cpu_data();
    }
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    for (int i = 0; i < PREFETCH_COUNT; ++i) {
      prefetch_[i].data_.mutable_gpu_data();
      if (this->output_labels_) {
        prefetch_[i].label_.mutable_gpu_data();
      }
    }
  }
#endif
  DLOG(INFO) << "Initializing prefetch";
  // Initialize the random number generator.
  this->data_transformer_->InitRand();
  // Start the prefetch thread.
  StartInternalThread();
  DLOG(INFO) << "Prefetch initialized.";
}
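The StartInternalThread call at the end kicks off the producer side of a producer/consumer pipeline: a background thread keeps loading batches while the forward pass consumes them. Here is a minimal sketch of that idea using std::thread and a tiny blocking queue (Caffe uses its own InternalThread and BlockingQueue classes instead):

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>

// Very small blocking queue, standing in for Caffe's BlockingQueue.
template <typename T>
class BlockingQueue {
 public:
  void push(T value) {
    { std::lock_guard<std::mutex> lock(mutex_); queue_.push(value); }
    cond_.notify_one();
  }
  T pop() {
    std::unique_lock<std::mutex> lock(mutex_);
    cond_.wait(lock, [this] { return !queue_.empty(); });
    T value = queue_.front();
    queue_.pop();
    return value;
  }
 private:
  std::queue<T> queue_;
  std::mutex mutex_;
  std::condition_variable cond_;
};

int main() {
  BlockingQueue<int> full;  // batches ready for consumption
  // Producer: stands in for the prefetch thread loading batches.
  std::thread prefetch([&] {
    for (int batch = 0; batch < 3; ++batch) full.push(batch);
  });
  // Consumer: stands in for Forward() taking the next ready batch.
  for (int i = 0; i < 3; ++i)
    std::cout << "consumed batch " << full.pop() << "\n";
  prefetch.join();
}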
BasePrefetchingDataLayer in turn calls its parent BaseDataLayer's LayerSetUp. Because LayerSetUp is virtual, the call has to be explicitly qualified as BaseDataLayer<Dtype>::LayerSetUp; an unqualified call would dispatch virtually and land right back in the derived override. And now, the moment of magic:
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // A top size greater than 1 means labels are output as well.
  if (top.size() == 1) {
    output_labels_ = false;
  } else {
    output_labels_ = true;
  }
  data_transformer_.reset(
      new DataTransformer<Dtype>(transform_param_, this->phase_));
  // Initialize the random number generator.
  data_transformer_->InitRand();
  // Call the virtual setup hook.
  DataLayerSetUp(bottom, top);
}
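A quick aside on the qualified call BaseDataLayer<Dtype>::LayerSetUp(bottom, top) seen earlier: qualifying the name suppresses virtual dispatch. A minimal sketch (not Caffe code) of what would go wrong without it:

#include <iostream>

struct Base {
  virtual void LayerSetUp() { std::cout << "Base setup\n"; }
};

struct Derived : Base {
  void LayerSetUp() {
    Base::LayerSetUp();  // qualified call: runs Base's version directly
    std::cout << "Derived setup\n";
    // An unqualified LayerSetUp() here would dispatch virtually and
    // recurse into this very function forever.
  }
};

int main() {
  Derived d;
  d.LayerSetUp();  // prints "Base setup" then "Derived setup"
}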
BaseDataLayer::LayerSetUp calls the virtual function DataLayerSetUp, which is not given a real implementation in BaseDataLayer; a subclass has to supply one. BasePrefetchingDataLayer does not implement it either; it is BasePrefetchingDataLayer's subclass DataLayer that does, so the call here lands in DataLayer's DataLayerSetUp:
template <typename Dtype>
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int batch_size = this->layer_param_.data_param().batch_size();
  // Peek at one datum to initialize the top blob dimensions.
  Datum& datum = *(reader_.full().peek());
  // Infer the shape of a single datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Use the batch size as the leading dimension.
  top_shape[0] = batch_size;
  // Reshape.
  top[0]->Reshape(top_shape);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    // Reshape, which also allocates the data memory.
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  // Log the output size.
  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  if (this->output_labels_) {
    vector<int> label_shape(1, batch_size);
    top[1]->Reshape(label_shape);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      // Reshape, which also allocates the label memory.
      this->prefetch_[i].label_.Reshape(label_shape);
    }
  }
}
The same procedure runs for every layer type, and with that the training net is fully constructed. Next time I'll write about how this net actually gets solved!