参考博客https://blog.csdn.net/mounty_fsc/article/details/51090114
以及http://alanse7en.github.io/caffedai-ma-jie-xi-1/caffe代码解析(1)-(4)
//在这里我们只分析训练过程,根据上述博客把训练流程中的重点整理一遍,写的比较粗糙
//在src\caffe\solvers中,有六种solver,在每种solver的最后有REGISTER_SOLVER_CLASS(XXX),以REGISTER_SOLVER_CLASS(SGD)为例
//首先展开下面定义的REGISTER_SOLVER_CREATOR宏(原笔记中写的“17行”即指该宏定义处)
/*
分别定义了SolverRegisterer这个模板类的float和double类型的static对象,这会去调用各自
的构造函数,而在SolverRegisterer的构造函数中调用了之前提到的SolverRegistry类的
AddCreator函数,这个函数就是将刚才定义的Creator_SGDSolver这个函数的指针存到
g_registry指向的map里面。
*/
// Instantiates two file-scope static SolverRegisterer objects (float and
// double). Their constructors run at static-initialization time and insert
// `creator` under the key "type" into the solver registry.
// NOTE: no comments may appear inside the macro body — the trailing
// backslashes are line continuations.
#define REGISTER_SOLVER_CREATOR(type, creator) \
static SolverRegisterer<float> g_creator_f_##type(#type, creator<float>); \
static SolverRegisterer<double> g_creator_d_##type(#type, creator<double>) \
// Registration helper: constructing a SolverRegisterer<Dtype> registers a
// solver-creator function with SolverRegistry<Dtype>. Instances are only
// ever created as file-scope statics by REGISTER_SOLVER_CREATOR.
template <typename Dtype>
class SolverRegisterer {
public:
SolverRegisterer(const string& type,
// creator: pointer to a function that takes a SolverParameter and
// returns a newly constructed Solver<Dtype>*
Solver<Dtype>* (*creator)(const SolverParameter&))
{
// LOG(INFO) << "Registering solver type: " << type;
SolverRegistry<Dtype>::AddCreator(type, creator);
}
};
// Inserts `creator` under key `type` into the global creator map returned
// by Registry(); CHECK-fails (aborts) if that type was already registered.
static void AddCreator(const string& type, Creator creator) {
CreatorRegistry& registry = Registry();
CHECK_EQ(registry.count(type), 0)
<< "Solver type " << type << " already registered.";
registry[type] = creator;
}
/*
这个宏会定义一个名为Creator_SGDSolver的函数,这个函数即为Creator类型的指针指向的函数,
在这个函数中定义了SGDSolver类对象并调用其构造函数,并将构造的这个对象得到的地址返回,这也就是Creator
类型函数的作用:构造一个对应类型的Solver对象,将其地址返回。然后在这个宏里又调用了
REGISTER_SOLVER_CREATOR这个宏
*/
// Defines a factory function Creator_<type>Solver(param) that heap-allocates
// a <type>Solver<Dtype> and returns it as Solver<Dtype>*, then registers that
// function via REGISTER_SOLVER_CREATOR. E.g. REGISTER_SOLVER_CLASS(SGD)
// defines Creator_SGDSolver and registers it under the key "SGD".
#define REGISTER_SOLVER_CLASS(type) \
template <typename Dtype> \
Solver<Dtype>* Creator_##type##Solver( \
const SolverParameter& param) \
{ \
return new type##Solver<Dtype>(param); \
} \
REGISTER_SOLVER_CREATOR(type, Creator_##type##Solver)
//在这里我们只分析训练过程,根据上述博客把训练流程中的重点整理一遍,写的比较粗糙
//在src\caffe\solvers中,有六种solver,在每种solver的最后有REGISTER_SOLVER_CLASS(XXX),以REGISTER_SOLVER_CLASS(SGD)为例
//首先展开下面定义的REGISTER_SOLVER_CREATOR宏(原笔记中写的“17行”即指该宏定义处)
/*
分别定义了SolverRegisterer这个模板类的float和double类型的static对象,这会去调用各自
的构造函数,而在SolverRegisterer的构造函数中调用了之前提到的SolverRegistry类的
AddCreator函数,这个函数就是将刚才定义的Creator_SGDSolver这个函数的指针存到
g_registry指向的map里面。
*/
// (duplicate excerpt) Instantiates two file-scope static SolverRegisterer
// objects (float and double); their constructors register `creator` under
// the key "type" at static-initialization time. No comments may appear
// inside the macro body because of the line continuations.
#define REGISTER_SOLVER_CREATOR(type, creator) \
static SolverRegisterer<float> g_creator_f_##type(#type, creator<float>); \
static SolverRegisterer<double> g_creator_d_##type(#type, creator<double>) \
// (duplicate excerpt) Registration helper: constructing a
// SolverRegisterer<Dtype> registers a solver-creator function with
// SolverRegistry<Dtype>.
template <typename Dtype>
class SolverRegisterer {
public:
SolverRegisterer(const string& type,
// creator: pointer to a function that takes a SolverParameter and
// returns a newly constructed Solver<Dtype>*
Solver<Dtype>* (*creator)(const SolverParameter&))
{
// LOG(INFO) << "Registering solver type: " << type;
SolverRegistry<Dtype>::AddCreator(type, creator);
}
};
// (duplicate excerpt) Inserts `creator` under key `type` into the global
// creator map; CHECK-fails if that type was already registered.
static void AddCreator(const string& type, Creator creator) {
CreatorRegistry& registry = Registry();
CHECK_EQ(registry.count(type), 0)
<< "Solver type " << type << " already registered.";
registry[type] = creator;
}
/*
这个宏会定义一个名为Creator_SGDSolver的函数,这个函数即为Creator类型的指针指向的函数,
在这个函数中定义了SGDSolver类对象并调用其构造函数,并将构造的这个对象得到的地址返回,这也就是Creator
类型函数的作用:构造一个对应类型的Solver对象,将其地址返回。然后在这个宏里又调用了
REGISTER_SOLVER_CREATOR这个宏
*/
// (duplicate excerpt) Defines the factory function Creator_<type>Solver,
// which news a <type>Solver<Dtype> and returns it as Solver<Dtype>*, then
// registers it via REGISTER_SOLVER_CREATOR.
#define REGISTER_SOLVER_CLASS(type) \
template <typename Dtype> \
Solver<Dtype>* Creator_##type##Solver( \
const SolverParameter& param) \
{ \
return new type##Solver<Dtype>(param); \
} \
REGISTER_SOLVER_CREATOR(type, Creator_##type##Solver)
} // namespace caffe
//在tools/caffe.cpp的int train()中
//SolverParameter是通过Google Protocol Buffer自动生成的一个类
caffe::SolverParameter solver_param;
shared_ptr<caffe::Solver<float> > //初始化
solver(caffe::SolverRegistry<float>::CreateSolver(solver_param))
----------------------------------------------------------------------------------------------
// 在solver_factory.hpp中
//Creator是一个函数指针类型,指向的函数的参数为SolverParameter类型,返回类型为Solver<Dtype>*
typedef Solver<Dtype>* (*Creator)(const SolverParameter&);
typedef std::map<string, Creator> CreatorRegistry
//SolverParameter是通过Google Protocol Buffer自动生成的一个类
caffe::SolverParameter solver_param;
shared_ptr<caffe::Solver<float> > //初始化
solver(caffe::SolverRegistry<float>::CreateSolver(solver_param))
----------------------------------------------------------------------------------------------
// 在solver_factory.hpp中
//Creator是一个函数指针类型,指向的函数的参数为SolverParameter类型,返回类型为Solver<Dtype>*
typedef Solver<Dtype>* (*Creator)(const SolverParameter&);
typedef std::map<string, Creator> CreatorRegistry
static Solver<Dtype>* CreateSolver(const SolverParameter& param) {
// string类型的变量type,表示Solver的类型(‘SGD’/’Nesterov’等)
// 默认为SGD
const string& type = param.type();
// 定义了一个key类型为string,value类型为Creator的map:registry
// 返回为静态变量
CreatorRegistry& registry = Registry();
//此处又调用solver_factory.hpp中Registry()函数,具体如下:
// Returns the singleton creator map, lazily heap-allocated on first call.
// The pointer is never deleted, so the map stays valid for the lifetime of
// the process (avoids static-destruction-order issues).
static CreatorRegistry& Registry() {
// function-local static: constructed exactly once, on first use
static CreatorRegistry* g_registry_ = new CreatorRegistry();
return *g_registry_;
}
----------------------------------------------------------------------------------------------
for (typename CreatorRegistry::iterator iter = registry.begin();
iter != registry.end(); ++iter)
{
std::cout<<"key:"<<iter->first<<"``` "
<<"value:"<<iter->second<<std::endl;}
// string类型的变量type,表示Solver的类型(‘SGD’/’Nesterov’等)
// 默认为SGD
const string& type = param.type();
// 定义了一个key类型为string,value类型为Creator的map:registry
// 返回为静态变量
CreatorRegistry& registry = Registry();
//此处又调用solver_factory.hpp中Registry()函数,具体如下:
// (duplicate excerpt) Returns the singleton creator map, lazily
// heap-allocated on first call and never deleted.
static CreatorRegistry& Registry() {
// function-local static: constructed exactly once, on first use
static CreatorRegistry* g_registry_ = new CreatorRegistry();
return *g_registry_;
}
----------------------------------------------------------------------------------------------
for (typename CreatorRegistry::iterator iter = registry.begin();
iter != registry.end(); ++iter)
{
std::cout<<"key:"<<iter->first<<"``` "
<<"value:"<<iter->second<<std::endl;}
/*
* 如果是一个已经register过的Solver类型,那么registry.count(type)应该为1,
* 然后通过registry这个map返回了我们需要类型的Solver的creator,并调用这个
* creator函数,将creator返回的Solver<Dtype>*返回。
*/
CHECK_EQ(registry.count(type), 1) << "Unknown solver type: " << type
<< " (known types: " << SolverTypeListString() << ")";
//通过static的g_registry_[type]获得type对应的solver的creator函数指针
return registry[type](param);//返回Solver<Dtype>*,但是是子类SGDSolver对象的地址返回给基类
}
//--------------------------------------------------------------------------------------------------
//在return registry[SGD](param)中调用了36行中的new SGDSolver<Dtype>(param),而SGDSolver继承基类Solver,所以首先
//调用基类的构造函数。在sgd_solvers.hpp中定义了
// SGDSolver ctor: the base Solver<Dtype>(param) ctor runs first (building
// and initializing the net via Init), then PreSolve() allocates the
// history_/update_/temp_ buffers.
explicit SGDSolver(const SolverParameter& param): Solver<Dtype>(param) { PreSolve(); }
//子类构造函数先放这儿,因为基类构造函数太长,调用的函数太多。
// Allocates one history_, update_ and temp_ Blob per learnable parameter,
// each shaped identically to that parameter. Called from the SGDSolver
// constructor, after the base Solver has built this->net_.
template <typename Dtype>
void SGDSolver<Dtype>::PreSolve() {
// Initialize the history
const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
history_.clear();
update_.clear();
temp_.clear();
for (int i = 0; i < net_params.size(); ++i) {
// one scratch blob of the same shape per learnable parameter
const vector<int>& shape = net_params[i]->shape();
history_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
update_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
temp_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
}
}
在solver.hpp中又定义了
explicit Solver(const SolverParameter& param,const Solver* root_solver = NULL);
在solver.cpp中
//会调用Init()方法进行初始化,即Solver scaffolding
// Base Solver ctor: stores root_solver_ (NULL for the root solver itself),
// default-initializes net_/callbacks_, clears the early-exit flag, then
// delegates all real setup (net construction etc.) to Init(param).
template <typename Dtype>
Solver<Dtype>::Solver(const SolverParameter& param, const Solver* root_solver)
: net_(), callbacks_(), root_solver_(root_solver),requested_early_exit_(false)
{
Init(param);
}
* 如果是一个已经register过的Solver类型,那么registry.count(type)应该为1,
* 然后通过registry这个map返回了我们需要类型的Solver的creator,并调用这个
* creator函数,将creator返回的Solver<Dtype>*返回。
*/
CHECK_EQ(registry.count(type), 1) << "Unknown solver type: " << type
<< " (known types: " << SolverTypeListString() << ")";
//通过static的g_registry_[type]获得type对应的solver的creator函数指针
return registry[type](param);//返回Solver<Dtype>*,但是是子类SGDSolver对象的地址返回给基类
}
//--------------------------------------------------------------------------------------------------
//在return registry[SGD](param)中调用了36行中的new SGDSolver<Dtype>(param),而SGDSolver继承基类Solver,所以首先
//调用基类的构造函数。在sgd_solvers.hpp中定义了
// (duplicate excerpt) SGDSolver ctor: base Solver(param) runs first, then
// PreSolve() allocates the per-parameter scratch buffers.
explicit SGDSolver(const SolverParameter& param): Solver<Dtype>(param) { PreSolve(); }
//子类构造函数先放这儿,因为基类构造函数太长,调用的函数太多。
// (duplicate excerpt) Allocates one history_, update_ and temp_ Blob per
// learnable parameter, each shaped identically to that parameter.
template <typename Dtype>
void SGDSolver<Dtype>::PreSolve() {
// Initialize the history
const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
history_.clear();
update_.clear();
temp_.clear();
for (int i = 0; i < net_params.size(); ++i) {
// one scratch blob of the same shape per learnable parameter
const vector<int>& shape = net_params[i]->shape();
history_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
update_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
temp_.push_back(shared_ptr<Blob<Dtype> >(new Blob<Dtype>(shape)));
}
}
在solver.hpp中又定义了
explicit Solver(const SolverParameter& param,const Solver* root_solver = NULL);
在solver.cpp中
//会调用Init()方法进行初始化,即Solver scaffolding
// (duplicate excerpt) Base Solver ctor: stores root_solver_, clears members,
// then delegates all setup to Init(param).
template <typename Dtype>
Solver<Dtype>::Solver(const SolverParameter& param, const Solver* root_solver)
: net_(), callbacks_(), root_solver_(root_solver),requested_early_exit_(false)
{
Init(param);
}
template <typename Dtype>
void Solver<Dtype>::Init(const SolverParameter& param) {
// 检查当前是否是root_solver(多GPU模式下,只有root_solver才运行这一部分的代码)
//Caffe::root_solver()此时应为真,集体在哪个位置还没找到
CHECK(Caffe::root_solver() || root_solver_)
<< "root_solver_ needs to be set for all non-root solvers";
LOG_IF(INFO, Caffe::root_solver()) << "Initializing solver from parameters: "
<< std::endl << param.DebugString();//DebugString()没找到在哪
void Solver<Dtype>::Init(const SolverParameter& param) {
// 检查当前是否是root_solver(多GPU模式下,只有root_solver才运行这一部分的代码)
//Caffe::root_solver()此时应为真,集体在哪个位置还没找到
CHECK(Caffe::root_solver() || root_solver_)
<< "root_solver_ needs to be set for all non-root solvers";
LOG_IF(INFO, Caffe::root_solver()) << "Initializing solver from parameters: "
<< std::endl << param.DebugString();//DebugString()没找到在哪
//为solver类的数据成员param_赋值
param_ = param;
param_ = param;
// 默认为1
CHECK_GE(param_.average_loss(), 1) << "average_loss should be non-negative.";
CHECK_GE(param_.average_loss(), 1) << "average_loss should be non-negative.";
//检测快照的的写入权限
CheckSnapshotWritePermissions();
CheckSnapshotWritePermissions();
//random_seed默认为-1,
if (Caffe::root_solver() && param_.random_seed() >= 0) {
//调用Caffe命名空间里的set_random_seed函数,而不是caffe类的set_random_seed函数;
//param_.random_seed()实际上调用的是::google::protobuf::int64 random_seed()
Caffe::set_random_seed(param_.random_seed());
}
if (Caffe::root_solver() && param_.random_seed() >= 0) {
//调用Caffe命名空间里的set_random_seed函数,而不是caffe类的set_random_seed函数;
//param_.random_seed()实际上调用的是::google::protobuf::int64 random_seed()
Caffe::set_random_seed(param_.random_seed());
}
// Scaffolding code
// 搭建网络结构
InitTrainNet();
// 搭建网络结构
InitTrainNet();
if (Caffe::root_solver()) {
//LOG(INFO) << "You big SB.";
InitTestNets();
//LOG(INFO) << "Solver scaffolding done.";
}
//LOG(INFO) << "You big SB.";
InitTestNets();
//LOG(INFO) << "Solver scaffolding done.";
}
// iter_初始化为0
iter_ = 0;
current_step_ = 0;
}
//InitTrainNet()具体如下:
// 初始化训练网络
template <typename Dtype>
void Solver<Dtype>::InitTrainNet() {
const int num_train_nets = param_.has_net() + param_.has_net_param() +
param_.has_train_net() + param_.has_train_net_param();
const string& field_names = "net, net_param, train_net, train_net_param";
iter_ = 0;
current_step_ = 0;
}
//InitTrainNet()具体如下:
// 初始化训练网络
template <typename Dtype>
void Solver<Dtype>::InitTrainNet() {
const int num_train_nets = param_.has_net() + param_.has_net_param() +
param_.has_train_net() + param_.has_train_net_param();
const string& field_names = "net, net_param, train_net, train_net_param";
//有且只能有一个train net
CHECK_GE(num_train_nets, 1) << "SolverParameter must specify a train net "
<< "using one of these fields: " << field_names;
CHECK_LE(num_train_nets, 1) << "SolverParameter must not contain more than "
<< "one of these fields specifying a train_net: " << field_names;
CHECK_GE(num_train_nets, 1) << "SolverParameter must specify a train net "
<< "using one of these fields: " << field_names;
CHECK_LE(num_train_nets, 1) << "SolverParameter must not contain more than "
<< "one of these fields specifying a train_net: " << field_names;
// 读取训练网络结构参数
NetParameter net_param;
if (param_.has_train_net_param()) {
LOG_IF(INFO, Caffe::root_solver())
<< "Creating training net specified in train_net_param.";
net_param.CopyFrom(param_.train_net_param());
}
else if (param_.has_train_net()) {
LOG_IF(INFO, Caffe::root_solver())
<< "Creating training net from train_net file: " << param_.train_net();
ReadNetParamsFromTextFileOrDie(param_.train_net(), &net_param);
}
if (param_.has_net_param()) {
LOG_IF(INFO, Caffe::root_solver())
<< "Creating training net specified in net_param.";
net_param.CopyFrom(param_.net_param());
}
if (param_.has_net()) {
LOG_IF(INFO, Caffe::root_solver())
<< "Creating training net from net file: " << param_.net();
ReadNetParamsFromTextFileOrDie(param_.net(), &net_param);
}
// Set the correct NetState. We start with the solver defaults (lowest
// precedence); then, merge in any NetState specified by the net_param itself;
// finally, merge in any NetState specified by the train_state (highest
// precedence).
NetParameter net_param;
if (param_.has_train_net_param()) {
LOG_IF(INFO, Caffe::root_solver())
<< "Creating training net specified in train_net_param.";
net_param.CopyFrom(param_.train_net_param());
}
else if (param_.has_train_net()) {
LOG_IF(INFO, Caffe::root_solver())
<< "Creating training net from train_net file: " << param_.train_net();
ReadNetParamsFromTextFileOrDie(param_.train_net(), &net_param);
}
if (param_.has_net_param()) {
LOG_IF(INFO, Caffe::root_solver())
<< "Creating training net specified in net_param.";
net_param.CopyFrom(param_.net_param());
}
if (param_.has_net()) {
LOG_IF(INFO, Caffe::root_solver())
<< "Creating training net from net file: " << param_.net();
ReadNetParamsFromTextFileOrDie(param_.net(), &net_param);
}
// Set the correct NetState. We start with the solver defaults (lowest
// precedence); then, merge in any NetState specified by the net_param itself;
// finally, merge in any NetState specified by the train_state (highest
// precedence).
//设置正确的网络状态,训练从默认开始,然后融入通过网络层规定在任何状态,
//最后融入训练状态(最优解)
NetState net_state;
net_state.set_phase(TRAIN);//哪里有net_state.set_phase,猜测在proto.pb.h中
//LOG(INFO) << net_state.phase()<<"You big SB.";
//最后融入训练状态(最优解)
NetState net_state;
net_state.set_phase(TRAIN);//哪里有net_state.set_phase,猜测在proto.pb.h中
//LOG(INFO) << net_state.phase()<<"You big SB.";
net_state.MergeFrom(net_param.state());
//LOG(INFO) << net_state.phase()<<"You big SB.";
//LOG(INFO) << net_state.phase()<<"You big SB.";
//从低到高获取state,最终从最高优先级SolverParameter类型中的train_state,
//显然这会覆盖掉之前获取的state。
net_state.MergeFrom(param_.train_state());
//LOG(INFO) << net_state.phase()<<"You big SB.";
//显然这会覆盖掉之前获取的state。
net_state.MergeFrom(param_.train_state());
//LOG(INFO) << net_state.phase()<<"You big SB.";
//这里获取的state可以为Netparameter中的state赋值,然后可以根据LayerParameter中的
//include和exclude来确定该层是否应该包含在网络中。
net_param.mutable_state()->CopyFrom(net_state);
//include和exclude来确定该层是否应该包含在网络中。
net_param.mutable_state()->CopyFrom(net_state);
//这是Initialize train net 的一部分工作。InitTestNets也是如此
if (Caffe::root_solver()) {
//调用模板类的构造函数,进行net的初始化
net_.reset(new Net<Dtype>(net_param));
}
else {
net_.reset(new Net<Dtype>(net_param, root_solver_->net_.get()));
}
}
上面net_.reset(new Net<Dtype>(net_param))调用的Net构造函数在net.hpp中声明如下:
explicit Net(const NetParameter& param, const Net* root_net = NULL);
调用net.cpp中的定义
// Net ctor: records the (possibly NULL) root net used for layer sharing in
// multi-GPU mode, then builds the whole network in Init(param).
template <typename Dtype>
Net<Dtype>::Net(const NetParameter& param, const Net* root_net)
: root_net_(root_net) {
Init(param);
}
//-----------------------网络结构初始化开始,通过Net的构造函数调用---------------------
template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {
CHECK(Caffe::root_solver() || root_net_)
<< "root_net_ needs to be set for all non-root solvers";
if (Caffe::root_solver()) {
//调用模板类的构造函数,进行net的初始化
net_.reset(new Net<Dtype>(net_param));
}
else {
net_.reset(new Net<Dtype>(net_param, root_solver_->net_.get()));
}
}
上面net_.reset(new Net<Dtype>(net_param))调用的Net构造函数在net.hpp中声明如下:
explicit Net(const NetParameter& param, const Net* root_net = NULL);
调用net.cpp中的定义
// (duplicate excerpt) Net ctor: records root_net_ then builds the network
// in Init(param).
template <typename Dtype>
Net<Dtype>::Net(const NetParameter& param, const Net* root_net)
: root_net_(root_net) {
Init(param);
}
//-----------------------网络结构初始化开始,通过Net的构造函数调用---------------------
template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {
CHECK(Caffe::root_solver() || root_net_)
<< "root_net_ needs to be set for all non-root solvers";
// 得到是训练网络还是测试网络
phase_ = in_param.state().phase();
phase_ = in_param.state().phase();
/*
* 引用作为函数参数进行传递时,实质上传递的是实参本身,即传递进来的不是实参的一个拷贝,
* 因此对形参的修改其实是对实参的修改,所以在用引用进行参数传递时,不仅节约时间,而且
* 可以节约空间。
*/
// Filter layers based on their include/exclude rules and
// the current NetState.
// 传入网络结构参数,然后可以根据LayerParameter中的
// include和exclude来确定该层是否应该包含在网络中,返回过滤过后的网络参数
NetParameter filtered_param;
FilterNet(in_param, &filtered_param);
LOG_IF(INFO, Caffe::root_solver())
<< "Initializing net from parameters: " << std::endl
<< filtered_param.DebugString();
<< "Initializing net from parameters: " << std::endl
<< filtered_param.DebugString();
// Create a copy of filtered_param with splits added where necessary.
/*
* InsertSplits函数,若某层的top(即输出)被两个或两个以上的层作为输入或输入的一部分,
 * 则对该层增加空间位置与其成并列关系的一个或若干个SplitLayer。(没仔细看)
*/
NetParameter param;
InsertSplits(filtered_param, ¶m);
/*
* InsertSplits函数,若某层的top(即输出)被两个或两个以上的层作为输入或输入的一部分,
 * 则对该层增加空间位置与其成并列关系的一个或若干个SplitLayer。(没仔细看)
*/
NetParameter param;
InsertSplits(filtered_param, ¶m);
// Basically, build all the layers and set up their connections.
name_ = param.name();
map<string, int> blob_name_to_idx;
//LOG(INFO) << " -> " <<" "<<(blob_name_to_idx).size()<<"heheda";
name_ = param.name();
map<string, int> blob_name_to_idx;
//LOG(INFO) << " -> " <<" "<<(blob_name_to_idx).size()<<"heheda";
//
set<string> available_blobs;
memory_used_ = 0;
set<string> available_blobs;
memory_used_ = 0;
// For each layer, set up its input and output
// resize是改变容器的大小,并且使用默认构造函数创建对象
// 参数初始化
bottom_vecs_.resize(param.layer_size());//存每一层的输入(bottom)blob指针
top_vecs_.resize(param.layer_size());//存每一层输出(top)的blob指针
bottom_id_vecs_.resize(param.layer_size());//存每一层输入(bottom)blob的id
param_id_vecs_.resize(param.layer_size());//存每一层参数blob的id
top_id_vecs_.resize(param.layer_size());//存每一层输出(top)的blob的id
bottom_need_backward_.resize(param.layer_size());//该blob是需要返回的bool值
// resize是改变容器的大小,并且使用默认构造函数创建对象
// 参数初始化
bottom_vecs_.resize(param.layer_size());//存每一层的输入(bottom)blob指针
top_vecs_.resize(param.layer_size());//存每一层输出(top)的blob指针
bottom_id_vecs_.resize(param.layer_size());//存每一层输入(bottom)blob的id
param_id_vecs_.resize(param.layer_size());//存每一层参数blob的id
top_id_vecs_.resize(param.layer_size());//存每一层输出(top)的blob的id
bottom_need_backward_.resize(param.layer_size());//该blob是需要返回的bool值
// 循环每一层
for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) {
// For non-root solvers, whether this layer is shared from root_net_.
// 默认为false
bool share_from_root = !Caffe::root_solver()
&& root_net_->layers_[layer_id]->ShareInParallel();
// 默认为false
bool share_from_root = !Caffe::root_solver()
&& root_net_->layers_[layer_id]->ShareInParallel();
// Inherit phase from net if unset.
// 如果每一层没有设置phase,则从网络参数中继承
if (!param.layer(layer_id).has_phase()) {
param.mutable_layer(layer_id)->set_phase(phase_);
}
// Setup layer.
// 每一层的结构参数常量
const LayerParameter& layer_param = param.layer(layer_id);
// 如果每一层没有设置phase,则从网络参数中继承
if (!param.layer(layer_id).has_phase()) {
param.mutable_layer(layer_id)->set_phase(phase_);
}
// Setup layer.
// 每一层的结构参数常量
const LayerParameter& layer_param = param.layer(layer_id);
// 是否设置了对输入求导,参考caffe.proto里LayerParameter的propagate_down参数
if (layer_param.propagate_down_size() > 0) {
CHECK_EQ(layer_param.propagate_down_size(),
layer_param.bottom_size())
<< "propagate_down param must be specified "
<< "either 0 or bottom_size times ";
}
if (layer_param.propagate_down_size() > 0) {
CHECK_EQ(layer_param.propagate_down_size(),
layer_param.bottom_size())
<< "propagate_down param must be specified "
<< "either 0 or bottom_size times ";
}
if (share_from_root) {
LOG(INFO) << "Sharing layer " << layer_param.name() << " from root net";
layers_.push_back(root_net_->layers_[layer_id]);
layers_[layer_id]->SetShared(true);
}
else {
//把每一特定层的指针存放在容器中
layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));
layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));
}
//存放网络中每一层的名称
layer_names_.push_back(layer_param.name());
LOG_IF(INFO, Caffe::root_solver())
<< "Creating Layer " << layer_param.name();
layer_names_.push_back(layer_param.name());
LOG_IF(INFO, Caffe::root_solver())
<< "Creating Layer " << layer_param.name();
// 判断每层是否需要反向传播
bool need_backward = false;
bool need_backward = false;
// Figure out this layer's input and output
// 计算这一层的输入和输出;注意第一层没有bottom输入,所以第一层不会进入该循环
// (此处容易忽略,调试时曾耽误不少时间)
for (int bottom_id = 0; bottom_id < layer_param.bottom_size();
++bottom_id) {
//LOG(INFO) << " -> " <<" "<<(blob_name_to_idx).size()<<"sbheheda";
const int blob_id = AppendBottom(param, layer_id, bottom_id,
&available_blobs, &blob_name_to_idx);
// 计算这一层的输入和输出;注意第一层没有bottom输入,所以第一层不会进入该循环
// (此处容易忽略,调试时曾耽误不少时间)
for (int bottom_id = 0; bottom_id < layer_param.bottom_size();
++bottom_id) {
//LOG(INFO) << " -> " <<" "<<(blob_name_to_idx).size()<<"sbheheda";
const int blob_id = AppendBottom(param, layer_id, bottom_id,
&available_blobs, &blob_name_to_idx);
need_backward |= blob_need_backward_[blob_id];
}
}
// 每一层输出数据的个数
int num_top = layer_param.top_size();
int num_top = layer_param.top_size();
// 对每层的每个输出数据
for (int top_id = 0; top_id < num_top; ++top_id) {
AppendTop(param, layer_id, top_id, &available_blobs, &blob_name_to_idx);
// Collect Input layer tops as Net inputs.
if (layer_param.type() == "Input") {
if (layer_param.type() == "Input") {
const int blob_id = blobs_.size() - 1;
net_input_blob_indices_.push_back(blob_id);
net_input_blobs_.push_back(blobs_[blob_id].get());
net_input_blob_indices_.push_back(blob_id);
net_input_blobs_.push_back(blobs_[blob_id].get());
}
}
}
// If the layer specifies that AutoTopBlobs() -> true and the LayerParameter
// specified fewer than the required number (as specified by
// ExactNumTopBlobs() or MinTopBlobs()), allocate them here.
Layer<Dtype>* layer = layers_[layer_id].get();
if (layer->AutoTopBlobs()) {
const int needed_num_top =
std::max(layer->MinTopBlobs(), layer->ExactNumTopBlobs());
for (; num_top < needed_num_top; ++num_top) {
// Add "anonymous" top blobs -- do not modify available_blobs or
// blob_name_to_idx as we don't want these blobs to be usable as input
// to other layers.
AppendTop(param, layer_id, num_top, NULL, NULL);
}
}
// specified fewer than the required number (as specified by
// ExactNumTopBlobs() or MinTopBlobs()), allocate them here.
Layer<Dtype>* layer = layers_[layer_id].get();
if (layer->AutoTopBlobs()) {
const int needed_num_top =
std::max(layer->MinTopBlobs(), layer->ExactNumTopBlobs());
for (; num_top < needed_num_top; ++num_top) {
// Add "anonymous" top blobs -- do not modify available_blobs or
// blob_name_to_idx as we don't want these blobs to be usable as input
// to other layers.
AppendTop(param, layer_id, num_top, NULL, NULL);
}
}
// After this layer is connected, set it up.
if (share_from_root) {
// Set up size of top blobs using root_net_
const vector<Blob<Dtype>*>& base_top = root_net_->top_vecs_[layer_id];
const vector<Blob<Dtype>*>& this_top = this->top_vecs_[layer_id];
for (int top_id = 0; top_id < base_top.size(); ++top_id) {
this_top[top_id]->ReshapeLike(*base_top[top_id]);
LOG(INFO) << "Created top blob " << top_id << " (shape: "
<< this_top[top_id]->shape_string() << ") for shared layer "
<< layer_param.name();
}
}
if (share_from_root) {
// Set up size of top blobs using root_net_
const vector<Blob<Dtype>*>& base_top = root_net_->top_vecs_[layer_id];
const vector<Blob<Dtype>*>& this_top = this->top_vecs_[layer_id];
for (int top_id = 0; top_id < base_top.size(); ++top_id) {
this_top[top_id]->ReshapeLike(*base_top[top_id]);
LOG(INFO) << "Created top blob " << top_id << " (shape: "
<< this_top[top_id]->shape_string() << ") for shared layer "
<< layer_param.name();
}
}
else {
// 设置layers实例
// 调用layer类的Setup函数进行初始化,输入参数:每个layer的输入blobs以及输出blobs
// 为每个blob设置大小
// 设置每一层的可学习参数,保存在layer的成员blobs_中
layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);
}
// 设置layers实例
// 调用layer类的Setup函数进行初始化,输入参数:每个layer的输入blobs以及输出blobs
// 为每个blob设置大小
// 设置每一层的可学习参数,保存在layer的成员blobs_中
layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);
}
LOG_IF(INFO, Caffe::root_solver())
<< "Setting up " << layer_names_[layer_id];
<< "Setting up " << layer_names_[layer_id];
// 对每一层的输出blobs循环
for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) {
blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0));
}
blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id);
LOG_IF(INFO, Caffe::root_solver())
<< "Top shape: " << top_vecs_[layer_id][top_id]->shape_string();
blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0));
}
blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id);
LOG_IF(INFO, Caffe::root_solver())
<< "Top shape: " << top_vecs_[layer_id][top_id]->shape_string();
if (layer->loss(top_id)) {
LOG_IF(INFO, Caffe::root_solver())
<< " with loss weight " << layer->loss(top_id);
}
LOG_IF(INFO, Caffe::root_solver())
<< " with loss weight " << layer->loss(top_id);
}
// 计算网络所使用的字节数
memory_used_ += top_vecs_[layer_id][top_id]->count();
}
memory_used_ += top_vecs_[layer_id][top_id]->count();
}
// 打印目前所需的存储
LOG_IF(INFO, Caffe::root_solver())
<< "Memory required for data: " << memory_used_ * sizeof(Dtype);
LOG_IF(INFO, Caffe::root_solver())
<< "Memory required for data: " << memory_used_ * sizeof(Dtype);
// param通常用来设置学习率之类的参数,每层的param有多少个则说明至少有这么多个
// 可学习参数
const int param_size = layer_param.param_size();
// 可学习参数
const int param_size = layer_param.param_size();
//可学习参数个数
const int num_param_blobs = layers_[layer_id]->blobs().size();
const int num_param_blobs = layers_[layer_id]->blobs().size();
CHECK_LE(param_size, num_param_blobs)
<< "Too many params specified for layer " << layer_param.name();
<< "Too many params specified for layer " << layer_param.name();
ParamSpec default_param_spec;
// 对每一个可学习的参数循环
for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
// 如果prototxt文件没有设置param,则使用默认param
const ParamSpec* param_spec = (param_id < param_size) ?
&layer_param.param(param_id) : &default_param_spec;
const ParamSpec* param_spec = (param_id < param_size) ?
&layer_param.param(param_id) : &default_param_spec;
// 学习率不等于0,表示需要对这个可学习的参数反向求导
const bool param_need_backward = param_spec->lr_mult() != 0;
need_backward |= param_need_backward;
const bool param_need_backward = param_spec->lr_mult() != 0;
need_backward |= param_need_backward;
layers_[layer_id]->set_param_propagate_down(param_id,
param_need_backward);
}
param_need_backward);
}
// 接下来的工作是将每层的parameter的指针塞进params_,尤其是learnable_params_。
// 对每一层的每个可学习参数循环
for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
// param:整个网络参数,layer_id:层数id,param_id:可学习参数id
// 设置每一层权值的一些参数,学习率,正则率,参数id等
AppendParam(param, layer_id, param_id);
}
// 对每一层的每个可学习参数循环
for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
// param:整个网络参数,layer_id:层数id,param_id:可学习参数id
// 设置每一层权值的一些参数,学习率,正则率,参数id等
AppendParam(param, layer_id, param_id);
}
// Finally, set the backward flag
// 最后设置反向传播标志
layer_need_backward_.push_back(need_backward);
if (need_backward) {
for (int top_id = 0; top_id < top_id_vecs_[layer_id].size(); ++top_id) {
blob_need_backward_[top_id_vecs_[layer_id][top_id]] = true;
}
}
}
// 每一层的循环在这里结束
// 最后设置反向传播标志
layer_need_backward_.push_back(need_backward);
if (need_backward) {
for (int top_id = 0; top_id < top_id_vecs_[layer_id].size(); ++top_id) {
blob_need_backward_[top_id_vecs_[layer_id][top_id]] = true;
}
}
}
// 每一层的循环在这里结束
// Go through the net backwards to determine which blobs contribute to the
// loss. We can skip backward computation for blobs that don't contribute
// to the loss.
// 寻找反向传播过程中哪些blobs对最终的loss有影响,如果某个blob对最终的loss没有贡献,
// 则不需要对这个blob求梯度
// Also checks if all bottom blobs don't need backward computation (possible
// because the skip_propagate_down param) and so we can skip bacward
// computation for the entire layer
// 还要检查是否所有的bottom blobs都不需要求梯度
// loss. We can skip backward computation for blobs that don't contribute
// to the loss.
// 寻找反向传播过程中哪些blobs对最终的loss有影响,如果某个blob对最终的loss没有贡献,
// 则不需要对这个blob求梯度
// Also checks if all bottom blobs don't need backward computation (possible
// because the skip_propagate_down param) and so we can skip bacward
// computation for the entire layer
// 还要检查是否所有的bottom blobs都不需要求梯度
set<string> blobs_under_loss;
set<string> blobs_skip_backp;
set<string> blobs_skip_backp;
// 对每一层从后向前循环
for (int layer_id = layers_.size() - 1; layer_id >= 0; --layer_id) {
for (int layer_id = layers_.size() - 1; layer_id >= 0; --layer_id) {
bool layer_contributes_loss = false;
bool layer_skip_propagate_down = true;
bool layer_skip_propagate_down = true;
// 对每一层的每个top blob循环
for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]];
for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]];
if (layers_[layer_id]->loss(top_id) ||
(blobs_under_loss.find(blob_name) != blobs_under_loss.end())) {
layer_contributes_loss = true;
}
(blobs_under_loss.find(blob_name) != blobs_under_loss.end())) {
layer_contributes_loss = true;
}
if (blobs_skip_backp.find(blob_name) == blobs_skip_backp.end()) {
layer_skip_propagate_down = false;
}
layer_skip_propagate_down = false;
}
// 只要这一层有一个blob对loss有贡献,就说明这层对loss有贡献
if (layer_contributes_loss && !layer_skip_propagate_down)
break;
}
if (layer_contributes_loss && !layer_skip_propagate_down)
break;
}
// If this layer can skip backward computation, also all his bottom blobs
// don't need backpropagation
// 如果这一层跳过梯度计算,那么这一层所有的输入blobs都不需要计算梯度
if (layer_need_backward_[layer_id] && layer_skip_propagate_down) {
layer_need_backward_[layer_id] = false;
for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size();
++bottom_id) {
bottom_need_backward_[layer_id][bottom_id] = false;
}
}
// don't need backpropagation
// 如果这一层跳过梯度计算,那么这一层所有的输入blobs都不需要计算梯度
if (layer_need_backward_[layer_id] && layer_skip_propagate_down) {
layer_need_backward_[layer_id] = false;
for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size();
++bottom_id) {
bottom_need_backward_[layer_id][bottom_id] = false;
}
}
if (!layer_contributes_loss) { layer_need_backward_[layer_id] = false; }
if (Caffe::root_solver()) {
if (layer_need_backward_[layer_id]) {
LOG(INFO) << layer_names_[layer_id] << " needs backward computation.";
}
else {
LOG(INFO) << layer_names_[layer_id]
<< " does not need backward computation.";
}
}
if (layer_need_backward_[layer_id]) {
LOG(INFO) << layer_names_[layer_id] << " needs backward computation.";
}
else {
LOG(INFO) << layer_names_[layer_id]
<< " does not need backward computation.";
}
}
for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size();
++bottom_id) {
if (layer_contributes_loss) {
const string& blob_name =
blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
blobs_under_loss.insert(blob_name);
}
else {
bottom_need_backward_[layer_id][bottom_id] = false;
}
if (!bottom_need_backward_[layer_id][bottom_id]) {
const string& blob_name =
blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
blobs_skip_backp.insert(blob_name);
}
}
}
// 从后向前循环结束
++bottom_id) {
if (layer_contributes_loss) {
const string& blob_name =
blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
blobs_under_loss.insert(blob_name);
}
else {
bottom_need_backward_[layer_id][bottom_id] = false;
}
if (!bottom_need_backward_[layer_id][bottom_id]) {
const string& blob_name =
blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
blobs_skip_backp.insert(blob_name);
}
}
}
// 从后向前循环结束
// Handle force_backward if needed.
// 如果设置强制计算梯度
if (param.force_backward()) {
for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) {
layer_need_backward_[layer_id] = true;
for (int bottom_id = 0;
bottom_id < bottom_need_backward_[layer_id].size(); ++bottom_id) {
bottom_need_backward_[layer_id][bottom_id] =
bottom_need_backward_[layer_id][bottom_id] ||
layers_[layer_id]->AllowForceBackward(bottom_id);
blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] =
blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] ||
bottom_need_backward_[layer_id][bottom_id];
}
for (int param_id = 0; param_id < layers_[layer_id]->blobs().size();
++param_id) {
layers_[layer_id]->set_param_propagate_down(param_id, true);
}
}
}
// In the end, all remaining blobs are considered output blobs.
// 最后,输入输出blob中除了输入blob剩下的都作为网络的输出,比如loss blob
for (set<string>::iterator it = available_blobs.begin();
it != available_blobs.end(); ++it) {
LOG_IF(INFO, Caffe::root_solver())
<< "This network produces output " << *it;
net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
}
// 最后,输入输出blob中除了输入blob剩下的都作为网络的输出,比如loss blob
for (set<string>::iterator it = available_blobs.begin();
it != available_blobs.end(); ++it) {
LOG_IF(INFO, Caffe::root_solver())
<< "This network produces output " << *it;
net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
}
for (size_t blob_id = 0; blob_id < blob_names_.size(); ++blob_id) {
blob_names_index_[blob_names_[blob_id]] = blob_id;
}
blob_names_index_[blob_names_[blob_id]] = blob_id;
}
for (size_t layer_id = 0; layer_id < layer_names_.size(); ++layer_id) {
layer_names_index_[layer_names_[layer_id]] = layer_id;
}
layer_names_index_[layer_names_[layer_id]] = layer_id;
}
ShareWeights();
debug_info_ = param.debug_info();
LOG_IF(INFO, Caffe::root_solver()) << "Network initialization done.";
}
//在Net<Dtype>::Init中有几个重要的函数如下:
// 每一层的结构参数常量
LayerParameter layer_param_
vector<shared_ptr<Layer<Dtype> > > layers_
const LayerParameter& layer_param = param.layer(layer_id);
layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));//把每一特定层的指针存放在容器中
// Get a layer using a LayerParameter.
// Given a LayerParameter, returns a smart pointer to a newly constructed
// instance of the concrete layer type named by param.type().
static shared_ptr<Layer<Dtype> > CreateLayer(const LayerParameter& param) {
// Only the root solver logs creation, so multi-GPU runs don't duplicate logs.
if (Caffe::root_solver()) {
LOG(INFO) << "Creating layer " << param.name();
}
const string& type = param.type();
CreatorRegistry& registry = Registry();
// Abort if this type was never registered via REGISTER_LAYER_CLASS/CREATOR.
CHECK_EQ(registry.count(type), 1) << "Unknown layer type: " << type
<< " (known types: " << LayerTypeListString() << ")";
// Invoke the registered Creator function pointer to build the layer.
return registry[type](param);
}
return registry[type](param)会调用611行return shared_ptr<Layer<Dtype> >(new type##Layer<Dtype>(param));
type##Layer继承于Layer,所以先调用基类的构造函数Layer,如下:
// 构造方法只复制层参数说明的值,如果层说明参数中提供了权值和偏置参数,也复制
// 继承自Layer类的子类都会显示的调用Layer的构造函数
explicit Layer(const LayerParameter& param)
: layer_param_(param), is_shared_(false) {
// Set phase and copy blobs (if there are any).
phase_ = param.phase(); //训练还是测试
debug_info_ = param.debug_info();
LOG_IF(INFO, Caffe::root_solver()) << "Network initialization done.";
}
//在Net<Dtype>::Init中有几个重要的函数如下:
// 每一层的结构参数常量
LayerParameter layer_param_
vector<shared_ptr<Layer<Dtype> > > layers_
const LayerParameter& layer_param = param.layer(layer_id);
layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));//把每一特定层的指针存放在容器中
// Get a layer using a LayerParameter.
// Given a LayerParameter, returns a smart pointer to a newly constructed
// instance of the concrete layer type named by param.type().
static shared_ptr<Layer<Dtype> > CreateLayer(const LayerParameter& param) {
// Only the root solver logs creation, so multi-GPU runs don't duplicate logs.
if (Caffe::root_solver()) {
LOG(INFO) << "Creating layer " << param.name();
}
const string& type = param.type();
CreatorRegistry& registry = Registry();
// Abort if this type was never registered via REGISTER_LAYER_CLASS/CREATOR.
CHECK_EQ(registry.count(type), 1) << "Unknown layer type: " << type
<< " (known types: " << LayerTypeListString() << ")";
// Invoke the registered Creator function pointer to build the layer.
return registry[type](param);
}
return registry[type](param)会调用611行return shared_ptr<Layer<Dtype> >(new type##Layer<Dtype>(param));
type##Layer继承于Layer,所以先调用基类的构造函数Layer,如下:
// 构造方法只复制层参数说明的值,如果层说明参数中提供了权值和偏置参数,也复制
// 继承自Layer类的子类都会显示的调用Layer的构造函数
explicit Layer(const LayerParameter& param)
: layer_param_(param), is_shared_(false) {
// Set phase and copy blobs (if there are any).
phase_ = param.phase(); //训练还是测试
// 在layer类中被初始化,如果blobs_size() > 0
// 在prototxt文件中一般没有提供blobs参数,所以这段代码一般不执行
if (layer_param_.blobs_size() > 0) {
blobs_.resize(layer_param_.blobs_size());
// 在prototxt文件中一般没有提供blobs参数,所以这段代码一般不执行
if (layer_param_.blobs_size() > 0) {
blobs_.resize(layer_param_.blobs_size());
for (int i = 0; i < layer_param_.blobs_size(); ++i) {
blobs_[i].reset(new Blob<Dtype>());
blobs_[i]->FromProto(layer_param_.blobs(i));
}
}
}
// Layer registration works like solver registration: each layer source file
// under src\caffe\layers\ ends with REGISTER_LAYER_CLASS(XXX); the macro is
// defined in layer_factory.hpp.
/*
 * The REGISTER_LAYER_CLASS macro generates a Creator_<type>Layer factory
 * function for the given type and registers it, keyed by the type name, in
 * the map held by LayerRegistry.
 */
#define REGISTER_LAYER_CLASS(type) \
template <typename Dtype> \
shared_ptr<Layer<Dtype> > Creator_##type##Layer(const LayerParameter& param) \
{ \
return shared_ptr<Layer<Dtype> >(new type##Layer<Dtype>(param)); \
} \
REGISTER_LAYER_CREATOR(type, Creator_##type##Layer)
blobs_[i].reset(new Blob<Dtype>());
blobs_[i]->FromProto(layer_param_.blobs(i));
}
}
}
// Layer registration works like solver registration: each layer source file
// under src\caffe\layers\ ends with REGISTER_LAYER_CLASS(XXX); the macro is
// defined in layer_factory.hpp.
/*
 * The REGISTER_LAYER_CLASS macro generates a Creator_<type>Layer factory
 * function for the given type and registers it, keyed by the type name, in
 * the map held by LayerRegistry.
 */
#define REGISTER_LAYER_CLASS(type) \
template <typename Dtype> \
shared_ptr<Layer<Dtype> > Creator_##type##Layer(const LayerParameter& param) \
{ \
return shared_ptr<Layer<Dtype> >(new type##Layer<Dtype>(param)); \
} \
REGISTER_LAYER_CREATOR(type, Creator_##type##Layer)
}
#define REGISTER_LAYER_CREATOR(type, creator) \
static LayerRegisterer<float> g_creator_f_##type(#type, creator<float>); \
static LayerRegisterer<double> g_creator_d_##type(#type, creator<double>) \
#define REGISTER_LAYER_CREATOR(type, creator) \
static LayerRegisterer<float> g_creator_f_##type(#type, creator<float>); \
static LayerRegisterer<double> g_creator_d_##type(#type, creator<double>) \
// Registers a layer Creator with LayerRegistry at static-initialization time.
// REGISTER_LAYER_CREATOR instantiates file-scope static objects of this class
// (one float, one double), so AddCreator runs for every layer type before
// main() starts.
template <typename Dtype>
class LayerRegisterer {
public:
// 'creator' is a pointer to the factory function that REGISTER_LAYER_CLASS
// generated for this layer type.
LayerRegisterer(const string& type,
shared_ptr<Layer<Dtype> > (*creator)(const LayerParameter&))
{
// LOG(INFO) << "Registering layer type: " << type;
LayerRegistry<Dtype>::AddCreator(type, creator);
}
};
// Adds a creator.
// 给定类型,以及函数指针,加入到注册表
static void AddCreator(const string& type, Creator creator) {
class LayerRegisterer {
public:
LayerRegisterer(const string& type,
shared_ptr<Layer<Dtype> > (*creator)(const LayerParameter&))
{
// LOG(INFO) << "Registering layer type: " << type;
LayerRegistry<Dtype>::AddCreator(type, creator);
}
};
// Adds a creator.
// 给定类型,以及函数指针,加入到注册表
static void AddCreator(const string& type, Creator creator) {
CreatorRegistry& registry = Registry();
// Returns the singleton type->Creator map. It is heap-allocated on first use
// and (as written here) never freed, which sidesteps static-destruction-order
// problems at program exit.
static CreatorRegistry& Registry() {
static CreatorRegistry* g_registry_ = new CreatorRegistry();
return *g_registry_;
}
// Returns the singleton type->Creator map. It is heap-allocated on first use
// and (as written here) never freed, which sidesteps static-destruction-order
// problems at program exit.
static CreatorRegistry& Registry() {
static CreatorRegistry* g_registry_ = new CreatorRegistry();
return *g_registry_;
}
/*
for (typename CreatorRegistry::iterator iter = registry.begin();
iter != registry.end(); ++iter) {
std::cout<<"Layer names:"<<(iter->first)<<std::endl;
}
*/
for (typename CreatorRegistry::iterator iter = registry.begin();
iter != registry.end(); ++iter) {
std::cout<<"Layer names:"<<(iter->first)<<std::endl;
}
*/
CHECK_EQ(registry.count(type), 0)
<< "Layer type " << type << " already registered.";
<< "Layer type " << type << " already registered.";
registry[type] = creator;
}
在Net<Dtype>::Init中重要的函数:
// 设置layers实例
// 调用layer类的Setup函数进行初始化,输入参数:每个layer的输入blobs以及输出blobs
// 为每个blob设置大小
// 设置每一层的可学习参数,保存在layer的成员blobs_中
layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);
// Defined in layer.hpp.
// 1. Checks that the bottom/top blob counts satisfy this layer's requirements
//    (different layer types accept different numbers of inputs/outputs).
// 2. Calls LayerSetUp for layer-specific initialization; each Layer subclass
//    overrides that function to do its own custom setup.
// 3. Calls Reshape to allocate appropriately sized storage for the top blobs.
// 4. Sets the loss-weight multiplier for each top blob; for non-loss layers
//    the multiplier is zero.
//
// This method is non-virtual; the sequence is fixed and must not be overridden.
// bottom: the layer's input data; the blobs' storage is already allocated.
// top: the layer's output data; the blob objects are constructed but their
//      storage is not yet allocated -- its size depends on the bottom blobs
//      and layer_param_ together, and is set inside Reshape.
void SetUp(const vector<Blob<Dtype>*>& bottom, // called at model init to set up layers and their connections
const vector<Blob<Dtype>*>& top) {
InitMutex();
CheckBlobCounts(bottom, top);
LayerSetUp(bottom, top);
Reshape(bottom, top);
SetLossWeights(top);
}
// ----------------------------------网络初始化完成--------------------------------------------
//在tools/caffe.cpp的int train()中,紧接着
shared_ptr<caffe::Solver<float> > solver(caffe::SolverRegistry<float>::CreateSolver(solver_param))之后是
solver->SetActionFunction(signal_handler.GetActionFunction());具体介绍见上述博客;再之后是:
solver->Solve();
在solver.hpp中有:
// solver函数的主要入口,默认iter为0。非0的iter输入到预训练的网络中来进行继续训练。
virtual void Solve(const char* resume_file = NULL);//注意不是构造函数
在solver.cpp中有:
/*
 * Training a whole network (i.e. running Caffe to train a model) really runs
 * train() in caffe.cpp, which instantiates a Solver object, initializes it,
 * and then calls its Solve() method. Solve() trains the network and calls
 * Step() internally to iterate param_.max_iter() - iter_ times.
 *
 * NOTE(review): truncated excerpt -- the real Solve() continues past the two
 * LOG lines; the remainder is reproduced further below in these notes.
 */
template <typename Dtype>
void Solver<Dtype>::Solve(const char* resume_file) {
// Check that this is the root solver (with multiple GPUs, only the root
// solver runs this part of the code).
CHECK(Caffe::root_solver());//holds true here
LOG(INFO) << "Solving " << net_->name();
LOG(INFO) << "Learning Rate Policy: " << param_.lr_policy();
}
在Net<Dtype>::Init中重要的函数:
// 设置layers实例
// 调用layer类的Setup函数进行初始化,输入参数:每个layer的输入blobs以及输出blobs
// 为每个blob设置大小
// 设置每一层的可学习参数,保存在layer的成员blobs_中
layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);
// Defined in layer.hpp.
// 1. Checks that the bottom/top blob counts satisfy this layer's requirements
//    (different layer types accept different numbers of inputs/outputs).
// 2. Calls LayerSetUp for layer-specific initialization; each Layer subclass
//    overrides that function to do its own custom setup.
// 3. Calls Reshape to allocate appropriately sized storage for the top blobs.
// 4. Sets the loss-weight multiplier for each top blob; for non-loss layers
//    the multiplier is zero.
//
// This method is non-virtual; the sequence is fixed and must not be overridden.
// bottom: the layer's input data; the blobs' storage is already allocated.
// top: the layer's output data; the blob objects are constructed but their
//      storage is not yet allocated -- its size depends on the bottom blobs
//      and layer_param_ together, and is set inside Reshape.
void SetUp(const vector<Blob<Dtype>*>& bottom, // called at model init to set up layers and their connections
const vector<Blob<Dtype>*>& top) {
InitMutex();
CheckBlobCounts(bottom, top);
LayerSetUp(bottom, top);
Reshape(bottom, top);
SetLossWeights(top);
}
// ----------------------------------网络初始化完成--------------------------------------------
//在tools/caffe.cpp的int train()中,紧接着
shared_ptr<caffe::Solver<float> > solver(caffe::SolverRegistry<float>::CreateSolver(solver_param))之后是
solver->SetActionFunction(signal_handler.GetActionFunction());具体介绍见上述博客;再之后是:
solver->Solve();
在solver.hpp中有:
// solver函数的主要入口,默认iter为0。非0的iter输入到预训练的网络中来进行继续训练。
virtual void Solve(const char* resume_file = NULL);//注意不是构造函数
在solver.cpp中有:
/*
对整个网络进行训练(也就是你运行Caffe训练某个模型)的时候,实际上是在运行caffe.cpp中的
train()函数,而这个函数实际上是实例化一个Solver对象,初始化后调用了Solver中的Solve()方法
调用此方法训练网络,其中会调用Step()方法来迭代,迭代 param_.max_iter() - iter_ 次
*/
template <typename Dtype>
void Solver<Dtype>::Solve(const char* resume_file) {
// 检查当前是否是root_solver(多GPU模式下,只有root_solver才运行这一部分的代码)
CHECK(Caffe::root_solver());//为真
LOG(INFO) << "Solving " << net_->name();
LOG(INFO) << "Learning Rate Policy: " << param_.lr_policy();
// Initialize to false every time we start solving.
// requested_early_exit_`一开始被赋值为false,也就是现在没有要求在优化结束前退出
requested_early_exit_ = false;
// 判断`resume_file`这个指针是否NULL,
//如果不是则需要从resume_file存储的路径里读取之前训练的状态
if (resume_file) {
LOG(INFO) << "Restoring previous solver status from " << resume_file;
Restore(resume_file);
}
// requested_early_exit_`一开始被赋值为false,也就是现在没有要求在优化结束前退出
requested_early_exit_ = false;
// 判断`resume_file`这个指针是否NULL,
//如果不是则需要从resume_file存储的路径里读取之前训练的状态
if (resume_file) {
LOG(INFO) << "Restoring previous solver status from " << resume_file;
Restore(resume_file);
}
// For a network that is trained by the solver, no bottom or top vecs
// should be given, and we will just provide dummy vecs.
int start_iter = iter_;
//对于一个正在训练的网络,没有bottom或top向量被给,而且仅仅提供dummy vecs
// should be given, and we will just provide dummy vecs.
int start_iter = iter_;
//对于一个正在训练的网络,没有bottom或top向量被给,而且仅仅提供dummy vecs
// 然后调用了'Step'函数,这个函数执行了实际的逐步的迭代过程
// 最大迭代次数
Step(param_.max_iter() - iter_);
// 最大迭代次数
Step(param_.max_iter() - iter_);
// If we haven't already, save a snapshot after optimization, unless
// overridden by setting snapshot_after_train := false
// 迭代结束或者遇到系统信号提前结束后,判断是否需要在训练结束之后snapshot
// 这个可以在solver.prototxt里设置
if (param_.snapshot_after_train()
&& (!param_.snapshot() || iter_ % param_.snapshot() != 0)) {
Snapshot();
}
// 如果在`Step`函数的迭代过程中遇到了系统信号,且我们的处理方式设置为`STOP`,
// 那么`requested_early_exit_`会被修改为true,迭代提前结束,输出相关信息
if (requested_early_exit_) {
LOG(INFO) << "Optimization stopped early.";
return;
}
// After the optimization is done, run an additional train and test pass to
// display the train and test loss/outputs if appropriate (based on the
// display and test_interval settings, respectively). Unlike in the rest of
// training, for the train net we only run a forward pass as we've already
// updated the parameters "max_iter" times -- this final pass is only done to
// display the loss, which is computed in the forward pass.
// 优化完后,运行一个额外的训练和测试过程展示训练测试的loss或者输出。
// 判断是否需要输出最后的loss
if (param_.display() && iter_ % param_.display() == 0) {
int average_loss = this->param_.average_loss();
Dtype loss;
net_->Forward(&loss);
// overridden by setting snapshot_after_train := false
// 迭代结束或者遇到系统信号提前结束后,判断是否需要在训练结束之后snapshot
// 这个可以在solver.prototxt里设置
if (param_.snapshot_after_train()
&& (!param_.snapshot() || iter_ % param_.snapshot() != 0)) {
Snapshot();
}
// 如果在`Step`函数的迭代过程中遇到了系统信号,且我们的处理方式设置为`STOP`,
// 那么`requested_early_exit_`会被修改为true,迭代提前结束,输出相关信息
if (requested_early_exit_) {
LOG(INFO) << "Optimization stopped early.";
return;
}
// After the optimization is done, run an additional train and test pass to
// display the train and test loss/outputs if appropriate (based on the
// display and test_interval settings, respectively). Unlike in the rest of
// training, for the train net we only run a forward pass as we've already
// updated the parameters "max_iter" times -- this final pass is only done to
// display the loss, which is computed in the forward pass.
// 优化完后,运行一个额外的训练和测试过程展示训练测试的loss或者输出。
// 判断是否需要输出最后的loss
if (param_.display() && iter_ % param_.display() == 0) {
int average_loss = this->param_.average_loss();
Dtype loss;
net_->Forward(&loss);
UpdateSmoothedLoss(loss, start_iter, average_loss);
LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss_;
}
// 判断是否需要最后Test
if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
TestAll();
}
LOG(INFO) << "Optimization Done.";
}
//-----------------------------------Step(param_.max_iter() - iter_)-----------函数
其中的Step(param_.max_iter() - iter_)函数也在solver.cpp中
// NOTE(review): truncated excerpt of Solver<Dtype>::Step(); the full body
// (the actual training loop) is reproduced further below in these notes.
template <typename Dtype>
void Solver<Dtype>::Step(int iters) {
// Record the starting iteration (if restored from a snapshot, iter_ equals
// the iteration count at snapshot time) before computing the stop iteration.
const int start_iter = iter_;
}
// 判断是否需要最后Test
if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
TestAll();
}
LOG(INFO) << "Optimization Done.";
}
//-----------------------------------Step(param_.max_iter() - iter_)-----------函数
其中的Step(param_.max_iter() - iter_)函数也在solver.cpp中
template <typename Dtype>
void Solver<Dtype>::Step(int iters) {
// 设置开始的迭代次数(如果是从之前的snapshot恢复的,那iter_
// 等于snapshot时的迭代次数)和结束的迭代次数
const int start_iter = iter_;
// iters = param_.max_iter() - iter_
const int stop_iter = iter_ + iters;
const int stop_iter = iter_ + iters;
// 输出的loss为前average_loss次loss的平均值,在solver.prototxt里设置,默认为1,
// losses存储之前的average_loss个loss,smoothed_loss为最后要输出的均值
int average_loss = this->param_.average_loss();//默认为1
losses_.clear();
smoothed_loss_ = 0;
// losses存储之前的average_loss个loss,smoothed_loss为最后要输出的均值
int average_loss = this->param_.average_loss();//默认为1
losses_.clear();
smoothed_loss_ = 0;
//迭代
while (iter_ < stop_iter) {
// zero-init the params
// 清空上一次所有参数的梯度
net_->ClearParamDiffs();
while (iter_ < stop_iter) {
// zero-init the params
// 清空上一次所有参数的梯度
net_->ClearParamDiffs();
// test_initialization默认为true
// 判断是否需要测试
if (param_.test_interval() && iter_ % param_.test_interval() == 0
&& (iter_ > 0 || param_.test_initialization())
&& Caffe::root_solver()) {
TestAll();
// 判断是否需要测试
if (param_.test_interval() && iter_ % param_.test_interval() == 0
&& (iter_ > 0 || param_.test_initialization())
&& Caffe::root_solver()) {
TestAll();
// 判断是否需要提前结束迭代
if (requested_early_exit_) {
// Break out of the while loop because stop was requested while testing.
break;
}
}
if (requested_early_exit_) {
// Break out of the while loop because stop was requested while testing.
break;
}
}
for (int i = 0; i < callbacks_.size(); ++i) {
callbacks_[i]->on_start();
}
callbacks_[i]->on_start();
}
// 判断当前迭代次数是否需要显示loss等信息
const bool display = param_.display() && iter_ % param_.display() == 0;
net_->set_debug_info(display && param_.debug_info());
// accumulate the loss and gradient
Dtype loss = 0;
const bool display = param_.display() && iter_ % param_.display() == 0;
net_->set_debug_info(display && param_.debug_info());
// accumulate the loss and gradient
Dtype loss = 0;
// iter_size也是在solver.prototxt里设置,实际上的batch_size=iter_size*网络定义里的batch_size,
// 因此每一次迭代的loss是iter_size次迭代的和,再除以iter_size,这个loss是通过调用`Net::ForwardBackward`函数得到的
// 这个设置我的理解是在GPU的显存不够的时候使用,比如我本来想把batch_size设置为128,但是会out_of_memory,
// 借助这个方法,可以设置batch_size=32,iter_size=4,那实际上每次迭代还是处理了128个数据。
// accumulate gradients over `iter_size` x `batch_size` instances
for (int i = 0; i < param_.iter_size(); ++i) {
/*
* 调用了Net中的代码,主要完成了前向后向的计算,
* 前向用于计算模型的最终输出和Loss,后向用于
* 计算每一层网络和参数的梯度。
*/
loss += net_->ForwardBackward();
}
// 因此每一次迭代的loss是iter_size次迭代的和,再除以iter_size,这个loss是通过调用`Net::ForwardBackward`函数得到的
// 这个设置我的理解是在GPU的显存不够的时候使用,比如我本来想把batch_size设置为128,但是会out_of_memory,
// 借助这个方法,可以设置batch_size=32,iter_size=4,那实际上每次迭代还是处理了128个数据。
// accumulate gradients over `iter_size` x `batch_size` instances
for (int i = 0; i < param_.iter_size(); ++i) {
/*
* 调用了Net中的代码,主要完成了前向后向的计算,
* 前向用于计算模型的最终输出和Loss,后向用于
* 计算每一层网络和参数的梯度。
*/
loss += net_->ForwardBackward();
}
//accumulate(累积) gradients over `iter_size` x `batch_size` instances。
//默认情况下,iter_size=1,即默认情况下,一个iteration一个batch
loss /= param_.iter_size();
//默认情况下,iter_size=1,即默认情况下,一个iteration一个batch
loss /= param_.iter_size();
// 计算要输出的smoothed_loss,如果losses里还没有存够average_loss个loss
//则将当前的loss插入,如果已经存够了,则将之前的替换掉
// average the loss across iterations for smoothed reporting
/*
* 这个函数主要做Loss的平滑。由于Caffe的训练方式是SGD,我们无法把所有的数据同时
* 放入模型进行训练,那么部分数据产生的Loss就可能会和全样本的平均Loss不同,在必要
* 时候将Loss和历史过程中更新的Loss求平均就可以减少Loss的震荡问题。
*/
UpdateSmoothedLoss(loss, start_iter, average_loss);
//则将当前的loss插入,如果已经存够了,则将之前的替换掉
// average the loss across iterations for smoothed reporting
/*
* 这个函数主要做Loss的平滑。由于Caffe的训练方式是SGD,我们无法把所有的数据同时
* 放入模型进行训练,那么部分数据产生的Loss就可能会和全样本的平均Loss不同,在必要
* 时候将Loss和历史过程中更新的Loss求平均就可以减少Loss的震荡问题。
*/
UpdateSmoothedLoss(loss, start_iter, average_loss);
//输出当前迭代信息
if (display) {
LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_
<< ", loss = " << smoothed_loss_;
const vector<Blob<Dtype>*>& result = net_->output_blobs();
int score_index = 0;
for (int j = 0; j < result.size(); ++j) {
const Dtype* result_vec = result[j]->cpu_data();
const string& output_name =
net_->blob_names()[net_->output_blob_indices()[j]];
const Dtype loss_weight =
net_->blob_loss_weights()[net_->output_blob_indices()[j]];
for (int k = 0; k < result[j]->count(); ++k) {
ostringstream loss_msg_stream;
if (loss_weight) {
loss_msg_stream << " (* " << loss_weight
<< " = " << loss_weight * result_vec[k] << " loss)";
}
LOG_IF(INFO, Caffe::root_solver()) << " Train net output #"
<< score_index++ << ": " << output_name << " = "
<< result_vec[k] << loss_msg_stream.str();
}
}
}
if (display) {
LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_
<< ", loss = " << smoothed_loss_;
const vector<Blob<Dtype>*>& result = net_->output_blobs();
int score_index = 0;
for (int j = 0; j < result.size(); ++j) {
const Dtype* result_vec = result[j]->cpu_data();
const string& output_name =
net_->blob_names()[net_->output_blob_indices()[j]];
const Dtype loss_weight =
net_->blob_loss_weights()[net_->output_blob_indices()[j]];
for (int k = 0; k < result[j]->count(); ++k) {
ostringstream loss_msg_stream;
if (loss_weight) {
loss_msg_stream << " (* " << loss_weight
<< " = " << loss_weight * result_vec[k] << " loss)";
}
LOG_IF(INFO, Caffe::root_solver()) << " Train net output #"
<< score_index++ << ": " << output_name << " = "
<< result_vec[k] << loss_msg_stream.str();
}
}
}
for (int i = 0; i < callbacks_.size(); ++i) {
callbacks_[i]->on_gradients_ready();
}
// 执行梯度的更新,这个函数在基类`Solver`中没有实现,会调用每个子类自己的实现
//,后面具体分析`SGDSolver`的实现
ApplyUpdate();
//,后面具体分析`SGDSolver`的实现
ApplyUpdate();
// Increment the internal iter_ counter -- its value should always indicate
// the number of times the weights have been updated.
// 迭代次数加1
++iter_;
// 调用GetRequestedAction,实际是通过action_request_function_函数指针调用之前设置好(通过`SetRequestedAction`)的
// signal_handler的`CheckForSignals`函数,这个函数的作用是
// 会根据之前是否遇到系统信号以及信号的类型和我们设置(或者默认)的方式返回处理的方式
SolverAction::Enum request = GetRequestedAction();
// the number of times the weights have been updated.
// 迭代次数加1
++iter_;
// 调用GetRequestedAction,实际是通过action_request_function_函数指针调用之前设置好(通过`SetRequestedAction`)的
// signal_handler的`CheckForSignals`函数,这个函数的作用是
// 会根据之前是否遇到系统信号以及信号的类型和我们设置(或者默认)的方式返回处理的方式
SolverAction::Enum request = GetRequestedAction();
// Save a snapshot if needed.
// 判断当前迭代是否需要snapshot,如果request等于`SNAPSHOT`则也需要
if ((param_.snapshot()
&& iter_ % param_.snapshot() == 0
&& Caffe::root_solver()) ||
(request == SolverAction::SNAPSHOT)) {
Snapshot();
}
// 如果request为`STOP`则修改`requested_early_exit_`为true,之后就会提前结束迭代
if (SolverAction::STOP == request) {
requested_early_exit_ = true;
// Break out of training loop.
break;
}
}
}
//------------------------------------loss += net_->ForwardBackward()函数--------------------------
loss += net_->ForwardBackward()
在net.hpp中:
// Runs one forward pass followed by one backward pass, and returns the loss
// computed during the forward pass.
Dtype ForwardBackward() {
Dtype loss;
Forward(&loss);
Backward();
return loss;
}
// 前向传播
template <typename Dtype>
const vector<Blob<Dtype>*>& Net<Dtype>::Forward(Dtype* loss) {
// 判断当前迭代是否需要snapshot,如果request等于`SNAPSHOT`则也需要
if ((param_.snapshot()
&& iter_ % param_.snapshot() == 0
&& Caffe::root_solver()) ||
(request == SolverAction::SNAPSHOT)) {
Snapshot();
}
// 如果request为`STOP`则修改`requested_early_exit_`为true,之后就会提前结束迭代
if (SolverAction::STOP == request) {
requested_early_exit_ = true;
// Break out of training loop.
break;
}
}
}
//------------------------------------loss += net_->ForwardBackward()函数--------------------------
loss += net_->ForwardBackward()
在net.hpp中:
// Runs one forward pass followed by one backward pass, and returns the loss
// computed during the forward pass.
Dtype ForwardBackward() {
Dtype loss;
Forward(&loss);
Backward();
return loss;
}
// 前向传播
template <typename Dtype>
const vector<Blob<Dtype>*>& Net<Dtype>::Forward(Dtype* loss) {
//应该是训练过程的前向传播
if (loss != NULL) {
*loss = ForwardFromTo(0, layers_.size() - 1); // 比如层一共有12层,则范围应该是0~11
}
if (loss != NULL) {
*loss = ForwardFromTo(0, layers_.size() - 1); // 比如层一共有12层,则范围应该是0~11
}
else {
ForwardFromTo(0, layers_.size() - 1);
}
return net_output_blobs_;
}
//----------------------------------ForwardFromTo(0, layers_.size() - 1)函数-----------------------
在net.cpp中有:
template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
CHECK_GE(start, 0);
CHECK_LT(end, layers_.size());
Dtype loss = 0;
for (int i = start; i <= end; ++i) {
// LOG(ERROR) << "Forwarding " << layer_names_[i];
ForwardFromTo(0, layers_.size() - 1);
}
return net_output_blobs_;
}
//----------------------------------ForwardFromTo(0, layers_.size() - 1)函数-----------------------
在net.cpp中有:
// Runs the forward pass over layers [start, end] inclusive and returns the
// accumulated loss.
template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
CHECK_GE(start, 0);
CHECK_LT(end, layers_.size());
Dtype loss = 0;
for (int i = start; i <= end; ++i) {
// LOG(ERROR) << "Forwarding " << layer_names_[i];
// Forward each layer in turn; each returns its loss contribution -- in
// practice only the final (loss) layer returns a nonzero value.
Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
loss += layer_loss;
if (debug_info_) { ForwardDebugInfo(i); }
}
return loss;
}
//----------------------------layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i])函数----------------
在layer.hpp中
// 前向传播和反向传播接口。 每个Layer的派生类都应该实现Forward_cpu()
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// Lock during forward to ensure sequential forward
Lock();
Dtype loss = 0;
Reshape(bottom, top);
switch (Caffe::mode()) {
case Caffe::CPU:
Forward_cpu(bottom, top);
Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
loss += layer_loss;
if (debug_info_) { ForwardDebugInfo(i); }
}
return loss;
}
//----------------------------layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i])函数----------------
在layer.hpp中
// Forward/backward interface. Every Layer subclass must implement
// Forward_cpu() (and may implement Forward_gpu()); this wrapper reshapes the
// top blobs, dispatches to the mode-specific implementation, and accumulates
// the weighted loss over loss-carrying top blobs.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// Lock during forward to ensure sequential forward
Lock();
Dtype loss = 0;
Reshape(bottom, top);
switch (Caffe::mode()) {
case Caffe::CPU:
Forward_cpu(bottom, top);
// Compute the loss: dot product of each loss-carrying top blob's data
// with its loss weights (which are stored in the blob's diff).
for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
const int count = top[top_id]->count();
const Dtype* data = top[top_id]->cpu_data();
const Dtype* loss_weights = top[top_id]->cpu_diff();
loss += caffe_cpu_dot(count, data, loss_weights);
}
break;
case Caffe::GPU:
Forward_gpu(bottom, top);
#ifndef CPU_ONLY
// Same weighted-loss accumulation as the CPU path, using GPU buffers.
for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
const int count = top[top_id]->count();
const Dtype* data = top[top_id]->gpu_data();
const Dtype* loss_weights = top[top_id]->gpu_diff();
Dtype blob_loss = 0;
caffe_gpu_dot(count, data, loss_weights, &blob_loss);
loss += blob_loss;
}
#endif
break;
default:
LOG(FATAL) << "Unknown caffe mode.";
}
Unlock();
return loss;
}
//假如我们是CPU模式则调用Forward_cpu(bottom, top)函数;
在layer.hpp中
//---------------* 纯虚函数,子类必须实现,使用CPU进行前向计算-------------
*/
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) = 0;//此处开始调用子类层的前传函数
//-------------------936行Backward()函数----------------
在net.cpp中
// Runs the backward pass over the whole network, from the last layer to the
// first. When debug_info_ is set, also logs the L1/L2 norms of all learnable
// parameters' data and diffs.
template <typename Dtype>
void Net<Dtype>::Backward() {
BackwardFromTo(layers_.size() - 1, 0);
if (debug_info_) {
Dtype asum_data = 0, asum_diff = 0, sumsq_data = 0, sumsq_diff = 0;
for (int i = 0; i < learnable_params_.size(); ++i) {
asum_data += learnable_params_[i]->asum_data();
asum_diff += learnable_params_[i]->asum_diff();
sumsq_data += learnable_params_[i]->sumsq_data();
sumsq_diff += learnable_params_[i]->sumsq_diff();
}
const Dtype l2norm_data = std::sqrt(sumsq_data);
const Dtype l2norm_diff = std::sqrt(sumsq_diff);
LOG(ERROR) << "    [Backward] All net params (data, diff): "
<< "L1 norm = (" << asum_data << ", " << asum_diff << "); "
<< "L2 norm = (" << l2norm_data << ", " << l2norm_diff << ")";
}
}
//-------------------------- BackwardFromTo(layers_.size() - 1, 0)-----------------
// As with the forward pass, there are several backward-related functions, but
// all of them are wrappers around BackwardFromTo(int start, int end).
// Iterates layers from 'start' down to 'end' (inclusive), skipping layers
// that were marked as not needing backward computation.
template <typename Dtype>
void Net<Dtype>::BackwardFromTo(int start, int end) {
CHECK_GE(end, 0);
CHECK_LT(start, layers_.size());
for (int i = start; i >= end; --i) {
if (layer_need_backward_[i]) {
// Run backpropagation for this layer.
layers_[i]->Backward(
top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]);
if (debug_info_) { BackwardDebugInfo(i); }
}
}
}
//----------layers_[i]->Backward(top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i])--------;
在layer.hpp中有:
//给定相对于 top 层输出的梯度,计算其相对于输入的梯度,并传递到 bottom层
//一个有参数的 layer 需要计算相对于各个参数的梯度值并存储在内部
inline void Backward(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,const vector<Blob<Dtype>*>& bottom);
template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
switch (Caffe::mode()) {
case Caffe::CPU:
Backward_cpu(top, propagate_down, bottom);
break;
case Caffe::GPU:
Backward_gpu(top, propagate_down, bottom);
break;
default:
LOG(FATAL) << "Unknown caffe mode.";
}
//假如是cpu模式
//----------------------Backward_cpu(top, propagate_down, bottom)函数--------
在layer.hpp中
//纯虚函数,派生类必须实现
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) = 0;//开始调用子类的后向传播函数,前后向传播结束
//869行的UpdateSmoothedLoss(loss, start_iter, average_loss);
//和903行的ApplyUpdate()见博客分析,写的比较清楚;
for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
const int count = top[top_id]->count();
const Dtype* data = top[top_id]->cpu_data();
const Dtype* loss_weights = top[top_id]->cpu_diff();
loss += caffe_cpu_dot(count, data, loss_weights);
}
break;
case Caffe::GPU:
Forward_gpu(bottom, top);
#ifndef CPU_ONLY
for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
const int count = top[top_id]->count();
const Dtype* data = top[top_id]->gpu_data();
const Dtype* loss_weights = top[top_id]->gpu_diff();
Dtype blob_loss = 0;
caffe_gpu_dot(count, data, loss_weights, &blob_loss);
loss += blob_loss;
}
#endif
break;
default:
LOG(FATAL) << "Unknown caffe mode.";
}
Unlock();
return loss;
}
//假如我们是CPU模式则调用Forward_cpu(bottom, top)函数;
在layer.hpp中
//---------------* 纯虚函数,子类必须实现,使用CPU进行前向计算-------------
*/
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) = 0;//此处开始调用子类层的前传函数
//-------------------936行Backward()函数----------------
在net.cpp中
// Runs the backward pass over the whole network, from the last layer to the
// first. When debug_info_ is set, also logs the L1/L2 norms of all learnable
// parameters' data and diffs.
template <typename Dtype>
void Net<Dtype>::Backward() {
BackwardFromTo(layers_.size() - 1, 0);
if (debug_info_) {
Dtype asum_data = 0, asum_diff = 0, sumsq_data = 0, sumsq_diff = 0;
for (int i = 0; i < learnable_params_.size(); ++i) {
asum_data += learnable_params_[i]->asum_data();
asum_diff += learnable_params_[i]->asum_diff();
sumsq_data += learnable_params_[i]->sumsq_data();
sumsq_diff += learnable_params_[i]->sumsq_diff();
}
const Dtype l2norm_data = std::sqrt(sumsq_data);
const Dtype l2norm_diff = std::sqrt(sumsq_diff);
LOG(ERROR) << "    [Backward] All net params (data, diff): "
<< "L1 norm = (" << asum_data << ", " << asum_diff << "); "
<< "L2 norm = (" << l2norm_data << ", " << l2norm_diff << ")";
}
}
//-------------------------- BackwardFromTo(layers_.size() - 1, 0)-----------------
// As with the forward pass, there are several backward-related functions, but
// all of them are wrappers around BackwardFromTo(int start, int end).
// Iterates layers from 'start' down to 'end' (inclusive), skipping layers
// that were marked as not needing backward computation.
template <typename Dtype>
void Net<Dtype>::BackwardFromTo(int start, int end) {
CHECK_GE(end, 0);
CHECK_LT(start, layers_.size());
for (int i = start; i >= end; --i) {
if (layer_need_backward_[i]) {
// Run backpropagation for this layer.
layers_[i]->Backward(
top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]);
if (debug_info_) { BackwardDebugInfo(i); }
}
}
}
//----------layers_[i]->Backward(top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i])--------;
在layer.hpp中有:
//给定相对于 top 层输出的梯度,计算其相对于输入的梯度,并传递到 bottom层
//一个有参数的 layer 需要计算相对于各个参数的梯度值并存储在内部
inline void Backward(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,const vector<Blob<Dtype>*>& bottom);
template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
switch (Caffe::mode()) {
case Caffe::CPU:
Backward_cpu(top, propagate_down, bottom);
break;
case Caffe::GPU:
Backward_gpu(top, propagate_down, bottom);
break;
default:
LOG(FATAL) << "Unknown caffe mode.";
}
//假如是cpu模式
//----------------------Backward_cpu(top, propagate_down, bottom)函数--------
在layer.hpp中
//纯虚函数,派生类必须实现
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) = 0;//开始调用子类的后向传播函数,前后向传播结束
//869行的UpdateSmoothedLoss(loss, start_iter, average_loss);
//和903行的ApplyUpdate()见博客分析,写的比较清楚;