After the Solver object has been initialized, training is driven by calling the base-class method Solver::Solve(), which runs the iteration loop.
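For context, here is a minimal sketch of how Solve() is typically reached; this mirrors what the caffe train tool does, and the solver prototxt path is a made-up placeholder:

#include "caffe/caffe.hpp"

int main() {
  caffe::SolverParameter solver_param;
  // "lenet_solver.prototxt" is a hypothetical path to a solver definition.
  caffe::ReadSolverParamsFromTextFileOrDie("lenet_solver.prototxt",
                                           &solver_param);
  boost::shared_ptr<caffe::Solver<float> > solver(
      caffe::SolverRegistry<float>::CreateSolver(solver_param));
  solver->Solve();  // enters the loop walked through below
  return 0;
}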
template <typename Dtype>
void Solver<Dtype>::Solve(const char* resume_file) {
  CHECK(Caffe::root_solver());
  LOG(INFO) << "Solving " << net_->name();
  LOG(INFO) << "Learning Rate Policy: " << param_.lr_policy();
  // Initialize to false every time we start solving.
  requested_early_exit_ = false;
  if (resume_file) {
    LOG(INFO) << "Restoring previous solver status from " << resume_file;
    Restore(resume_file);
  }
  // For a network that is trained by the solver, no bottom or top vecs
  // should be given, and we will just provide dummy vecs.
  int start_iter = iter_;
  // Run the iterations.
  Step(param_.max_iter() - iter_);
  // If we haven't already, save a snapshot after optimization, unless
  // overridden by setting snapshot_after_train := false
  if (param_.snapshot_after_train()
      && (!param_.snapshot() || iter_ % param_.snapshot() != 0)) {
    Snapshot();
  }
  if (requested_early_exit_) {
    LOG(INFO) << "Optimization stopped early.";
    return;
  }
  // After the optimization is done, run an additional train and test pass to
  // display the train and test loss/outputs if appropriate (based on the
  // display and test_interval settings, respectively). Unlike in the rest of
  // training, for the train net we only run a forward pass as we've already
  // updated the parameters "max_iter" times -- this final pass is only done to
  // display the loss, which is computed in the forward pass.
  if (param_.display() && iter_ % param_.display() == 0) {
    int average_loss = this->param_.average_loss();
    Dtype loss;
    net_->Forward(&loss);
    UpdateSmoothedLoss(loss, start_iter, average_loss);
    LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss_;
  }
  if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
    TestAll();
  }
  LOG(INFO) << "Optimization Done.";
}
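All of the branching above is driven by fields of SolverParameter. As a rough illustration (the values are made up), the relevant fields can be set programmatically through the protobuf-generated setters, mirroring what a solver.prototxt normally specifies:

#include "caffe/proto/caffe.pb.h"

// Illustrative values only.
void ConfigureSolver(caffe::SolverParameter* param) {
  param->set_max_iter(10000);             // Step() runs max_iter - iter_ iterations
  param->set_display(100);                // log the smoothed loss every 100 iterations
  param->set_average_loss(20);            // smooth the reported loss over 20 values
  param->set_test_interval(500);          // TestAll() every 500 iterations
  param->set_snapshot(1000);              // Snapshot() every 1000 iterations
  param->set_snapshot_after_train(true);  // final snapshot at the end of Solve()
  param->set_iter_size(2);                // accumulate gradients over 2 batches
}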
template <typename Dtype>
void Solver<Dtype>::Step(int iters) {
  const int start_iter = iter_;
  const int stop_iter = iter_ + iters;
  // Number of previous iterations to average the loss over; defaults to 1.
  int average_loss = this->param_.average_loss();
  losses_.clear();
  smoothed_loss_ = 0;
  while (iter_ < stop_iter) {
    // Zero-init the params: clear the gradients left over from the
    // previous backward pass.
    net_->ClearParamDiffs();
    // Run a test pass every test_interval iterations.
    if (param_.test_interval() && iter_ % param_.test_interval() == 0
        && (iter_ > 0 || param_.test_initialization())
        && Caffe::root_solver()) {
      TestAll();
      if (requested_early_exit_) {
        // Break out of the while loop because stop was requested while testing.
        break;
      }
    }
    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_start();
    }
    const bool display = param_.display() && iter_ % param_.display() == 0;
    net_->set_debug_info(display && param_.debug_info());
    // Accumulate the loss and gradient over iter_size batches, so that a
    // large effective batch size does not exhaust memory.
    Dtype loss = 0;
    for (int i = 0; i < param_.iter_size(); ++i) {
      // Run the forward and backward passes.
      loss += net_->ForwardBackward();
    }
    loss /= param_.iter_size();
    // Average the loss across iterations for smoothed reporting. If
    // average_loss is 1, losses_ holds only the current loss and
    // smoothed_loss_ equals it; if average_loss is n, losses_ holds the
    // last n losses and smoothed_loss_ is their mean (see the sketch
    // after this function).
    UpdateSmoothedLoss(loss, start_iter, average_loss);
    if (display) {
      LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_
          << ", loss = " << smoothed_loss_;
      const vector<Blob<Dtype>*>& result = net_->output_blobs();
      int score_index = 0;
      for (int j = 0; j < result.size(); ++j) {
        const Dtype* result_vec = result[j]->cpu_data();
        const string& output_name =
            net_->blob_names()[net_->output_blob_indices()[j]];
        const Dtype loss_weight =
            net_->blob_loss_weights()[net_->output_blob_indices()[j]];
        for (int k = 0; k < result[j]->count(); ++k) {
          ostringstream loss_msg_stream;
          if (loss_weight) {
            loss_msg_stream << " (* " << loss_weight
                            << " = " << loss_weight * result_vec[k] << " loss)";
          }
          LOG_IF(INFO, Caffe::root_solver()) << "    Train net output #"
              << score_index++ << ": " << output_name << " = "
              << result_vec[k] << loss_msg_stream.str();
        }
      }
    }
    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_gradients_ready();
    }
    // Update the weights.
    ApplyUpdate();
    // Increment the internal iter_ counter -- its value should always indicate
    // the number of times the weights have been updated.
    ++iter_;
    SolverAction::Enum request = GetRequestedAction();
    // Save a snapshot if needed.
    if ((param_.snapshot()
         && iter_ % param_.snapshot() == 0
         && Caffe::root_solver()) ||
        (request == SolverAction::SNAPSHOT)) {
      Snapshot();
    }
    if (SolverAction::STOP == request) {
      requested_early_exit_ = true;
      // Break out of training loop.
      break;
    }
  }
}
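The smoothing referenced above amounts to a sliding-window mean over the last average_loss values. A self-contained sketch of that behavior (not the verbatim Caffe source):

#include <vector>

// Keep the last `average_loss` loss values and report their mean.
struct LossSmoother {
  std::vector<double> losses_;
  double smoothed_loss_ = 0;

  void Update(double loss, int iter, int start_iter, int average_loss) {
    if (losses_.size() < static_cast<size_t>(average_loss)) {
      // Window not full yet: grow it and update the running mean.
      losses_.push_back(loss);
      const int size = static_cast<int>(losses_.size());
      smoothed_loss_ = (smoothed_loss_ * (size - 1) + loss) / size;
    } else {
      // Window full: replace the oldest entry and adjust the mean in O(1).
      const int idx = (iter - start_iter) % average_loss;
      smoothed_loss_ += (loss - losses_[idx]) / average_loss;
      losses_[idx] = loss;
    }
  }
};

With average_loss = 1 the window holds a single value and smoothed_loss_ always equals the latest loss, matching the default behavior described above.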
// net.hpp
Dtype ForwardBackward() {
  Dtype loss;
  Forward(&loss);  // -> Net<Dtype>::Forward(Dtype* loss) -> Net<Dtype>::ForwardFromTo(int start, int end)
  Backward();      // -> Net<Dtype>::Backward() -> Net<Dtype>::BackwardFromTo(int start, int end)
  return loss;
}
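The call chains in the comments bottom out in two per-layer loops. A condensed sketch of Net<Dtype>::ForwardFromTo and Net<Dtype>::BackwardFromTo (range checks, debug logging, and callbacks omitted):

// Condensed from net.cpp; not the verbatim source.
template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
  Dtype loss = 0;
  for (int i = start; i <= end; ++i) {
    // Each layer reads its bottom blobs, writes its top blobs, and (for
    // loss layers) returns a weighted loss contribution.
    loss += layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
  }
  return loss;
}

template <typename Dtype>
void Net<Dtype>::BackwardFromTo(int start, int end) {
  // Layers are visited in reverse order so gradients flow top -> bottom.
  for (int i = start; i >= end; --i) {
    if (layer_need_backward_[i]) {
      layers_[i]->Backward(top_vecs_[i], bottom_need_backward_[i],
                           bottom_vecs_[i]);
    }
  }
}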
How the forward convolution is implemented: with each pixel position taken as the kernel center, the input region covered by the kernel is unrolled into a one-dimensional array of length K*K, with the BGR channels concatenated in order to give C*K*K values. There are H*W such arrays in total, forming an (H*W) x (C*K*K) matrix. Each convolution kernel is likewise unrolled into K*K values per channel and concatenated across channels into a C*K*K vector; with Cout kernels this gives a Cout x (C*K*K) matrix. A single matrix product of the two, followed by a reshape, yields the Cout x (H*W) output feature maps.
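A minimal sketch of that lowering (commonly called im2col; this is not Caffe's im2col_cpu, and it assumes stride 1 with zero padding of K/2 so the output resolution stays H*W):

#include <vector>

// input:  C*H*W floats, row-major as (channel, row, col).
// cols:   (C*K*K) x (H*W) matrix, row-major; column j = h*W + w holds the
//         flattened patch centered on output pixel (h, w).
void im2col_simple(const std::vector<float>& input, int C, int H, int W,
                   int K, std::vector<float>* cols) {
  const int pad = K / 2;  // "same" padding, stride 1
  cols->assign(static_cast<size_t>(C) * K * K * H * W, 0.f);
  for (int c = 0; c < C; ++c) {
    for (int kh = 0; kh < K; ++kh) {
      for (int kw = 0; kw < K; ++kw) {
        const int row = (c * K + kh) * K + kw;  // row in the patch matrix
        for (int h = 0; h < H; ++h) {
          for (int w = 0; w < W; ++w) {
            const int ih = h + kh - pad;
            const int iw = w + kw - pad;
            if (ih >= 0 && ih < H && iw >= 0 && iw < W) {
              (*cols)[static_cast<size_t>(row) * H * W + h * W + w] =
                  input[(c * H + ih) * W + iw];
            }
          }
        }
      }
    }
  }
}
// Multiplying the Cout x (C*K*K) weight matrix by this (C*K*K) x (H*W)
// matrix -- a single GEMM call -- directly yields the Cout x (H*W)
// output feature maps.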