This post walks through how Caffe solves, i.e. trains, a network.
Inside the member function Solve():
template <typename Dtype>
void Solver<Dtype>::Solve(const char* resume_file) {
......
// For a network that is trained by the solver, no bottom or top vecs
// should be given, and we will just provide dummy vecs.
int start_iter = iter_;
// Start iterating
Step(param_.max_iter() - iter_);
......
}
Now let's look at the implementation of Solver::Step():
template <typename Dtype>
void Solver<Dtype>::Step(int iters)
{
// Starting iteration count
const int start_iter = iter_;
// Iteration count at which to stop
const int stop_iter = iter_ + iters;
// Loop until the configured number of iterations has been completed
while (iter_ < stop_iter)
{
// Zero out the gradients of all Blob parameters in net_
net_->ClearParamDiffs();
...
// accumulate the loss and gradient
Dtype loss = 0;
for (int i = 0; i < param_.iter_size(); ++i)
{
// Forward and backward passes, accumulating the loss
loss += net_->ForwardBackward();
}
loss /= param_.iter_size();
// To smooth the reported loss, average the most recent average_loss values into the member variable smoothed_loss_
UpdateSmoothedLoss(loss, start_iter, average_loss);
// Update the weights using the gradients just computed by backpropagation
ApplyUpdate();
// Increment the internal iter_ counter -- its value should always indicate
// the number of times the weights have been updated.
++iter_;
}
}
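For reference, UpdateSmoothedLoss() maintains a sliding window of the last average_loss loss values (average_loss is read from the solver prototxt and defaults to 1). A sketch of the logic, close to Caffe's actual implementation:
template <typename Dtype>
void Solver<Dtype>::UpdateSmoothedLoss(Dtype loss, int start_iter,
    int average_loss) {
  if (losses_.size() < average_loss) {
    // The window is not full yet: append and recompute the running mean.
    losses_.push_back(loss);
    int size = losses_.size();
    smoothed_loss_ = (smoothed_loss_ * (size - 1) + loss) / size;
  } else {
    // The window is full: overwrite the oldest entry and adjust the mean
    // incrementally.
    int idx = (iter_ - start_iter) % average_loss;
    smoothed_loss_ += (loss - losses_[idx]) / average_loss;
    losses_[idx] = loss;
  }
}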
Inside the while loop, the network member function Net::ForwardBackward() is called first to run the forward and backward passes and compute the loss:
Dtype ForwardBackward() {
Dtype loss;
// Forward pass
Forward(&loss);
// Backward pass
Backward();
return loss;
}
Forward() in turn calls ForwardFromTo():
template <typename Dtype>
const vector<Blob<Dtype>*>& Net<Dtype>::Forward(Dtype* loss) {
if (loss != NULL) {
*loss = ForwardFromTo(0, layers_.size() - 1);
} else {
ForwardFromTo(0, layers_.size() - 1);
}
return net_output_blobs_;
}
ForwardFromTo() then calls each layer's Forward():
template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
CHECK_GE(start, 0);
CHECK_LT(end, layers_.size());
Dtype loss = 0;
for (int i = start; i <= end; ++i) {
// LOG(ERROR) << "Forwarding " << layer_names_[i];
// Forward pass of each individual layer
Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
loss += layer_loss;
if (debug_info_) { ForwardDebugInfo(i); }
}
return loss;
}
Although the Forward() function of the base class Layer is not virtual, it wraps the virtual functions Forward_cpu() and Forward_gpu(), corresponding to the CPU and GPU versions respectively. Forward_cpu() is a pure virtual function of the parent class Layer and must be overridden by every subclass, whereas the parent class's implementation of Forward_gpu() simply calls Forward_cpu(), so overriding it is optional. In short, these two virtual functions are what allow different layer types to define their own forward computations (a minimal example follows the wrapper below).
// Forward and backward wrappers. You should implement the cpu and
// gpu specific implementations instead, and should not change these
// functions.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// Lock during forward to ensure sequential forward
Lock();
Dtype loss = 0;
Reshape(bottom, top);
switch (Caffe::mode()) {
case Caffe::CPU:
// Dispatch to the subclass's Forward_cpu() implementation
Forward_cpu(bottom, top);
for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
const int count = top[top_id]->count();
const Dtype* data = top[top_id]->cpu_data();
const Dtype* loss_weights = top[top_id]->cpu_diff();
loss += caffe_cpu_dot(count, data, loss_weights);
}
break;
case Caffe::GPU:
Forward_gpu(bottom, top);
#ifndef CPU_ONLY
for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
const int count = top[top_id]->count();
const Dtype* data = top[top_id]->gpu_data();
const Dtype* loss_weights = top[top_id]->gpu_diff();
Dtype blob_loss = 0;
caffe_gpu_dot(count, data, loss_weights, &blob_loss);
loss += blob_loss;
}
#endif
break;
default:
LOG(FATAL) << "Unknown caffe mode.";
}
Unlock();
return loss;
}
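As promised, here is a minimal sketch of a hypothetical layer (ScaleByTwoLayer is not a real Caffe layer; it simply doubles its input). Only the required pure virtual functions Reshape(), Forward_cpu(), and Backward_cpu() are overridden; Forward_gpu()/Backward_gpu() are omitted because the base class defaults fall back to the CPU versions:
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

template <typename Dtype>
class ScaleByTwoLayer : public Layer<Dtype> {
 public:
  explicit ScaleByTwoLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    top[0]->ReshapeLike(*bottom[0]);
  }
  virtual inline const char* type() const { return "ScaleByTwo"; }

 protected:
  // Required: Forward_cpu() is pure virtual in Layer.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    const Dtype* in = bottom[0]->cpu_data();
    Dtype* out = top[0]->mutable_cpu_data();
    for (int i = 0; i < bottom[0]->count(); ++i) {
      out[i] = in[i] * Dtype(2);
    }
  }
  // Required: Backward_cpu() is pure virtual as well. propagate_down[0]
  // is the bottom_need_backward_ flag passed down from Net.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    if (!propagate_down[0]) { return; }
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int i = 0; i < top[0]->count(); ++i) {
      bottom_diff[i] = top_diff[i] * Dtype(2);
    }
  }
};

}  // namespace caffe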
The backward pass, Net::Backward(), calls BackwardFromTo(int start, int end):
template <typename Dtype>
void Net<Dtype>::Backward()
{
BackwardFromTo(layers_.size() - 1, 0);
}
template <typename Dtype>
void Net<Dtype>::BackwardFromTo(int start, int end)
{
CHECK_GE(end, 0);
CHECK_LT(start, layers_.size());
// Propagate layer by layer, in reverse order
for (int i = start; i >= end; --i)
{
if (layer_need_backward_[i])
{
// As with the forward pass, Backward() is not virtual, but it wraps the
// virtual functions Backward_cpu() and Backward_gpu(), so each layer type
// has its own backward computation.
// Note the extra argument bottom_need_backward_[i] compared to Forward():
// it tells the layer, per bottom blob, whether a gradient needs to be
// propagated down, so unneeded gradient computations can be skipped.
layers_[i]->Backward(top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]);
if (debug_info_)
{
BackwardDebugInfo(i);
}
}
}
}
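For completeness, the Backward() wrapper in Layer mirrors the forward one; it simply dispatches on the current mode (this is essentially the wrapper in include/caffe/layer.hpp):
template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  switch (Caffe::mode()) {
  case Caffe::CPU:
    // propagate_down[i] tells the layer whether to compute the gradient
    // with respect to bottom[i].
    Backward_cpu(top, propagate_down, bottom);
    break;
  case Caffe::GPU:
    Backward_gpu(top, propagate_down, bottom);
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}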
After the forward and backward passes finish, the member function SGDSolver::ApplyUpdate() is called to update the weights.
template <typename Dtype>
void SGDSolver<Dtype>::ApplyUpdate()
{
// Get the current learning rate
Dtype rate = GetLearningRate();
if (this->param_.display() && this->iter_ % this->param_.display() == 0)
{
LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate;
}
// If the L2 norm of the gradients exceeds the threshold clip_gradients,
// scale all gradients down so the norm equals that threshold.
// The default threshold is -1, which disables clipping.
ClipGradients();
// Process every learnable parameter blob in the network
for (int param_id = 0; param_id < this->net_->learnable_params().size();
++param_id)
{
// Normalize the gradient (divide by iter_size when gradients were accumulated)
Normalize(param_id);
// Add the weight-decay (L2 regularization) term to the gradient
Regularize(param_id);
// Compute the update value with stochastic gradient descent
ComputeUpdateValue(param_id, rate);
}
// Apply the updates to the weights
this->net_->Update();
}
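Inside ComputeUpdateValue(), plain SGD blends the current gradient with a history (momentum) term and writes the result back into each blob's diff; Net::Update() then calls Blob::Update() on every learnable blob, which performs data := data - diff. A sketch of the CPU branch, close to SGDSolver's actual implementation (the GPU branch does the same with the caffe_gpu_* counterparts):
template <typename Dtype>
void SGDSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
  const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
  const vector<float>& net_params_lr = this->net_->params_lr();
  Dtype momentum = this->param_.momentum();
  // Per-parameter multiplier (lr_mult in the prototxt) times the global rate.
  Dtype local_rate = rate * net_params_lr[param_id];
  // history := momentum * history + local_rate * diff
  caffe_cpu_axpby(net_params[param_id]->count(), local_rate,
      net_params[param_id]->cpu_diff(), momentum,
      history_[param_id]->mutable_cpu_data());
  // Copy the blended value back into diff; Net::Update() will subtract it.
  caffe_copy(net_params[param_id]->count(),
      history_[param_id]->cpu_data(),
      net_params[param_id]->mutable_cpu_diff());
}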
Finally, the iteration counter is incremented (++iter_) and the while loop continues until the configured number of iterations has been completed.
That is the entire training process of the network.
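As a usage sketch, this is roughly how a training entry point (similar to tools/caffe.cpp) drives the whole process; "solver.prototxt" is a placeholder path:
#include <boost/shared_ptr.hpp>

#include "caffe/caffe.hpp"
#include "caffe/util/upgrade_proto.hpp"

int main() {
  // Parse the solver definition; the path is a placeholder.
  caffe::SolverParameter solver_param;
  caffe::ReadSolverParamsFromTextFileOrDie("solver.prototxt", &solver_param);
  // Create the solver of the type named in the prototxt (e.g. SGD)
  // through the solver registry.
  boost::shared_ptr<caffe::Solver<float> > solver(
      caffe::SolverRegistry<float>::CreateSolver(solver_param));
  // Runs Solve(), which calls Step() as walked through above.
  solver->Solve();
  return 0;
}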
Thanks to Rolin's blog (Rolin的专栏).