@brief ReLULayer forward and backward passes (CPU)
ReLU activation: f(x) = max(0, x)

top_data = { bottom_data,  if bottom_data > 0
           { 0,            if bottom_data ≤ 0

During backpropagation, the incoming gradient top_diff is multiplied by the
local derivative of the activation:

∂loss/∂bottom_data = top_diff · ∂top_data/∂bottom_data
                   = { top_diff,  if bottom_data > 0
                     { 0,         if bottom_data ≤ 0
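To make the piecewise formulas concrete before reading the layer code, here is a minimal standalone sketch, independent of Caffe; the inputs and the all-ones incoming gradient are made up for illustration. It applies the same forward and backward expressions elementwise:

#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical inputs and incoming gradients for a 4-element blob.
  const float bottom_data[4] = {-2.0f, -0.5f, 0.5f, 3.0f};
  const float top_diff[4]    = { 1.0f,  1.0f, 1.0f, 1.0f};
  const float negative_slope = 0.0f;  // 0 = standard ReLU

  for (int i = 0; i < 4; ++i) {
    // Forward: max(x, 0) + negative_slope * min(x, 0)
    float top = std::max(bottom_data[i], 0.0f)
        + negative_slope * std::min(bottom_data[i], 0.0f);
    // Backward: pass the gradient through where x > 0,
    // scale it by negative_slope (here 0) where x <= 0.
    float bottom_diff = top_diff[i] * ((bottom_data[i] > 0)
        + negative_slope * (bottom_data[i] <= 0));
    std::printf("x=%5.2f  f(x)=%5.2f  grad=%5.2f\n",
                bottom_data[i], top, bottom_diff);
  }
  return 0;
}

The Caffe implementation follows: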
#include <algorithm>
#include <vector>
#include "caffe/layers/relu_layer.hpp"
namespace caffe {
template <typename Dtype>
void ReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  Dtype negative_slope = this->layer_param_.relu_param().negative_slope();
  for (int i = 0; i < count; ++i) {
    // Branchless form covering both cases: for x > 0 this yields x,
    // for x <= 0 it yields negative_slope * x. negative_slope defaults
    // to 0 in the proto, giving the standard ReLU; a non-zero value
    // gives Leaky ReLU.
    top_data[i] = std::max(bottom_data[i], Dtype(0))
        + negative_slope * std::min(bottom_data[i], Dtype(0));
  }
}
template <typename Dtype>
void ReLULayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();    // input values of this layer
    const Dtype* top_diff = top[0]->cpu_diff();          // gradient arriving from the layer above
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();  // gradient to propagate back down
    const int count = bottom[0]->count();                // number of elements (neurons) in the blob
    Dtype negative_slope = this->layer_param_.relu_param().negative_slope();
    for (int i = 0; i < count; ++i) {
      // The comparisons evaluate to 1 or 0, so the local derivative is
      // 1 where the input was > 0 and negative_slope (0 for standard
      // ReLU) where it was <= 0; the incoming gradient is scaled by it.
      bottom_diff[i] = top_diff[i] * ((bottom_data[i] > 0)
          + negative_slope * (bottom_data[i] <= 0));
    }
  }
}
#ifdef CPU_ONLY
STUB_GPU(ReLULayer);  // CPU-only build: stub out the GPU forward/backward
#endif

INSTANTIATE_CLASS(ReLULayer);  // instantiate for Dtype = float and double
} // namespace caffe
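As a sanity check on the backward formula, the analytic derivative can be compared against a centered finite difference. Below is a minimal sketch, again independent of Caffe, using a hypothetical negative_slope of 0.1 (i.e., Leaky ReLU) so that the x ≤ 0 branch contributes a non-zero gradient:

#include <algorithm>
#include <cstdio>

// Leaky ReLU forward, same branchless form as Forward_cpu above.
static float relu(float x, float slope) {
  return std::max(x, 0.0f) + slope * std::min(x, 0.0f);
}

int main() {
  const float slope = 0.1f;  // hypothetical non-zero slope (Leaky ReLU)
  const float xs[3] = {-1.5f, -0.2f, 2.0f};
  const float eps = 1e-3f;

  for (int i = 0; i < 3; ++i) {
    float x = xs[i];
    // Analytic derivative, same expression as Backward_cpu (top_diff = 1).
    float analytic = (x > 0) + slope * (x <= 0);
    // Centered finite-difference approximation of df/dx.
    float numeric = (relu(x + eps, slope) - relu(x - eps, slope)) / (2 * eps);
    std::printf("x=%5.2f  analytic=%6.3f  numeric=%6.3f\n",
                x, analytic, numeric);
  }
  return 0;
}

The two columns should agree to within the finite-difference error. In a real network the slope is configured through the relu_param message that the layer code reads; it defaults to 0, which recovers the standard ReLU.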