零基础学caffe源码 ReLU激活函数

最新推荐文章于 2023-07-23 22:36:53 发布

XZZPPP

最新推荐文章于 2023-07-23 22:36:53 发布

阅读量6.4k

点赞数 1

分类专栏： caffe源码文章标签： caffe 源码解析

本文链接：https://blog.csdn.net/xzzppp/article/details/52105629

版权

caffe源码专栏收录该内容

4 篇文章 0 订阅

订阅专栏

1、如何有效阅读caffe源码

1、阅读caffe源码最好从src/caffe/proto/caffe.proto开始，了解基本数据结构，以及内存对象与磁盘文件之间的一一映射关系；两者之间的转换过程由Protocol Buffers（protobuf）工具自动完成。

2、看include/目录中的.hpp头文件，通过头文件中的类声明理解整个框架。按照从基类到派生类的顺序，逐步掌握这些类。

3、看src/目录中.cpp和.cu源文件，亦可以按需求派生新的类。

4、编写各类工具，集成到caffe内部，如tools/下就有训练模型，提取特征，转换数据格式等工具。

2、ReLU激活函数

激活函数用在各个卷积层和全连接层的输出位置，是深度网络非线性的主要来源。常用的激活函数包括：sigmoid（$f(x)=(1+e^{-x})^{-1}$）和tanh（$f(x)=\sinh x/\cosh x$，图形类似于$\arctan x$，但值域是$(-1,1)$）。

用ReLU（f(x)=max(0,x)）作为激活函数的原因是：加速收敛，并缓解了梯度消失问题（输入为正时导数恒为1，不会像sigmoid、tanh那样进入饱和区）。

3、ReLU函数caffe源码解析

在models/bvlc_alexnet/train_val.prototxt网络模型设置中找到了ReLU层描述

# ReLU layer: the non-linearity applied to the output of conv1.
# Note: protobuf text format only accepts '#' comments; '//' would not parse.
layer {
  name: "relu1"    # layer name
  type: "ReLU"     # layer type
  bottom: "conv1"  # input blob
  top: "conv1"     # output blob (same blob name as the input)
}

下面开始按步骤解析源码：
3.1、阅读src/caffe/proto/caffe.proto中ReLU层参数

// Message type that stores the parameters of the ReLU layer.
message ReLUParameter {
  // `message` declares a structured record type; ReLUParameter is its name.
  // Field labels: `optional` marks a field that may be left unset,
  // `required` marks a field that must be set, and `repeated` marks a field
  // that may hold any number of elements (comparable to an array).

  // Slope applied to negative inputs by the layer code; the default of 0
  // gives the standard ReLU, f(x) = max(0, x).
  optional float negative_slope = 1 [default = 0];
  enum Engine {  // Enumerates the selectable Engine values.
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  // Selected engine; falls back to DEFAULT when not specified.
  optional Engine engine = 2 [default = DEFAULT];
}

3.2、阅读include/caffe/layers/relu_layer.hpp中类声明

// The #ifndef/#define/#endif include guard prevents this header from being included more than once.
#ifndef CAFFE_RELU_LAYER_HPP_   
#define CAFFE_RELU_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/neuron_layer.hpp"

//ReLULayer，派生于NeuronLayer，实现了ReLU激活函数计算
namespace caffe {
// Class template parameterized on Dtype, the element type the layer works on.
template <typename Dtype>
// ReLULayer publicly derives from NeuronLayer<Dtype> and declares the
// forward/backward passes of the ReLU activation.
class ReLULayer : public NeuronLayer<Dtype> {
 public:
  // Explicit constructor: forwards `param` unchanged to the NeuronLayer base.
  // NOTE(review): LayerParameter is presumably the protobuf-generated layer
  // configuration (caffe.pb.h is included above) -- confirm.
  explicit ReLULayer(const LayerParameter& param) 
      : NeuronLayer<Dtype>(param) {}
  // Returns the layer type name, the string "ReLU".
  virtual inline const char* type() const { return "ReLU"; }

 protected:  // In the methods below, bottom is the input side and top the output side.
  // Forward pass.
  // CPU implementation: reads bottom[0] and writes top[0].
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // GPU implementation (its definition is not part of this header).
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  // Backward pass.
  // top: output blobs whose diff is read; propagate_down: per-bottom flags
  // saying whether a gradient should be computed for that bottom blob;
  // bottom: input blobs whose diff is written.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  // GPU counterpart of Backward_cpu (its definition is not part of this header).
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
};

}  // namespace caffe

#endif  // CAFFE_RELU_LAYER_HPP_

3.3、阅读src/caffe/layers/relu_layer.cpp中代码

#include <algorithm>
#include <vector>

#include "caffe/layers/relu_layer.hpp"

namespace caffe {

// CPU forward pass. For every element x of bottom[0] it stores
//   max(x, 0) + negative_slope * min(x, 0)
// into the matching position of top[0].
template <typename Dtype>
void ReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Read-only pointer to the input values.
  const Dtype* const input = bottom[0]->cpu_data();
  // Writable pointer to the output values.
  Dtype* const output = top[0]->mutable_cpu_data();
  // Number of elements to process, taken from the input blob.
  const int num_elements = bottom[0]->count();
  // Slope for the negative half, read from the layer's relu_param; with the
  // value 0 the expression below reduces to max(x, 0).
  const Dtype slope = this->layer_param_.relu_param().negative_slope();
  const Dtype zero = Dtype(0);
  for (int idx = 0; idx < num_elements; ++idx) {
    const Dtype x = input[idx];
    // The positive part passes through; the negative part is scaled by slope.
    output[idx] = std::max(x, zero) + slope * std::min(x, zero);
  }
}

// CPU backward pass. When propagate_down[0] is set, it writes into the diff
// of bottom[0] the diff of top[0] multiplied element-wise by the local
// derivative of the activation; otherwise it does nothing.
template <typename Dtype>
void ReLULayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // No gradient requested for bottom[0]: leave everything untouched.
  if (!propagate_down[0]) {
    return;
  }
  // Read-only input values that were fed to the forward pass.
  const Dtype* const input = bottom[0]->cpu_data();
  // Read-only gradient arriving from the output side.
  const Dtype* const upstream_grad = top[0]->cpu_diff();
  // Writable gradient with respect to the input.
  Dtype* const downstream_grad = bottom[0]->mutable_cpu_diff();
  // Number of elements to process, taken from the input blob.
  const int num_elements = bottom[0]->count();
  // Slope for the negative half, read from the layer's relu_param.
  const Dtype slope = this->layer_param_.relu_param().negative_slope();
  for (int idx = 0; idx < num_elements; ++idx) {
    // Local derivative: 1 where the input is > 0, slope where it is <= 0.
    const Dtype local_grad = (input[idx] > 0) + slope * (input[idx] <= 0);
    // Chain rule: upstream gradient times the local derivative.
    downstream_grad[idx] = upstream_grad[idx] * local_grad;
  }
}

// Only when CPU_ONLY is defined: invoke STUB_GPU for this layer.
// NOTE(review): the macro is defined elsewhere; presumably it supplies
// placeholder Forward_gpu/Backward_gpu definitions for builds without GPU
// support -- confirm against its definition.
#ifdef CPU_ONLY
STUB_GPU(ReLULayer);
#endif

// NOTE(review): macro defined elsewhere; presumably it explicitly instantiates
// the ReLULayer template for the supported Dtype types -- confirm.
INSTANTIATE_CLASS(ReLULayer);

}  // namespace caffe

XZZPPP

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
7
评论
零基础学caffe源码 ReLU激活函数

1、如何有效阅读caffe源码 1、阅读caffe源码最好从src/caffe/proto/caffe.proto开始，了解基本数据结构，以及内存对象与磁盘文件之间的一一映射关系；两者之间的转换过程由Protocol Buffers（protobuf）工具自动完成。 2、看include/目录中的.hpp头文件，通过头文件中的类声明理解整个框架。按照从基类到派生类的顺序，逐步掌握这些类。 3、看src/目录中.
复制链接

扫一扫

专栏目录