[YOLOv3 Code Walkthrough Series 02] The Activation Layer

Preface

Starting with this article, the series walks through the more important source files, so that building your own network later is easier.

1 activations.h

#ifndef ACTIVATIONS_H
#define ACTIVATIONS_H
#include "darknet.h"
#include "cuda.h"
#include "math.h"

// Map an activation name string to the ACTIVATION enum (defined in darknet.h)
ACTIVATION get_activation(char *s);

// Get the string name corresponding to an activation type
char *get_activation_string(ACTIVATION a);

// Apply the activation function selected by a to a single input element x
float activate(float x, ACTIVATION a);

// Compute the derivative of the selected activation function at x
float gradient(float x, ACTIVATION a);

// Compute the derivative of the activation w.r.t. the weighted input and multiply it into delta, giving the layer's final delta (error term)
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);

// Apply the activation function to every element of x, in place
void activate_array(float *x, const int n, const ACTIVATION a);

// GPU (CUDA) versions; not discussed here
#ifdef GPU 
void activate_array_gpu(float *x, int n, ACTIVATION a);
void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta);
#endif

// static inline helpers: calls can be inlined for speed, and usually no standalone function body is emitted
static inline float stair_activate(float x)
{
    int n = floor(x);
    if (n%2 == 0) return floor(x/2.);
    else return (x - n) + floor(x/2.);
}

static inline float hardtan_activate(float x)
{
    if (x < -1) return -1;
    if (x > 1) return 1;
    return x;
}

static inline float plse_activate(float x)
{
    if(x < -4) return .01 * (x + 4);
    if(x > 4)  return .01 * (x - 4) + 1;
    return .125*x + .5;
}

static inline float lhtan_activate(float x)
{
    if(x < 0) return .001*x;
    if(x > 1) return .001*(x-1) + 1;
    return x;
}

// Linear activation: f(x) = x
static inline float linear_activate(float x){return x;}

// Sigmoid (logistic) activation: f(x) = 1 / (1 + exp(-x))
static inline float logistic_activate(float x){return 1./(1. + exp(-x));}

// ReLU activation: f(x) = x for x > 0, 0 otherwise
static inline float relu_activate(float x){return x*(x>0);}

// Leaky ReLU activation: f(x) = x for x > 0, 0.1*x otherwise
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}

// tanh activation: f(x) = (exp(2x) - 1) / (exp(2x) + 1)
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}

static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
static inline float selu_activate(float x){return (x >= 0)*1.0507*x + (x < 0)*1.0507*1.6732*(exp(x)-1);}
static inline float relie_activate(float x){return (x>0) ? x : .01*x;}
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}


// Gradient helpers. Note: gradient_array in activations.c passes the layer's OUTPUT into these,
// so functions like logistic_gradient and tanh_gradient are written in terms of the activation output.
static inline float lhtan_gradient(float x)
{
    if(x > 0 && x < 1) return 1;
    return .001;
}

static inline float hardtan_gradient(float x)
{
    if (x > -1 && x < 1) return 1;
    return 0;
}

static inline float loggy_gradient(float x)
{
    float y = (x+1.)/2.;
    return 2*(1-y)*y;
}

static inline float stair_gradient(float x)
{
    if (floor(x) == x) return 0;
    return 1;
}

// Derivative of the linear activation: always 1
static inline float linear_gradient(float x){return 1;}

// Derivative of the sigmoid, written in terms of its output: x*(1-x), where x = sigmoid(input)
static inline float logistic_gradient(float x){return (1-x)*x;}

// Derivative of ReLU: 1 if x > 0, else 0
static inline float relu_gradient(float x){return (x>0);}

// Derivative of leaky ReLU: 1 if x > 0, else 0.1
static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;}

// Derivative of tanh, written in terms of its output: 1 - x*x
static inline float tanh_gradient(float x){return 1-x*x;}

static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);}
static inline float selu_gradient(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);}
static inline float relie_gradient(float x){return (x>0) ? 1 : .01;}
static inline float ramp_gradient(float x){return (x>0)+.1;}
static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01 : .125;}

#endif
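
One detail worth making explicit before moving on: as backward_activation_layer (section 4) shows, gradient_array is called with the layer's output, so helpers such as logistic_gradient and tanh_gradient take the activation's output (y*(1-y) and 1 - y*y) rather than the raw input. The standalone sketch below is not darknet code; it simply copies the logistic formulas from this header and compares the analytic gradient against a numerical derivative of the input (build with something like gcc sketch.c -lm, where the file name is arbitrary):

/* Standalone sketch, not part of darknet: verifies that logistic_gradient
 * expects the activation's OUTPUT y = sigmoid(x), not the raw input x. */
#include <math.h>
#include <stdio.h>

/* same formulas as the inline helpers in activations.h */
static float logistic_activate(float x){ return 1./(1. + exp(-x)); }
static float logistic_gradient(float y){ return (1-y)*y; }

int main()
{
    float x = 0.7f;
    float y = logistic_activate(x);

    /* analytic derivative, evaluated at the output y */
    float analytic = logistic_gradient(y);

    /* central-difference derivative with respect to the input x */
    float eps = 1e-3f;
    float numeric = (logistic_activate(x + eps) - logistic_activate(x - eps)) / (2*eps);

    printf("analytic %.6f  numeric %.6f\n", analytic, numeric);
    return 0;
}

The two printed values agree (about 0.2219), which is why gradient_array below can be fed l.output directly.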

2 activations.c

#include "activations.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Get the string name for an activation type
char *get_activation_string(ACTIVATION a)
{
    switch(a){
        case LOGISTIC:
            return "logistic";
        case LOGGY:
            return "loggy";
        case RELU:
            return "relu";
        case ELU:
            return "elu";
        case SELU:
            return "selu";
        case RELIE:
            return "relie";
        case RAMP:
            return "ramp";
        case LINEAR:
            return "linear";
        case TANH:
            return "tanh";
        case PLSE:
            return "plse";
        case LEAKY:
            return "leaky";
        case STAIR:
            return "stair";
        case HARDTAN:
            return "hardtan";
        case LHTAN:
            return "lhtan";
        default:
            break;
    }
    return "relu";
}

// Map an activation name string to its enum value; fall back to RELU if the name is unknown
ACTIVATION get_activation(char *s)
{
    if (strcmp(s, "logistic")==0) return LOGISTIC;
    if (strcmp(s, "loggy")==0) return LOGGY;
    if (strcmp(s, "relu")==0) return RELU;
    if (strcmp(s, "elu")==0) return ELU;
    if (strcmp(s, "selu")==0) return SELU;
    if (strcmp(s, "relie")==0) return RELIE;
    if (strcmp(s, "plse")==0) return PLSE;
    if (strcmp(s, "hardtan")==0) return HARDTAN;
    if (strcmp(s, "lhtan")==0) return LHTAN;
    if (strcmp(s, "linear")==0) return LINEAR;
    if (strcmp(s, "ramp")==0) return RAMP;
    if (strcmp(s, "leaky")==0) return LEAKY;
    if (strcmp(s, "tanh")==0) return TANH;
    if (strcmp(s, "stair")==0) return STAIR;
    fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
    return RELU;
}

// Apply the activation selected by a to a single input element x
float activate(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_activate(x);
        case LOGISTIC:
            return logistic_activate(x);
        case LOGGY:
            return loggy_activate(x);
        case RELU:
            return relu_activate(x);
        case ELU:
            return elu_activate(x);
        case SELU:
            return selu_activate(x);
        case RELIE:
            return relie_activate(x);
        case RAMP:
            return ramp_activate(x);
        case LEAKY:
            return leaky_activate(x);
        case TANH:
            return tanh_activate(x);
        case PLSE:
            return plse_activate(x);
        case STAIR:
            return stair_activate(x);
        case HARDTAN:
            return hardtan_activate(x);
        case LHTAN:
            return lhtan_activate(x);
    }
    return 0;
}

/* 
** Purpose: apply the activation function to every element of x
** Inputs: x    array to process; typically a layer's weighted inputs Wx + b
**         n    number of elements in x
**         a    activation type
** Note: elements are processed one by one, in place; this function is called from each
**       layer's forward pass, and its result becomes that layer's output
*/
void activate_array(float *x, const int n, const ACTIVATION a)
{
    int i;
    for(i = 0; i < n; ++i){
        x[i] = activate(x[i], a);   
    }
}
 
// Compute the derivative of the selected activation function at x
float gradient(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_gradient(x);
        case LOGISTIC:
            return logistic_gradient(x);
        case LOGGY:
            return loggy_gradient(x);
        case RELU:
            return relu_gradient(x);
        case ELU:
            return elu_gradient(x);
        case SELU:
            return selu_gradient(x);
        case RELIE:
            return relie_gradient(x);
        case RAMP:
            return ramp_gradient(x);
        case LEAKY:
            return leaky_gradient(x);
        case TANH:
            return tanh_gradient(x);
        case PLSE:
            return plse_gradient(x);
        case STAIR:
            return stair_gradient(x);
        case HARDTAN:
            return hardtan_gradient(x);
        case LHTAN:
            return lhtan_gradient(x);
    }
    return 0;
}

/*  
** Purpose: compute the derivative of the activation w.r.t. the weighted input and
**          multiply it into delta, yielding the layer's final delta (error term)
** Inputs: x      the layer's outputs (size l.batch * l.out_c * l.out_w * l.out_h)
**         n      number of elements in l.output
**         a      activation type
**         delta  the layer's error term, scaled in place
*/
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
{
    int i;
    for(i = 0; i < n; ++i){
        delta[i] *= gradient(x[i], a);
    }
} 
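
To make the forward/backward contract concrete, here is a minimal standalone driver (the buffers and the LEAKY formulas are re-declared locally for illustration rather than linking against darknet). It mirrors what activate_array and gradient_array do: the forward pass overwrites the weighted inputs with their activations in place, and the backward pass multiplies the incoming error by the local gradient evaluated at those activations.

/* Standalone sketch: a miniature forward/backward pass over one buffer,
 * element by element, in the same way activate_array and gradient_array work. */
#include <stdio.h>

static float leaky_activate(float x){ return (x > 0) ? x : .1*x; }
static float leaky_gradient(float x){ return (x > 0) ? 1 : .1; }

int main()
{
    float output[4] = { -2.0f, -0.5f, 0.5f, 2.0f };  /* pretend these are Wx + b */
    float delta[4]  = {  1.0f,  1.0f, 1.0f, 1.0f };  /* error arriving from the next layer */
    int i;

    /* forward: like activate_array(output, 4, LEAKY) */
    for (i = 0; i < 4; ++i) output[i] = leaky_activate(output[i]);

    /* backward: like gradient_array(output, 4, LEAKY, delta) */
    for (i = 0; i < 4; ++i) delta[i] *= leaky_gradient(output[i]);

    for (i = 0; i < 4; ++i) printf("out % .2f  delta %.2f\n", output[i], delta[i]);
    return 0;
}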

3 activation_layer.h

#ifndef ACTIVATION_LAYER_H
#define ACTIVATION_LAYER_H
#include "activations.h"
#include "layer.h"
#include "network.h"

// Construct an activation layer
layer make_activation_layer(int batch, int inputs, ACTIVATION activation);

// Forward pass of the activation layer
void forward_activation_layer(layer l, network net);

// Backward pass of the activation layer
void backward_activation_layer(layer l, network net);

#ifdef GPU
void forward_activation_layer_gpu(layer l, network net);
void backward_activation_layer_gpu(layer l, network net);
#endif

#endif

4 activation_layer.c

#include "activation_layer.h"
#include "utils.h"
#include "cuda.h"
#include "blas.h"
#include "gemm.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Construct an activation layer
layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
{
    layer l = {0};
    l.type = ACTIVE;
    l.inputs = inputs;
    l.outputs = inputs;
    l.batch = batch;
	
    // Allocate the output and delta buffers: one entry per input element per batch
    // (each element is a float, so sizeof(float) would be the exact size; sizeof(float*) merely over-allocates)
    l.output = calloc(batch*inputs, sizeof(float*));
    l.delta = calloc(batch*inputs, sizeof(float*));

    l.forward = forward_activation_layer;
    l.backward = backward_activation_layer;
    
#ifdef GPU
    l.forward_gpu = forward_activation_layer_gpu;
    l.backward_gpu = backward_activation_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, inputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
#endif

    l.activation = activation;
    fprintf(stderr, "Activation Layer: %d inputs\n", inputs);
    return l;
}

// Forward pass: copy the network input into l.output, then apply the activation in place
void forward_activation_layer(layer l, network net)
{
    copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
    activate_array(l.output, l.outputs*l.batch, l.activation);
}

// Backward pass: scale l.delta by the activation gradient of l.output, then copy it into net.delta
void backward_activation_layer(layer l, network net)
{
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1);
}

#ifdef GPU
void forward_activation_layer_gpu(layer l, network net)
{
    copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1);
    activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation);
}

void backward_activation_layer_gpu(layer l, network net)
{
    gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
    copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif
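
Putting everything together, the sketch below is a self-contained miniature of this layer (mini_layer, forward_mini and backward_mini are invented names; in darknet you would call make_activation_layer and let the network loop invoke l.forward and l.backward). It keeps the same structure: forward copies the input into the output buffer and activates it in place, backward scales the layer's delta by the gradient of the output and copies it into the upstream delta buffer.

/* Standalone sketch, assuming nothing from darknet: an "activation layer"
 * with the same copy -> activate / gradient -> copy structure as above. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static float relu_activate(float x){ return x > 0 ? x : 0; }
static float relu_gradient(float x){ return x > 0 ? 1 : 0; }

typedef struct {
    int n;          /* batch * inputs */
    float *output;  /* this layer's activations */
    float *delta;   /* this layer's error term */
} mini_layer;

static mini_layer make_mini_layer(int n)
{
    mini_layer l;
    l.n = n;
    l.output = calloc(n, sizeof(float));
    l.delta  = calloc(n, sizeof(float));
    return l;
}

/* like forward_activation_layer: copy the net input, activate in place */
static void forward_mini(mini_layer l, const float *net_input)
{
    int i;
    memcpy(l.output, net_input, l.n * sizeof(float));
    for (i = 0; i < l.n; ++i) l.output[i] = relu_activate(l.output[i]);
}

/* like backward_activation_layer: scale delta by the gradient, pass it upstream */
static void backward_mini(mini_layer l, float *net_delta)
{
    int i;
    for (i = 0; i < l.n; ++i) l.delta[i] *= relu_gradient(l.output[i]);
    memcpy(net_delta, l.delta, l.n * sizeof(float));
}

int main()
{
    float input[4] = { -1.f, 0.f, 1.f, 2.f };
    float net_delta[4] = { 0 };
    int i;

    mini_layer l = make_mini_layer(4);
    forward_mini(l, input);

    for (i = 0; i < 4; ++i) l.delta[i] = 1.f;   /* pretend error from the next layer */
    backward_mini(l, net_delta);

    for (i = 0; i < 4; ++i) printf("out %.1f  delta %.1f\n", l.output[i], net_delta[i]);

    free(l.output);
    free(l.delta);
    return 0;
}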
