Preface
Starting with this article, this series will walk through the more important source files, so that building your own network later will come naturally.
1 activations.h
#ifndef ACTIVATIONS_H
#define ACTIVATIONS_H
#include "darknet.h"
#include "cuda.h"
#include "math.h"
// Get the activation type (a value of the ACTIVATION enum); the enum itself is defined in darknet.h
ACTIVATION get_activation(char *s);
// Get the string name corresponding to an activation type
char *get_activation_string(ACTIVATION a);
// Apply the activation function selected by a to the input element x
float activate(float x, ACTIVATION a);
// Compute the gradient of the activation function selected by a
float gradient(float x, ACTIVATION a);
// Compute the derivative of the activation with respect to the weighted input and multiply it by delta, giving the layer's final delta (error term)
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);
// Apply the activation function to every element of x
void activate_array(float *x, const int n, const ACTIVATION a);
// CUDA versions; not discussed here
#ifdef GPU
void activate_array_gpu(float *x, int n, ACTIVATION a);
void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta);
#endif
// Inline functions with static linkage: calls can be expanded at the call site for speed, and normally no standalone copy of the function body is emitted
static inline float stair_activate(float x)
{
    int n = floor(x);
    if (n%2 == 0) return floor(x/2.);
    else return (x - n) + floor(x/2.);
}
static inline float hardtan_activate(float x)
{
    if (x < -1) return -1;
    if (x > 1) return 1;
    return x;
}
static inline float plse_activate(float x)
{
    if(x < -4) return .01 * (x + 4);
    if(x > 4) return .01 * (x - 4) + 1;
    return .125*x + .5;
}
static inline float lhtan_activate(float x)
{
    if(x < 0) return .001*x;
    if(x > 1) return .001*(x-1) + 1;
    return x;
}
// Linear activation: f(x) = x
static inline float linear_activate(float x){return x;}
// Sigmoid (logistic) activation: f(x) = 1/(1 + exp(-x))
static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
// ReLU activation: f(x) = 0 for x <= 0, x for x > 0
static inline float relu_activate(float x){return x*(x>0);}
// Leaky ReLU activation: f(x) = x for x > 0, 0.1*x for x <= 0
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
// tanh activation: f(x) = (exp(2x) - 1)/(exp(2x) + 1)
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
// loggy: a logistic function rescaled to (-1, 1): f(x) = 2/(1 + exp(-x)) - 1
static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
// ELU: x for x >= 0, exp(x) - 1 for x < 0
static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
// SELU: scaled ELU with lambda = 1.0507 and alpha = 1.6732
static inline float selu_activate(float x){return (x >= 0)*1.0507*x + (x < 0)*1.0507*1.6732*(exp(x)-1);}
// relie: like leaky ReLU but with slope 0.01 for x <= 0
static inline float relie_activate(float x){return (x>0) ? x : .01*x;}
// ramp: 1.1*x for x > 0, 0.1*x otherwise
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
// Gradient (derivative) functions. Note that Darknet calls these with the layer's already-activated output, so several of them (logistic, tanh, loggy, elu, selu) are written in terms of the output rather than the original input (see the check after this listing).
static inline float lhtan_gradient(float x)
{
    if(x > 0 && x < 1) return 1;
    return .001;
}
static inline float hardtan_gradient(float x)
{
    if (x > -1 && x < 1) return 1;
    return 0;
}
static inline float loggy_gradient(float x)
{
    float y = (x+1.)/2.;
    return 2*(1-y)*y;
}
static inline float stair_gradient(float x)
{
    if (floor(x) == x) return 0;
    return 1;
}
// Derivative of the linear activation: always 1
static inline float linear_gradient(float x){return 1;}
// Derivative of the sigmoid, written in terms of its output x: x*(1 - x)
static inline float logistic_gradient(float x){return (1-x)*x;}
// Derivative of ReLU: 1 for x > 0, 0 otherwise
static inline float relu_gradient(float x){return (x>0);}
// Derivative of leaky ReLU: 1 for x > 0, 0.1 otherwise
static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;}
// Derivative of tanh, written in terms of its output x: 1 - x*x
static inline float tanh_gradient(float x){return 1-x*x;}
// Derivative of ELU in terms of its output: 1 for x >= 0, x + 1 (= exp(input)) otherwise
static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);}
// Derivative of SELU in terms of its output: 1.0507 for x >= 0, x + 1.0507*1.6732 otherwise
static inline float selu_gradient(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);}
// Derivative of relie: 1 for x > 0, 0.01 otherwise
static inline float relie_gradient(float x){return (x>0) ? 1 : .01;}
// Derivative of ramp: 1.1 for x > 0, 0.1 otherwise
static inline float ramp_gradient(float x){return (x>0)+.1;}
// Derivative of plse: 0.125 on the central segment, 0.01 when the output is outside [0, 1]
static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01 : .125;}
#endif
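One detail worth pinning down before moving on: the gradient helpers above receive the layer's already-activated output (gradient_array below is called with l.output), which is why logistic_gradient and tanh_gradient are expressed in terms of the output. The following standalone snippet is my own sanity check of this convention for the logistic function and is not part of Darknet; it compares logistic_gradient applied to the output against a numerical derivative of the activation.

#include <math.h>
#include <stdio.h>

static float my_logistic_activate(float x){ return 1.f/(1.f + expf(-x)); }
static float my_logistic_gradient(float y){ return (1.f - y)*y; }  /* y is the activation OUTPUT */

int main(void)
{
    float x = 0.7f;
    float y = my_logistic_activate(x);
    float eps = 1e-3f;
    /* central-difference estimate of d(logistic)/dx at x */
    float num = (my_logistic_activate(x + eps) - my_logistic_activate(x - eps)) / (2*eps);
    printf("analytic %.5f  numerical %.5f\n", my_logistic_gradient(y), num);
    return 0;
}

Both values should come out to roughly 0.2217 for x = 0.7, confirming that passing the output to the gradient helper reproduces the derivative with respect to the input.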
2 activations.c
#include "activations.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Get the string name corresponding to an activation type
char *get_activation_string(ACTIVATION a)
{
    switch(a){
        case LOGISTIC:
            return "logistic";
        case LOGGY:
            return "loggy";
        case RELU:
            return "relu";
        case ELU:
            return "elu";
        case SELU:
            return "selu";
        case RELIE:
            return "relie";
        case RAMP:
            return "ramp";
        case LINEAR:
            return "linear";
        case TANH:
            return "tanh";
        case PLSE:
            return "plse";
        case LEAKY:
            return "leaky";
        case STAIR:
            return "stair";
        case HARDTAN:
            return "hardtan";
        case LHTAN:
            return "lhtan";
        default:
            break;
    }
    return "relu";
}
// Map an activation name string to the corresponding ACTIVATION enum value
ACTIVATION get_activation(char *s)
{
    if (strcmp(s, "logistic")==0) return LOGISTIC;
    if (strcmp(s, "loggy")==0) return LOGGY;
    if (strcmp(s, "relu")==0) return RELU;
    if (strcmp(s, "elu")==0) return ELU;
    if (strcmp(s, "selu")==0) return SELU;
    if (strcmp(s, "relie")==0) return RELIE;
    if (strcmp(s, "plse")==0) return PLSE;
    if (strcmp(s, "hardtan")==0) return HARDTAN;
    if (strcmp(s, "lhtan")==0) return LHTAN;
    if (strcmp(s, "linear")==0) return LINEAR;
    if (strcmp(s, "ramp")==0) return RAMP;
    if (strcmp(s, "leaky")==0) return LEAKY;
    if (strcmp(s, "tanh")==0) return TANH;
    if (strcmp(s, "stair")==0) return STAIR;
    fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
    return RELU;
}
// Apply the activation function selected by a to the input element x
float activate(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_activate(x);
        case LOGISTIC:
            return logistic_activate(x);
        case LOGGY:
            return loggy_activate(x);
        case RELU:
            return relu_activate(x);
        case ELU:
            return elu_activate(x);
        case SELU:
            return selu_activate(x);
        case RELIE:
            return relie_activate(x);
        case RAMP:
            return ramp_activate(x);
        case LEAKY:
            return leaky_activate(x);
        case TANH:
            return tanh_activate(x);
        case PLSE:
            return plse_activate(x);
        case STAIR:
            return stair_activate(x);
        case HARDTAN:
            return hardtan_activate(x);
        case LHTAN:
            return lhtan_activate(x);
    }
    return 0;
}
/*
** Purpose: apply the activation function to every element of x
** Inputs:  x  the array to process, typically the weighted inputs Wx+b of every neuron in the layer
**          n  number of elements in x
**          a  activation type
** Note: the elements of x are processed in place. This function is called from each layer's forward
**       pass, and the activated x becomes that layer's output (see the sketch after this listing).
*/
void activate_array(float *x, const int n, const ACTIVATION a)
{
    int i;
    for(i = 0; i < n; ++i){
        x[i] = activate(x[i], a);
    }
}
// Compute the derivative of the activation function selected by a (x is the activated output)
float gradient(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_gradient(x);
        case LOGISTIC:
            return logistic_gradient(x);
        case LOGGY:
            return loggy_gradient(x);
        case RELU:
            return relu_gradient(x);
        case ELU:
            return elu_gradient(x);
        case SELU:
            return selu_gradient(x);
        case RELIE:
            return relie_gradient(x);
        case RAMP:
            return ramp_gradient(x);
        case LEAKY:
            return leaky_gradient(x);
        case TANH:
            return tanh_gradient(x);
        case PLSE:
            return plse_gradient(x);
        case STAIR:
            return stair_gradient(x);
        case HARDTAN:
            return hardtan_gradient(x);
        case LHTAN:
            return lhtan_gradient(x);
    }
    return 0;
}
/*
** Purpose: compute the derivative of the activation w.r.t. the weighted input and multiply it by
**          delta, giving the layer's final delta (error term)
** Inputs:  x      all outputs of the current layer (dimension l.batch * l.out_c * l.out_w * l.out_h)
**          n      number of elements in l.output
**          a      activation type
**          delta  the current layer's error term, updated in place
*/
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
{
    int i;
    for(i = 0; i < n; ++i){
        delta[i] *= gradient(x[i], a);
    }
}
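To make the calling convention concrete, here is a simplified sketch of how the two array functions pair up inside a layer. This is my own illustration rather than Darknet code, and it only assumes the activations.h header above: the forward pass overwrites the pre-activation values with f(Wx+b), and the backward pass multiplies the incoming error term element-wise by f' evaluated at that stored output.

#include "activations.h"

/* forward: buf holds Wx+b on entry and f(Wx+b) on return */
void toy_forward(float *buf, int n, ACTIVATION a)
{
    activate_array(buf, n, a);
}

/* backward: delta[i] *= f'(output[i]); output must be the activated values
   saved during the forward pass, which is why Darknet keeps l.output around */
void toy_backward(const float *output, float *delta, int n, ACTIVATION a)
{
    gradient_array(output, n, a, delta);
}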
3 activation_layer.h
#ifndef ACTIVATION_LAYER_H
#define ACTIVATION_LAYER_H
#include "activations.h"
#include "layer.h"
#include "network.h"
// Construct an activation layer
layer make_activation_layer(int batch, int inputs, ACTIVATION activation);
// Forward pass of the activation layer
void forward_activation_layer(layer l, network net);
// Backward pass of the activation layer
void backward_activation_layer(layer l, network net);
#ifdef GPU
void forward_activation_layer_gpu(layer l, network net);
void backward_activation_layer_gpu(layer l, network net);
#endif
#endif
4 activation_layer.c
#include "activation_layer.h"
#include "utils.h"
#include "cuda.h"
#include "blas.h"
#include "gemm.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Construct an activation layer (a usage sketch follows at the end of this file)
layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
{
    layer l = {0};
    l.type = ACTIVE;
    l.inputs = inputs;
    l.outputs = inputs;
    l.batch = batch;
    // Allocate the output and delta buffers: number of elements times element size.
    // Note that the upstream code passes sizeof(float*) here; sizeof(float) would suffice,
    // so this merely over-allocates on 64-bit systems.
    l.output = calloc(batch*inputs, sizeof(float*));
    l.delta = calloc(batch*inputs, sizeof(float*));
    l.forward = forward_activation_layer;
    l.backward = backward_activation_layer;
#ifdef GPU
    l.forward_gpu = forward_activation_layer_gpu;
    l.backward_gpu = backward_activation_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, inputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
#endif
    l.activation = activation;
    fprintf(stderr, "Activation Layer: %d inputs\n", inputs);
    return l;
}
// Forward pass: copy the input into l.output, then activate it in place
void forward_activation_layer(layer l, network net)
{
    copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
// Backward pass: multiply l.delta by f'(output), then copy the result into net.delta for the previous layer
void backward_activation_layer(layer l, network net)
{
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1);
}
#ifdef GPU
void forward_activation_layer_gpu(layer l, network net)
{
    copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1);
    activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation);
}
void backward_activation_layer_gpu(layer l, network net)
{
    gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
    copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif
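Finally, a minimal CPU-only usage sketch of the layer itself. This is my own example rather than Darknet code; it assumes the Darknet headers are available and omits error handling and memory cleanup. It builds a LEAKY activation layer for a batch of 2 with 3 inputs each, runs the forward pass, and then back-propagates a dummy all-ones error term into net.delta.

#include "activation_layer.h"

void demo_activation_layer(void)
{
    layer l = make_activation_layer(2, 3, LEAKY);

    float input[6] = {-1, 0, 1, 2, -2, 3};
    float upstream_delta[6] = {0};   /* will receive dL/d(input) */

    network net = {0};
    net.input = input;
    net.delta = upstream_delta;

    forward_activation_layer(l, net);        /* l.output = leaky(input) */

    int i;
    for(i = 0; i < 6; ++i) l.delta[i] = 1;   /* pretend error term from the next layer */
    backward_activation_layer(l, net);       /* upstream_delta[i] = 1 * f'(output[i]) */
}

Because the leaky ReLU derivative is 1 for positive outputs and 0.1 otherwise, upstream_delta should end up as {0.1, 0.1, 1, 1, 0.1, 1} here, which is exactly what the previous layer needs to continue back-propagation.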