This header file contains the following functions:
ACTIVATION get_activation(char *s);
char *get_activation_string(ACTIVATION a);
float activate(float x, ACTIVATION a);
float gradient(float x, ACTIVATION a);
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);
void activate_array(float *x, const int n, const ACTIVATION a);
#ifdef GPU
void activate_array_gpu(float *x, int n, ACTIVATION a);
void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta);
#endif
Here ACTIVATION is an enum type that lists every supported activation function:
typedef enum{
LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU
} ACTIVATION;
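Before diving into the implementations, a minimal, hypothetical usage sketch of this API (the buffer contents and the "leaky" lookup string are only illustrative; the string-to-enum table lives in the .c file):
#include <stdio.h>
#include "activations.h"

int main(void)
{
    float buf[4] = {-2.f, -.5f, .5f, 2.f};
    ACTIVATION a = get_activation("leaky");  /* string -> enum */
    activate_array(buf, 4, a);               /* elementwise, in place */
    printf("%s: %f %f %f %f\n", get_activation_string(a),
           buf[0], buf[1], buf[2], buf[3]);
    return 0;
}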
Next, the simpler activations are implemented as static inline functions, so the compiler can inline them into the hot loops:
static inline float linear_activate(float x){return x;}                       /* identity */
static inline float logistic_activate(float x){return 1./(1. + exp(-x));}     /* sigmoid */
static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}    /* sigmoid rescaled to (-1,1) */
static inline float relu_activate(float x){return x*(x>0);}                   /* max(0, x) */
static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
static inline float selu_activate(float x){return (x >= 0)*1.0507*x + (x < 0)*1.0507*1.6732*(exp(x)-1);}
static inline float relie_activate(float x){return (x>0) ? x : .01*x;}        /* leaky variant, slope .01 */
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}              /* relu plus a .1*x ramp */
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}         /* leaky relu, slope .1 */
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
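The dispatcher below also references stair/hardtan/plse/lhtan helpers defined alongside these; all are simple piecewise functions. Sketched here for completeness (constants per my reading of the darknet source):
/* Hard tanh: clamp x to [-1, 1]. */
static inline float hardtan_activate(float x)
{
    if (x < -1) return -1;
    if (x > 1) return 1;
    return x;
}
/* PLSE: piecewise-linear approximation of a sigmoid. */
static inline float plse_activate(float x)
{
    if(x < -4) return .01 * (x + 4);
    if(x > 4)  return .01 * (x - 4) + 1;
    return .125*x + .5;
}
/* STAIR: staircase built from floor(); needs <math.h>. */
static inline float stair_activate(float x)
{
    int n = floor(x);
    if (n%2 == 0) return floor(x/2.);
    else return (x - n) + floor(x/2.);
}
/* LHTAN: identity on [0,1], slope .001 outside. */
static inline float lhtan_activate(float x)
{
    if(x < 0) return .001*x;
    if(x > 1) return .001*(x-1) + 1;
    return x;
}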
A single dispatcher then maps each enum value to its helper:
float activate(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_activate(x);
        case LOGISTIC:
            return logistic_activate(x);
        case LOGGY:
            return loggy_activate(x);
        case RELU:
            return relu_activate(x);
        case ELU:
            return elu_activate(x);
        case SELU:
            return selu_activate(x);
        case RELIE:
            return relie_activate(x);
        case RAMP:
            return ramp_activate(x);
        case LEAKY:
            return leaky_activate(x);
        case TANH:
            return tanh_activate(x);
        case PLSE:
            return plse_activate(x);
        case STAIR:
            return stair_activate(x);
        case HARDTAN:
            return hardtan_activate(x);
        case LHTAN:
            return lhtan_activate(x);
    }
    return 0;
}
void activate_array(float *x, const int n, const ACTIVATION a)
{
    int i;
    for(i = 0; i < n; ++i){
        x[i] = activate(x[i], a);
    }
}
The gradients are dispatched the same way; the inline *_gradient helpers it calls are sketched just after this function.
float gradient(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_gradient(x);
        case LOGISTIC:
            return logistic_gradient(x);
        case LOGGY:
            return loggy_gradient(x);
        case RELU:
            return relu_gradient(x);
        case ELU:
            return elu_gradient(x);
        case SELU:
            return selu_gradient(x);
        case RELIE:
            return relie_gradient(x);
        case RAMP:
            return ramp_gradient(x);
        case LEAKY:
            return leaky_gradient(x);
        case TANH:
            return tanh_gradient(x);
        case PLSE:
            return plse_gradient(x);
        case STAIR:
            return stair_gradient(x);
        case HARDTAN:
            return hardtan_gradient(x);
        case LHTAN:
            return lhtan_gradient(x);
    }
    return 0;
}
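One subtlety worth calling out: darknet evaluates these gradient helpers on the activation output y = activate(x, a) rather than on the pre-activation x, which is why the layer's backward pass below hands in l.output. A few representative helpers and gradient_array, sketched from my reading of the source:
/* Gradients written in terms of the output y, not the input x. */
static inline float linear_gradient(float x){return 1;}
static inline float logistic_gradient(float x){return (1-x)*x;}      /* y*(1-y)      */
static inline float relu_gradient(float x){return (x>0);}            /* y>0 <=> x>0  */
static inline float tanh_gradient(float x){return 1-x*x;}            /* 1-y^2        */
static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;}

/* Chain rule: scale the incoming error by the local gradient, in place. */
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
{
    int i;
    for(i = 0; i < n; ++i){
        delta[i] *= gradient(x[i], a);
    }
}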
With these primitives in place, turn to the implementation of the activation layer:
layer make_activation_layer(int batch, int inputs, ACTIVATION activation);
void forward_activation_layer(layer l, network net);
void backward_activation_layer(layer l, network net);
#ifdef GPU
void forward_activation_layer_gpu(layer l, network net);
void backward_activation_layer_gpu(layer l, network net);
#endif
layer and network are both structs, and both carry an enormous number of fields; only a handful of them matter for this layer, as the trimmed sketch below shows.
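For orientation, an abridged, illustrative view of the two structs, keeping only the fields this layer actually touches (the real definitions in darknet.h carry dozens more):
/* Abridged sketch -- not the full darknet definitions. */
typedef struct layer {
    LAYER_TYPE type;                 /* ACTIVE for this layer              */
    int batch, inputs, outputs;
    float *output, *delta;           /* activations and error signal      */
    ACTIVATION activation;
    void (*forward)(struct layer, struct network);
    void (*backward)(struct layer, struct network);
    /* ...dozens of further fields... */
} layer;

typedef struct network {
    float *input;                    /* input to the current layer         */
    float *delta;                    /* error buffer of the previous layer */
    /* ...dozens of further fields... */
} network;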
layer make_activation_layer(int batch, int inputs, ACTIVATION activation)
{
    layer l = {0};
    l.type = ACTIVE;

    l.inputs = inputs;
    l.outputs = inputs;
    l.batch = batch;

    l.output = calloc(batch*inputs, sizeof(float));  /* one float per element */
    l.delta = calloc(batch*inputs, sizeof(float));

    l.forward = forward_activation_layer;
    l.backward = backward_activation_layer;
#ifdef GPU
    l.forward_gpu = forward_activation_layer_gpu;
    l.backward_gpu = backward_activation_layer_gpu;

    l.output_gpu = cuda_make_array(l.output, inputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
#endif
    l.activation = activation;
    fprintf(stderr, "Activation Layer: %d inputs\n", inputs);
    return l;
}
void forward_activation_layer(layer l, network net)
{
    copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
void backward_activation_layer(layer l, network net)
{
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1);
}
#ifdef GPU
void forward_activation_layer_gpu(layer l, network net)
{
    copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1);
    activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation);
}
void backward_activation_layer_gpu(layer l, network net)
{
    gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
    copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif
The heart of the file is the forward/backward pair. The forward pass copies the layer's input (net.input) into l.output and applies the activation in place. The backward pass evaluates the activation's gradient at l.output and multiplies it into the error signal l.delta (inside gradient_array), then copies l.delta back into net.delta so the previous layer can continue backpropagation.
Here copy_cpu is a strided copy (the same contract as BLAS scopy; with INCX = INCY = 1 it degenerates to a plain element-by-element copy):
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
    int i;
    for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX];
}
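Putting it all together, a minimal, hypothetical CPU-only driver (the sizes and values are made up, and it assumes the full darknet layer/network definitions are available on the include path):
#include "activation_layer.h"

int main(void)
{
    float input[4] = {-2.f, -.5f, .5f, 2.f};
    float prev_delta[4] = {0};

    layer l = make_activation_layer(1, 4, LEAKY);
    network net = {0};
    net.input = input;
    net.delta = prev_delta;

    l.forward(l, net);                       /* l.output = leaky(input)         */

    /* Pretend the next layer propagated back an all-ones error signal. */
    for(int i = 0; i < 4; ++i) l.delta[i] = 1.f;
    l.backward(l, net);                      /* prev_delta now holds leaky'(y)  */

    return 0;
}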