l = parse_convolutional(options, params); the function body is shown below:
//Parses the options for a convolutional layer and returns a layer struct
convolutional_layer parse_convolutional(list *options, size_params params)
{
int n = option_find_int(options, "filters",1);//number of convolution kernels, which is also the number of output feature maps
int size = option_find_int(options, "size",1);//kernel size
int stride = option_find_int(options, "stride",1);//convolution stride
int pad = option_find_int_quiet(options, "pad",0);//padding mode; pad=1 means "same"-style padding
int padding = option_find_int_quiet(options, "padding",0);//explicit padding amount
int groups = option_find_int_quiet(options, "groups", 1);//number of groups for grouped convolution
if(pad) padding = size/2;//if pad is set, pad the borders by half the kernel size (integer division rounds down)
char *activation_s = option_find_str(options, "activation", "logistic");//choice of activation function; YOLO typically uses leaky
ACTIVATION activation = get_activation(activation_s);//map the activation name to its enum value
int batch,h,w,c;//height, width, channels, and batch size, assigned below
h = params.h;
w = params.w;
c = params.c;
batch=params.batch;//number of images per batch
if(!(h && w && c)) error("Layer before convolutional layer must output image.");//error out if any of h, w, c is 0
int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);//whether to apply batch normalization
int binary = option_find_int_quiet(options, "binary", 0);//whether to binarize the weights, default 0
int xnor = option_find_int_quiet(options, "xnor", 0);//whether to binarize both weights and inputs, default 0
//build the layer itself; see [this blog post](https://blog.csdn.net/m0_37799466/article/details/106051226) for details
convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam);
layer.flipped = option_find_int_quiet(options, "flipped", 0);//image flipping
layer.dot = option_find_float_quiet(options, "dot", 0);//dot product
return layer;
}
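These options come straight from a [convolutional] block in the network's .cfg file. A representative snippet (the keys are exactly the ones the parser reads above; the values are illustrative):

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

With size=3 and pad=1, padding becomes 3/2 = 1 by integer division, so at stride 1 the output keeps the input's spatial dimensions ("same" padding).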
The convolutional_layer struct, i.e. layer (darknet typedefs convolutional_layer to layer):
struct layer{
LAYER_TYPE type;
ACTIVATION activation;
COST_TYPE cost_type;
void (*forward) (struct layer, struct network);//function pointer declarations; forward and the five fields below point to the layer's forward/backward/update routines
void (*backward) (struct layer, struct network);
void (*update) (struct layer, update_args);
void (*forward_gpu) (struct layer, struct network);
void (*backward_gpu) (struct layer, struct network);
void (*update_gpu) (struct layer, update_args);
int batch_normalize;
int shortcut;
int batch;
int forced;
int flipped;
int inputs;
int outputs;
int nweights;
int nbiases;
int extra;
int truths;
int h,w,c;
int out_h, out_w, out_c;
int n;
int max_boxes;
int groups;
int size;
int side;
int stride;
int reverse;
int flatten;
int spatial;
int pad;
int sqrt;
int flip;
int index;
int binary;
int xnor;
int steps;
int hidden;
int truth;
float smooth;
float dot;
float angle;
float jitter;
float saturation;
float exposure;
float shift;
float ratio;
float learning_rate_scale;
float clip;
int noloss;
int softmax;
int classes;
int coords;
int background;
int rescore;
int objectness;
int joint;
int noadjust;
int reorg;
int log;
int tanh;
int *mask;
int total;
float alpha;
float beta;
float kappa;
float coord_scale;
float object_scale;
float noobject_scale;
float mask_scale;
float class_scale;
int bias_match;
int random;
float ignore_thresh;
float truth_thresh;
float thresh;
float focus;
int classfix;
int absolute;
int onlyforward;
int stopbackward;
int dontload;
int dontsave;
int dontloadscales;
int numload;
float temperature;
float probability;
float scale;
char * cweights;
int * indexes;
int * input_layers;
int * input_sizes;
int * map;
int * counts;
float ** sums;
float * rand;
float * cost;
float * state;
float * prev_state;
float * forgot_state;
float * forgot_delta;
float * state_delta;
float * combine_cpu;
float * combine_delta_cpu;
float * concat;
float * concat_delta;
float * binary_weights;
float * biases;
float * bias_updates;
float * scales;
float * scale_updates;
float * weights;
float * weight_updates;
float * delta;
float * output;
float * loss;
float * squared;
float * norms;
float * spatial_mean;
float * mean;
float * variance;
float * mean_delta;
float * variance_delta;
float * rolling_mean;
float * rolling_variance;
float * x;
float * x_norm;
float * m;
float * v;
float * bias_m;
float * bias_v;
float * scale_m;
float * scale_v;
float *z_cpu;
float *r_cpu;
float *h_cpu;
float * prev_state_cpu;
float *temp_cpu;
float *temp2_cpu;
float *temp3_cpu;
float *dh_cpu;
float *hh_cpu;
float *prev_cell_cpu;
float *cell_cpu;
float *f_cpu;
float *i_cpu;
float *g_cpu;
float *o_cpu;
float *c_cpu;
float *dc_cpu;
float * binary_input;
struct layer *input_layer;
struct layer *self_layer;
struct layer *output_layer;
struct layer *reset_layer;
struct layer *update_layer;
struct layer *state_layer;
struct layer *input_gate_layer;
struct layer *state_gate_layer;
struct layer *input_save_layer;
struct layer *state_save_layer;
struct layer *input_state_layer;
struct layer *state_state_layer;
struct layer *input_z_layer;
struct layer *state_z_layer;
struct layer *input_r_layer;
struct layer *state_r_layer;
struct layer *input_h_layer;
struct layer *state_h_layer;
struct layer *wz;
struct layer *uz;
struct layer *wr;
struct layer *ur;
struct layer *wh;
struct layer *uh;
struct layer *uo;
struct layer *wo;
struct layer *uf;
struct layer *wf;
struct layer *ui;
struct layer *wi;
struct layer *ug;
struct layer *wg;
tree *softmax_tree;
size_t workspace_size;
#ifdef GPU
int *indexes_gpu;
float *z_gpu;
float *r_gpu;
float *h_gpu;
float *temp_gpu;
float *temp2_gpu;
float *temp3_gpu;
float *dh_gpu;
float *hh_gpu;
float *prev_cell_gpu;
float *cell_gpu;
float *f_gpu;
float *i_gpu;
float *g_gpu;
float *o_gpu;
float *c_gpu;
float *dc_gpu;
float *m_gpu;
float *v_gpu;
float *bias_m_gpu;
float *scale_m_gpu;
float *bias_v_gpu;
float *scale_v_gpu;
float * combine_gpu;
float * combine_delta_gpu;
float * prev_state_gpu;
float * forgot_state_gpu;
float * forgot_delta_gpu;
float * state_gpu;
float * state_delta_gpu;
float * gate_gpu;
float * gate_delta_gpu;
float * save_gpu;
float * save_delta_gpu;
float * concat_gpu;
float * concat_delta_gpu;
float * binary_input_gpu;
float * binary_weights_gpu;
float * mean_gpu;
float * variance_gpu;
float * rolling_mean_gpu;
float * rolling_variance_gpu;
float * variance_delta_gpu;
float * mean_delta_gpu;
float * x_gpu;
float * x_norm_gpu;
float * weights_gpu;
float * weight_updates_gpu;
float * weight_change_gpu;
float * biases_gpu;
float * bias_updates_gpu;
float * bias_change_gpu;
float * scales_gpu;
float * scale_updates_gpu;
float * scale_change_gpu;
float * output_gpu;
float * loss_gpu;
float * delta_gpu;
float * rand_gpu;
float * squared_gpu;
float * norms_gpu;
#ifdef CUDNN
cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc;
cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc;
cudnnTensorDescriptor_t normTensorDesc;
cudnnFilterDescriptor_t weightDesc;
cudnnFilterDescriptor_t dweightDesc;
cudnnConvolutionDescriptor_t convDesc;
cudnnConvolutionFwdAlgo_t fw_algo;
cudnnConvolutionBwdDataAlgo_t bd_algo;
cudnnConvolutionBwdFilterAlgo_t bf_algo;
#endif
#endif
};
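Most of these fields are only used by other layer types; for a convolutional layer, the key derived quantities are the output dimensions and parameter counts that make_convolutional_layer fills in. As a simplified sketch (the formulas mirror darknet's convolutional_out_height/width and weight-count arithmetic; allocation and initialization are omitted):

#include <stdio.h>

/* Sketch of the shape/parameter arithmetic done by make_convolutional_layer */
static void print_conv_shapes(int h, int w, int c, int n, int groups,
                              int size, int stride, int padding)
{
    int out_h = (h + 2*padding - size)/stride + 1;   /* output height */
    int out_w = (w + 2*padding - size)/stride + 1;   /* output width */
    int out_c = n;                                   /* one output channel per filter */
    int nweights = (c/groups)*n*size*size;           /* grouped-convolution weight count */
    printf("out: %dx%dx%d, weights: %d, biases: %d\n",
           out_h, out_w, out_c, nweights, n);
}

int main(void)
{
    /* e.g. a 416x416x3 input through filters=32, size=3, stride=1, pad=1 */
    print_conv_shapes(416, 416, 3, 32, 1, 3, 1, 1);  /* out: 416x416x32, weights: 864 */
    return 0;
}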
ACTIVATION is an enum type:
typedef enum{
LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU
} ACTIVATION;
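Each enum value maps to an elementwise function in activations.c. For example LEAKY, the one YOLO typically uses, is implemented in darknet's activations.h as a leaky ReLU with a fixed negative slope of 0.1:

static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}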
ACTIVATION activation = get_activation(activation_s); the function falls back to RELU when the name is not recognized:
ACTIVATION get_activation(char *s)
{
if (strcmp(s, "logistic")==0) return LOGISTIC;
if (strcmp(s, "loggy")==0) return LOGGY;
if (strcmp(s, "relu")==0) return RELU;
if (strcmp(s, "elu")==0) return ELU;
if (strcmp(s, "selu")==0) return SELU;
if (strcmp(s, "relie")==0) return RELIE;
if (strcmp(s, "plse")==0) return PLSE;
if (strcmp(s, "hardtan")==0) return HARDTAN;
if (strcmp(s, "lhtan")==0) return LHTAN;
if (strcmp(s, "linear")==0) return LINEAR;
if (strcmp(s, "ramp")==0) return RAMP;
if (strcmp(s, "leaky")==0) return LEAKY;
if (strcmp(s, "tanh")==0) return TANH;
if (strcmp(s, "stair")==0) return STAIR;
fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
return RELU;
}
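A minimal usage sketch (the string "nosuch" is a made-up name to show the fallback path):

ACTIVATION a = get_activation("leaky");   // returns LEAKY
ACTIVATION b = get_activation("nosuch");  // prints the warning, returns RELU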