1、在parser.c文件里,声明dropout层时,直接指定了其输出的内存地址为上一层的输出地址,代码如下:
#ifndef pca
fprintf(stderr, "layer filters size input output\n");
#else
fprintf(stderr, "加载数据!!");
#endif
while(n){
params.index = count;
// fprintf(stderr, "%5d ", count);
s = (section *)n->val;
options = s->options;
layer l = {0};
LAYER_TYPE lt = string_to_layer_type(s->type);
if(lt == CONVOLUTIONAL){
l = parse_convolutional(options, params);
}else if(lt == LOCAL){
l = parse_local(options, params);
}else if(lt == ACTIVE){
l = parse_activation(options, params);
}else if(lt == RNN){
l = parse_rnn(options, params);
}else if(lt == GRU){
l = parse_gru(options, params);
}else if(lt == CRNN){
l = parse_crnn(options, params);
}else if(lt == CONNECTED){
l = parse_connected(options, params);
}else if(lt == CROP){
l = parse_crop(options, params);
}else if(lt == COST){
l = parse_cost(options, params);
}else if(lt == REGION){
l = parse_region(options, params);
}else if(lt == DETECTION){
l = parse_detection(options, params);
}else if(lt == SOFTMAX){
l = parse_softmax(options, params);
net.hierarchy = l.softmax_tree;
}else if(lt == NORMALIZATION){
l = parse_normalization(options, params);
}else if(lt == BATCHNORM){
l = parse_batchnorm(options, params);
}else if(lt == MAXPOOL){
l = parse_maxpool(options, params);
}else if(lt == REORG){
l = parse_reorg(options, params);
}else if(lt == AVGPOOL){
l = parse_avgpool(options, params);
}else if(lt == ROUTE){
l = parse_route(options, params, net);
}else if(lt == SHORTCUT){
l = parse_shortcut(options, params, net);
}else if(lt == DROPOUT) {
l = parse_dropout(options, params);
l.output = net.layers[count-1].output;
l.delta = net.layers[count-1].delta;
而其前向传播代码在dropout_layer.c里:
// Forward pass of a dropout layer. A no-op at inference time; during
// training, each input activation is zeroed with probability l.probability
// and the survivors are rescaled by l.scale (inverted dropout).
// NOTE(review): state.input is modified in place — as the surrounding text
// explains, for this layer l.output aliases the same buffer, so writing
// through state.input is also writing this layer's output.
void forward_dropout_layer(dropout_layer l, network_state state)
{
int i;
// Dropout only applies during training; at inference the input passes through untouched.
if (!state.train) return;
for(i = 0; i < l.batch * l.inputs; ++i){
float r = rand_uniform(0, 1);
l.rand[i] = r;  // record the draw — presumably reused by the backward pass to mask gradients; not shown here
if(r < l.probability)
// Zeroing the activation is equivalent to dropping this neuron.
state.input[i] = 0;
else
// Rescale surviving activations so the expected magnitude of the
// layer's output matches the no-dropout network.
state.input[i] *= l.scale;
}
}
这里的state.input跟l.output的内存地址是一致的。因为在整体网络前向传播的代码里,可以知道state.input的地址就是上一层网络的输出值地址,network.c中的代码如下:
// Runs a forward pass over every layer of the network in order, threading
// the shared network_state through each layer's forward function.
void forward_network(network net, network_state state)
{
state.workspace = net.workspace;
int i;
for (i = 0; i < net.n; ++i){
state.index = i;
layer l = net.layers[i]; // fetch the next layer to execute
if (l.delta){
// Scale the gradient buffer by 0 (i.e. zero it) so this pass does
// not accumulate on top of stale gradients.
scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
}
l.forward(l, state);
// Chain the layers: the next layer's input pointer is set to this
// layer's output buffer. Only the address is handed on — no copy is
// made, which is why dropout's in-place writes also touch l.output.
state.input = l.output;
}
}