/* Convolutional layer initialization */
/*
** Unrolls one CHW image into a column matrix so convolution can be done
** as a single matrix multiply (GEMM).  Each column of data_col holds one
** receptive field; each row corresponds to one (channel, ky, kx) filter
** position.
**
**  data_im   input image, channels*height*width floats, CHW layout
**  ksize     square filter side length
**  stride    filter step
**  pad       nonzero => pad so output keeps input spatial extent
**            (actual pad amount becomes ksize/2)
**  data_col  output, (channels*ksize*ksize) x (out_h*out_w) floats
**
** Out-of-bounds reads are handled by im2col_get_pixel (defined elsewhere
** in this file), which presumably returns 0 for padded positions —
** NOTE(review): confirm against its definition.
*/
void im2col_cpu(float * data_im,
int channels, int height, int width,
int ksize, int stride, int pad, float * data_col)
{
    int out_h = (height - ksize) / stride + 1;
    int out_w = (width - ksize) / stride + 1;
    if (pad) {
        /* "same"-style padding: spatial size depends only on stride */
        out_h = (height - 1) / stride + 1;
        out_w = (width - 1) / stride + 1;
        pad = ksize / 2;
    }

    int rows = channels * ksize * ksize;
    int row, y, x;
    for (row = 0; row < rows; ++row) {
        /* decompose the row index into (channel, kernel-y, kernel-x) */
        int kx = row % ksize;
        int ky = (row / ksize) % ksize;
        int ch = row / ksize / ksize;
        for (y = 0; y < out_h; ++y) {
            for (x = 0; x < out_w; ++x) {
                int im_row = ky + y * stride;
                int im_col = kx + x * stride;
                int dst = (row * out_h + y) * out_w + x;
                data_col[dst] = im2col_get_pixel(data_im, height, width, channels,
                        im_row, im_col, ch, pad);
            }
        }
    }
}
/*
** Builds a convolutional layer and allocates all of its CPU (and, when
** compiled with GPU, device) buffers.
**
**  batch            images per mini-batch
**  h, w, c          input height, width, channels
**  n                number of filters (output channels)
**  size             square filter side length
**  stride, pad      convolution stride and padding flag
**  activation       activation applied after the bias add
**  batch_normalize  nonzero => allocate batch-norm statistics buffers
**  binary           nonzero => allocate binarized-filter buffers
**
** Returns the fully initialized layer by value.
** NOTE(review): calloc results are not checked, matching the allocation
** style of the rest of this file — confirm the project aborts on OOM.
*/
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize, int binary)
{
    int i;
    convolutional_layer l = {0};
    l.type = CONVOLUTIONAL;

    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.binary = binary;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = pad;
    l.batch_normalize = batch_normalize;

    l.filters = calloc(c*n*size*size, sizeof(float));
    l.filter_updates = calloc(c*n*size*size, sizeof(float));

    l.biases = calloc(n, sizeof(float));
    l.bias_updates = calloc(n, sizeof(float));

    /* random init scaled by the fan-in of one filter (He-style factor) */
    float scale = sqrt(2./(size*size*c));
    for(i = 0; i < c*n*size*size; ++i) l.filters[i] = scale*rand_uniform(-1, 1);

    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;

    /* im2col scratch (one image) plus batched output/delta buffers */
    l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
    l.output = calloc(l.batch*out_h*out_w*n, sizeof(float));
    l.delta  = calloc(l.batch*out_h*out_w*n, sizeof(float));

    if(binary){
        l.binary_filters = calloc(c*n*size*size, sizeof(float));
        l.cfilters = calloc(c*n*size*size, sizeof(char));
        /* BUGFIX: l.scales used to be allocated unconditionally here and
           then allocated AGAIN in the batch_normalize branch below,
           leaking this buffer whenever binary && batch_normalize.
           Allocate here only when batch norm will not provide it. */
        if(!batch_normalize) l.scales = calloc(n, sizeof(float));
    }

    if(batch_normalize){
        l.scales = calloc(n, sizeof(float));
        l.scale_updates = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;   /* identity scale until statistics are learned */
        }

        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));

        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
    }

#ifdef GPU
    l.filters_gpu = cuda_make_array(l.filters, c*n*size*size);
    l.filter_updates_gpu = cuda_make_array(l.filter_updates, c*n*size*size);

    l.biases_gpu = cuda_make_array(l.biases, n);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);

    /* NOTE(review): l.scales/l.scale_updates are NULL when neither binary
       nor batch_normalize is set — cuda_make_array is presumably tolerant
       of a NULL source; confirm. */
    l.scales_gpu = cuda_make_array(l.scales, n);
    l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);

    l.col_image_gpu = cuda_make_array(l.col_image, out_h*out_w*size*size*c);
    l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
    l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);

    if(binary){
        l.binary_filters_gpu = cuda_make_array(l.filters, c*n*size*size);
    }

    if(batch_normalize){
        l.mean_gpu = cuda_make_array(l.mean, n);
        l.variance_gpu = cuda_make_array(l.variance, n);

        l.rolling_mean_gpu = cuda_make_array(l.mean, n);
        l.rolling_variance_gpu = cuda_make_array(l.variance, n);

        l.mean_delta_gpu = cuda_make_array(l.mean, n);
        l.variance_delta_gpu = cuda_make_array(l.variance, n);

        l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
        l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
    }
#endif
    l.activation = activation;

    fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);

    return l;
}
/* Forward propagation */
/*
** Forward pass of a convolutional layer via im2col + GEMM.
**
** For each image in the batch: unroll the input into l.col_image, then
** multiply by the filter matrix to produce l.output.  The binary path
** uses the packed filters (l.cfilters) with gemm_bin and applies the
** per-filter scales.  Batch normalization (when enabled) normalizes the
** output with batch statistics while training and with the rolling
** statistics at inference.
**
** BUGFIX: this block was syntactically mangled — statement terminators
** were missing throughout, identifiers were split by stray spaces
** (e.g. "l.col _image"), and both batch loops had truncated headers
** ("for(i = 0").  Reconstructed from the surviving fragments.
*/
void forward_convolutional_layer(convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int i;

    /* clear the output accumulator before the beta=1 GEMMs below */
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    if(l.binary){
        int m = l.n;                 /* rows: one per filter            */
        int k = l.size*l.size*l.c;   /* filter volume                   */
        int n = out_h*out_w;         /* columns: output spatial positions */

        char  *a = l.cfilters;
        float *b = l.col_image;
        float *c = l.output;

        for(i = 0; i < l.batch; ++i){
            im2col_cpu(state.input, l.c, l.h, l.w,
                    l.size, l.stride, l.pad, b);
            gemm_bin(m,n,k,1,a,k,b,n,c,n);
            c += n*m;                      /* advance to next image's output */
            state.input += l.c*l.h*l.w;    /* advance to next input image    */
        }
        /* binary filters carry their magnitude in l.scales */
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
        add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
        activate_array(l.output, m*n*l.batch, l.activation);
        return;
    }

    int m = l.n;
    int k = l.size*l.size*l.c;
    int n = out_h*out_w;

    float *a = l.filters;
    float *b = l.col_image;
    float *c = l.output;

    for(i = 0; i < l.batch; ++i){
        im2col_cpu(state.input, l.c, l.h, l.w,
                l.size, l.stride, l.pad, b);
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        c += n*m;
        state.input += l.c*l.h*l.w;
    }

    if(l.batch_normalize){
        if(state.train){
            /* training: normalize with this batch's statistics */
            mean_cpu(l.output, l.batch, l.n, l.out_h*l.out_w, l.mean);
            variance_cpu(l.output, l.mean, l.batch, l.n, l.out_h*l.out_w, l.variance);
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.n, l.out_h*l.out_w);
        } else {
            /* inference: use the rolling averages accumulated in training */
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.n, l.out_h*l.out_w);
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
    }
    add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);

    activate_array(l.output, m*n*l.batch, l.activation);
}
/* Backward propagation */
/*
** Backward pass of a convolutional layer.
**
** First back-propagates through the activation into l.delta and
** accumulates the bias gradients.  Then, per image: re-runs im2col on the
** input and accumulates the filter gradients (dW += delta * cols^T); if
** the previous layer wants a gradient (state.delta != NULL), computes
** dX = col2im(W^T * delta) into it.
*/
void backward_convolutional_layer(convolutional_layer l, network_state state)
{
    int b;
    int nfilters = l.n;                     /* one GEMM row per filter       */
    int fvolume  = l.size*l.size*l.c;       /* elements in a single filter   */
    int spatial  = convolutional_out_height(l)*
                   convolutional_out_width(l);

    gradient_array(l.output, nfilters*spatial*l.batch, l.activation, l.delta);
    backward_bias(l.bias_updates, l.delta, l.batch, l.n, spatial);

    for(b = 0; b < l.batch; ++b){
        float *delta_b = l.delta + b*nfilters*spatial;
        float *cols    = l.col_image;
        float *im      = state.input + b*l.c*l.h*l.w;

        /* dW += delta_b * cols^T  (accumulates across the batch, beta=1) */
        im2col_cpu(im, l.c, l.h, l.w,
                l.size, l.stride, l.pad, cols);
        gemm(0, 1, nfilters, fvolume, spatial, 1,
                delta_b, spatial, cols, spatial, 1, l.filter_updates, fvolume);

        if(state.delta){
            /* dX = col2im(W^T * delta_b); beta=0 overwrites the scratch */
            gemm(1, 0, fvolume, spatial, nfilters, 1,
                    l.filters, fvolume, delta_b, spatial, 0, l.col_image, spatial);
            col2im_cpu(l.col_image, l.c, l.h, l.w, l.size, l.stride,
                    l.pad, state.delta + b*l.c*l.h*l.w);
        }
    }
}