Notes on the YOLO Convolutional Layer Code

Convolutional layer initialization

First, the im2col_cpu routine that both the forward and the backward pass rely on: it unrolls the input feature map into a matrix so that convolution becomes a single matrix multiplication.

void im2col_cpu(float* data_im,
     int channels,  int height,  int width,
     int ksize,  int stride, int pad, float* data_col) 
{
    int c,h,w;
    int height_col = (height - ksize) / stride + 1;
    int width_col = (width - ksize) / stride + 1;
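    //note: darknet treats pad as a flag here; when it is set, the effective
    //padding becomes ksize/2 per side, so the output size works out to
    //(height-1)/stride + 1 (the input size is preserved at stride 1)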
    if (pad){
        height_col = 1 + (height-1) / stride;
        width_col = 1 + (width-1) / stride;
        pad = ksize/2;
    }
    int channels_col = channels * ksize * ksize;
    for (c = 0; c < channels_col; ++c) {
        //the three indices below locate position c inside a flattened kernel;
        //kernels are stored channel by channel, row by row within a channel.
        //so w_offset is the column (c mod ksize), h_offset is the row
        //((c / ksize) mod ksize), and c_im is the input channel
        //(each kernel channel holds ksize*ksize values).
        int w_offset = c % ksize;
        int h_offset = (c / ksize) % ksize;
        int c_im = c / ksize / ksize;
        for (h = 0; h < height_col; ++h) {
            for (w = 0; w < width_col; ++w) {
                int im_row = h_offset + h * stride;
                int im_col = w_offset + w * stride;
                int col_index = (c * height_col + h) * width_col + w;
                data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,
                        im_row, im_col, c_im, pad);
            }
        }
    }
}
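
im2col_cpu delegates bounds checking and the pad offset to im2col_get_pixel, which is not shown above. For reference, here is a sketch consistent with the darknet source; reads that fall in the padded border simply return 0:

float im2col_get_pixel(float *im, int height, int width, int channels,
        int row, int col, int channel, int pad)
{
    row -= pad;
    col -= pad;
    //anything inside the padded border reads as zero
    if (row < 0 || col < 0 ||
        row >= height || col >= width) return 0;
    //images are stored channel by channel, row-major within each channel
    return im[col + width*(row + height*channel)];
}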

convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize, int binary)
{
    int i;
    convolutional_layer l = {0};
    l.type = CONVOLUTIONAL;//layer type

    l.h = h;//input feature map height
    l.w = w;//input feature map width
    l.c = c;//input feature map channels
    l.n = n;//number of filters
    l.binary = binary;
    l.batch = batch;//number of samples per forward pass
    l.stride = stride;//convolution stride
    l.size = size;//filter size (size x size)
    l.pad = pad;//padding flag: 0 adds no padding; nonzero pads the feature map by size/2 on each side (see im2col_cpu)
    /*this is how yolo computes the output feature map height:
        if (!l.pad) h -= l.size;
        else h -= 1;
        return h/l.stride + 1;
    */
    l.batch_normalize = batch_normalize;

    l.filters = calloc(c*n*size*size, sizeof(float));//filter weights
    l.filter_updates = calloc(c*n*size*size, sizeof(float));//filter weight gradients

    l.biases = calloc(n, sizeof(float));//biases
    l.bias_updates = calloc(n, sizeof(float));//bias gradients

    // float scale = 1./sqrt(size*size*c);
    float scale = sqrt(2./(size*size*c));
    for(i = 0; i < c*n*size*size; ++i) l.filters[i] = scale*rand_uniform(-1, 1);
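    //the scale above is sqrt(2/fan_in) with fan_in = size*size*c (He-style
    //initialization); the commented-out line is the Xavier-style 1/sqrt(fan_in)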
    int out_h = convolutional_out_height(l);//output feature map height
    int out_w = convolutional_out_width(l);//output feature map width
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;//output feature map channels
    l.outputs = l.out_h * l.out_w * l.out_c;//number of output values per sample
    l.inputs = l.w * l.h * l.c;//number of input values per sample

    l.col_image = calloc(out_h * out_w * size * size * c, sizeof(float));//buffer holding the im2col form of one input feature map
    l.output = calloc(l.batch * out_h * out_w * n, sizeof(float));//outputs for the whole batch
    l.delta  = calloc(l.batch * out_h * out_w * n, sizeof(float));

    if(binary){
        l.binary_filters = calloc(c*n*size*size, sizeof(float));
        l.cfilters = calloc(c*n*size*size, sizeof(char));
        l.scales = calloc(n, sizeof(float));
    }
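    //note: if binary and batch_normalize are both set, l.scales is allocated
    //again in the next block and this first allocation is leaked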

    if(batch_normalize){
        l.scales = calloc(n, sizeof(float));
        l.scale_updates = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }

        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));

        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
    }

#ifdef GPU
    l.filters_gpu = cuda_make_array(l.filters, c*n*size*size);
    l.filter_updates_gpu = cuda_make_array(l.filter_updates, c*n*size*size);

    l.biases_gpu = cuda_make_array(l.biases, n);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);

    l.scales_gpu = cuda_make_array(l.scales, n);
    l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);

    l.col_image_gpu = cuda_make_array(l.col_image, out_h*out_w*size*size*c);
    l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
    l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);

    if(binary){
        l.binary_filters_gpu = cuda_make_array(l.filters, c*n*size*size);
    }

    if(batch_normalize){
        l.mean_gpu = cuda_make_array(l.mean, n);
        l.variance_gpu = cuda_make_array(l.variance, n);

        l.rolling_mean_gpu = cuda_make_array(l.mean, n);
        l.rolling_variance_gpu = cuda_make_array(l.variance, n);

        l.mean_delta_gpu = cuda_make_array(l.mean, n);
        l.variance_delta_gpu = cuda_make_array(l.variance, n);

        l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
        l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
    }
#endif
    l.activation = activation;//activation type

    fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);

    return l;
}
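
For completeness, convolutional_out_height, whose body is quoted in the comment inside make_convolutional_layer above, looks like this in the darknet source (convolutional_out_width is identical with w in place of h):

int convolutional_out_height(convolutional_layer l)
{
    int h = l.h;
    if (!l.pad) h -= l.size;//no padding: "valid" convolution
    else h -= 1;            //pad flag set: effective padding of size/2 per side
    return h/l.stride + 1;
}

For example, a 416 x 416 input with size=3 and stride=1 gives out_h = (416-1)/1 + 1 = 416 with pad set, and (416-3)/1 + 1 = 414 without.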

Forward pass

void forward_convolutional_layer(convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);//output feature map height
    int out_w = convolutional_out_width(l); //output feature map width
    int i;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    /*
    if(l.binary){
        binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.binary_filters);
        binarize_filters2(l.filters, l.n, l.c*l.size*l.size, l.cfilters, l.scales);
        swap_binary(&l);
    }
    */

    if(l.binary){
        int m = l.n;
        int k = l.size*l.size*l.c;
        int n = out_h*out_w;

        char  *a = l.cfilters;
        float *b = l.col_image;
        float *c = l.output;

        for(i = 0; i < l.batch; ++i){
            im2col_cpu(state.input, l.c, l.h, l.w, 
                    l.size, l.stride, l.pad, b);
            gemm_bin(m,n,k,1,a,k,b,n,c,n);
            c += n*m;
            state.input += l.c*l.h*l.w;
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
        add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
        activate_array(l.output, m*n*l.batch, l.activation);
        return;
    }

    int m = l.n;//number of filters = number of output channels
    int k = l.size*l.size*l.c;//size of one flattened filter
    int n = out_h*out_w;//spatial size of the output feature map

    float *a = l.filters;
    float *b = l.col_image;
    float *c = l.output;

    for(i = 0; i < l.batch; ++i){//process the batch one sample at a time
        im2col_cpu(state.input, l.c, l.h, l.w, 
                l.size, l.stride, l.pad, b);
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);//convolution as matrix multiplication: output(m x n) += filters(m x k) * col_image(k x n)
        c += n*m;
        state.input += l.c*l.h*l.w;
    }

    if(l.batch_normalize){//batch normalization: normalize each output channel, using batch statistics during training and rolling averages at inference
        if(state.train){
            mean_cpu(l.output, l.batch, l.n, l.out_h*l.out_w, l.mean);   
            variance_cpu(l.output, l.mean, l.batch, l.n, l.out_h*l.out_w, l.variance);   
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.n, l.out_h*l.out_w);   
        } else {
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.n, l.out_h*l.out_w);
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
    }
    add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);//add the bias

    activate_array(l.output, m*n*l.batch, l.activation);//apply the activation function
}
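
The batch-normalization branch above whitens each output channel before scaling and biasing it. A minimal sketch of normalize_cpu, consistent with the darknet source (the small constant guards against division by zero):

#include <math.h>

void normalize_cpu(float *x, float *mean, float *variance,
        int batch, int filters, int spatial)
{
    int b, f, i;
    for(b = 0; b < batch; ++b){
        for(f = 0; f < filters; ++f){
            for(i = 0; i < spatial; ++i){
                int index = b*filters*spatial + f*spatial + i;
                //per-channel whitening: x = (x - mean) / sqrt(variance)
                x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f);
            }
        }
    }
}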

Backward pass

void backward_convolutional_layer(convolutional_layer l, network_state state)
{
    int i;
    int m = l.n;
    int n = l.size*l.size*l.c;
    int k = convolutional_out_height(l)*
        convolutional_out_width(l);
    //multiply the derivative of the activation function elementwise into the
    //error propagated back from the layer above (the result stays in l.delta)
    gradient_array(l.output, m*k*l.batch, l.activation, l.delta);
    //gradient of the biases: sum l.delta over the spatial positions
    backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);

    for(i = 0; i < l.batch; ++i){
        //a is l.delta for this sample: l.n x (height_col*width_col), i.e. m x k
        //b is the im2col form of the input feature map: (channels*ksize*ksize) x (height_col*width_col), i.e. n x k
        //c is the filter gradient: l.n x (channels*ksize*ksize), i.e. m x n
        //so c += a * b'
        float *a = l.delta + i*m*k;//error propagated back into this layer
        float *b = l.col_image;
        float *c = l.filter_updates;

        float *im = state.input+i*l.c*l.h*l.w;

        im2col_cpu(im, l.c, l.h, l.w, 
                l.size, l.stride, l.pad, b);
        gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);//accumulate the filter gradient

        if(state.delta){
            //a is the filters: l.n x (channels*ksize*ksize), i.e. m x n
            //b is l.delta for this sample: l.n x (height_col*width_col), i.e. m x k
            //c is the col-form error w.r.t. the input: (channels*ksize*ksize) x (height_col*width_col), i.e. n x k
            //so c = a' * b
            a = l.filters;
            b = l.delta + i*m*k;
            c = l.col_image;//reuse the im2col buffer to hold the col-form error
            //compute the error passed back to the previous layer; after col2im
            //it has exactly the same size as this layer's input
            gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);

            col2im_cpu(l.col_image, l.c,  l.h,  l.w,  l.size,  l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
        }
    }
}
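
col2im_cpu, called at the end of the backward pass, is the mirror image of im2col_cpu: it walks the same (c, h, w) loops but accumulates each column entry back into the image buffer, so overlapping receptive fields sum their contributions. Its per-pixel helper, sketched to match the darknet source:

void col2im_add_pixel(float *im, int height, int width, int channels,
        int row, int col, int channel, int pad, float val)
{
    row -= pad;
    col -= pad;
    //contributions that land in the padded border are discarded
    if (row < 0 || col < 0 ||
        row >= height || col >= width) return;
    im[col + width*(row + height*channel)] += val;
}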
