【代码阅读】darknet源码阅读（九）:col2im.h 和 col2im.c

最新推荐文章于 2023-10-20 17:39:20 发布

花噜噜酱

最新推荐文章于 2023-10-20 17:39:20 发布

阅读量534

点赞数

分类专栏： darknet代码阅读文章标签： c++

本文链接：https://blog.csdn.net/weixin_38715903/article/details/106636903

版权

darknet代码阅读专栏收录该内容

9 篇文章 2 订阅

订阅专栏

参考文献依然是放前面：https://blog.csdn.net/caicaiatnbu/category_9096319.html

darknet版本：https://github.com/AlexeyAB/darknet （与原始版本还是有一点区别的）。

因为第一次读源码，我就直接按照参考文献的顺序来了，到时候再查漏补缺，加油！

【只分析原理，弄明白具体的功能，就不去看gpu了，等以后有时间再补.cu文件】

首先要弄明白im2col和col2im的原理，链接如下：

https://blog.csdn.net/u013066730/article/details/86489139?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-4.nonecase&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-4.nonecase

其次，要先去看一下im2col的代码，链接如下：

https://blog.csdn.net/weixin_38715903/article/details/105978855

最后开始看代码

1.col2im.h

#ifndef COL2IM_H
#define COL2IM_H

#ifdef __cplusplus
extern "C" {
#endif
void col2im_cpu(float* data_col,
        int channels, int height, int width,
        int ksize, int stride, int pad, float* data_im);

void col2im_cpu_ext(const float* data_col, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w,
    const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    float* data_im);

#ifdef GPU
void col2im_ongpu(float *data_col,
        int channels, int height, int width,
        int ksize, int stride, int pad, float *data_im);


void col2im_gpu_ext(const float* data_col, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h,
    const int stride_w, const int dilation_h, const int dilation_w,
    float* data_im);
#endif
#ifdef __cplusplus
}
#endif
#endif

2.col2im.c

#include <stdio.h>
#include <math.h>
#include <string.h>
#include "col2im.h"

/**
 * Add val to a single image element, silently ignoring positions that land
 * in the padding border.
 *
 * row and col are given in padded coordinates: pad is subtracted from both
 * before the bounds check. The image is modified in place and is indexed as
 * col + width * (row + height * channel).
 *
 * @param im       image buffer that receives the addition
 * @param height   number of rows in the (unpadded) image
 * @param width    number of columns in the (unpadded) image
 * @param channels number of channels (not used by this function)
 * @param row      row of the target element, in padded coordinates
 * @param col      column of the target element, in padded coordinates
 * @param channel  channel of the target element
 * @param pad      padding width that was applied on each side
 * @param val      value to add
 */

void col2im_add_pixel(float *im, int height, int width, int channels,
                        int row, int col, int channel, int pad, float val)
{
    /* Translate from padded coordinates to coordinates inside the image. */
    const int y = row - pad;
    const int x = col - pad;
    const int inside = (y >= 0) && (x >= 0) && (y < height) && (x < width);

    (void)channels;

    /* Anything outside the image belongs to the padding and is dropped. */
    if (inside) {
        im[x + width*(y + height*channel)] += val;
    }
}
//This one might be too, can't remember.


/**
 * Reverse of the im2col rearrangement: scatter every entry of the column
 * matrix back to the image position it maps to and add it to the value
 * already stored there.
 *
 * data_col is read as a row-major matrix with
 *     channels * ksize * ksize                rows, and
 *     height_col * width_col                  columns, where
 *     height_col = (height + 2*pad - ksize) / stride + 1
 *     width_col  = (width  + 2*pad - ksize) / stride + 1
 *
 * data_im is accumulated into (this function never clears it), so its prior
 * contents matter. Entries that map into the padding border are discarded by
 * col2im_add_pixel.
 *
 * @param data_col column matrix to read from
 * @param channels number of image channels
 * @param height   image height (rows)
 * @param width    image width (columns)
 * @param ksize    kernel side length (square kernel)
 * @param stride   step between neighbouring kernel placements
 * @param pad      padding applied on each side of the image
 * @param data_im  image buffer that receives the accumulated values
 */

// Example: a 5x5 image, 3x3 kernel, stride 2, pad 0 gives a 2x2 grid of kernel
// placements, so every row of data_col holds 2*2 = 4 values.
void col2im_cpu(float* data_col,
         int channels,  int height,  int width,
         int ksize,  int stride, int pad, float* data_im)
{
    // Number of kernel placements along each axis.
    const int out_rows = (height + 2*pad - ksize) / stride + 1;
    const int out_cols = (width + 2*pad - ksize) / stride + 1;
    // Total number of rows in the column matrix.
    const int col_rows = channels * ksize * ksize;

    // data_col is visited strictly in storage order, so one running index
    // is enough to address it.
    int col_index = 0;
    int r, oy, ox;

    for (r = 0; r < col_rows; ++r) {
        // Decompose the row number into (channel, kernel row, kernel column).
        const int kx = r % ksize;
        const int ky = (r / ksize) % ksize;
        const int plane = r / ksize / ksize;

        for (oy = 0; oy < out_rows; ++oy) {
            // Image row (padded coordinates) touched by this placement.
            const int im_row = ky + oy * stride;

            for (ox = 0; ox < out_cols; ++ox) {
                // Image column (padded coordinates) touched by this placement.
                const int im_col = kx + ox * stride;

                col2im_add_pixel(data_im, height, width, channels,
                        im_row, im_col, plane, pad, data_col[col_index]);
                ++col_index;
            }
        }
    }
}
// ----------------------------------------
/**
 * Fill the first N floats of Y with alpha.
 *
 * Y must already point to storage for at least N floats; nothing is
 * allocated here. A zero fill goes through memset, any other value is
 * written element by element.
 *
 * @param N     number of elements to write; values <= 0 write nothing
 * @param alpha value to store in every element
 * @param Y     destination buffer
 */
void caffe_set(const int N, const float alpha, float* Y) {
    int i;

    // A negative N would be converted to a huge size_t in the memset length
    // below and overrun the buffer, so reject non-positive counts up front.
    if (N <= 0) {
        return;
    }
    if (alpha == 0) {
        memset(Y, 0, sizeof(float) * (size_t)N);  // NOLINT(caffe/alt_fn)
        return;
    }
    for (i = 0; i < N; ++i) {
        Y[i] = alpha;
    }
}

// Range check 0 <= a < b done with one comparison: both operands are
// converted to unsigned, so a negative a becomes a very large value and
// fails the test. (A negative b is converted the same way.)
inline static int is_a_ge_zero_and_a_lt_b(int a, int b) {
    const unsigned ua = (unsigned)a;
    const unsigned ub = (unsigned)b;
    return ua < ub;
}

// https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cpp
/**
 * col2im with separate height/width settings for kernel size, padding,
 * stride and dilation.
 *
 * Unlike an accumulate-only variant, this function first zeroes
 * height * width * channels floats of data_im and then adds every
 * contribution from data_col into it.
 *
 * data_col is consumed sequentially in the nesting order
 * channel -> kernel row -> kernel column -> output row -> output column,
 * with output_h * output_w values per (channel, kernel row, kernel column):
 *     output_h = (height + 2*pad_h - (dilation_h*(kernel_h-1) + 1)) / stride_h + 1
 *     output_w = (width  + 2*pad_w - (dilation_w*(kernel_w-1) + 1)) / stride_w + 1
 * Values whose target row or column lies outside the image are skipped.
 */
void col2im_cpu_ext(const float* data_col, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w,
    const int stride_h, const int stride_w,
    const int dilation_h, const int dilation_w,
    float* data_im)
{
    // Start from an all-zero image; everything below only adds to it.
    caffe_set(height * width * channels, 0.0F, data_im);

    const int output_h = (height + 2 * pad_h -
        (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
    const int output_w = (width + 2 * pad_w -
        (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
    const int plane_size = height * width;

    const float *src = data_col;   // read cursor into the column data
    float *plane = data_im;        // start of the channel being written
    int channels_left = channels;
    int ky, kx, rows_left, cols_left;

    while (channels_left-- != 0) {
        for (ky = 0; ky < kernel_h; ky++) {
            for (kx = 0; kx < kernel_w; kx++) {
                // Image row hit by this kernel tap at the first output row.
                int in_row = -pad_h + ky * dilation_h;

                for (rows_left = output_h; rows_left != 0;
                     rows_left--, in_row += stride_h) {
                    int in_col;

                    if (!is_a_ge_zero_and_a_lt_b(in_row, height)) {
                        // Whole row falls outside the image: skip its values.
                        src += output_w;
                        continue;
                    }

                    // Image column hit by this tap at the first output column.
                    in_col = -pad_w + kx * dilation_w;
                    for (cols_left = output_w; cols_left != 0; cols_left--) {
                        if (is_a_ge_zero_and_a_lt_b(in_col, width)) {
                            plane[in_row * width + in_col] += *src;
                        }
                        src++;
                        in_col += stride_w;
                    }
                }
            }
        }
        // Move on to the next channel plane of the image.
        plane += plane_size;
    }
}

花噜噜酱

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
【代码阅读】darknet源码阅读（九）:col2im.h 和 col2im.c

参考文献依然是放前面：https://blog.csdn.net/caicaiatnbu/category_9096319.htmldarknet版本： https://github.com/AlexeyAB/darknet，与原始的版本还是有一点区别的。因为第一次读源码，我就直接按照参考文献的顺序来了，到时候再查漏补缺，加油！【只分析原理，弄明白具体的功能，就不去看gpu了，等以后有时间再补.cu文件】1.col2im.h#ifndef COL2IM_H#define COL2IM
复制链接

扫一扫

专栏目录