im2col总结

最新推荐文章于 2023-09-19 17:53:10 发布

ChunyeLi

最新推荐文章于 2023-09-19 17:53:10 发布

阅读量640

点赞数 2

分类专栏：深度学习 C 文章标签：深度学习 caffe

本文链接：https://blog.csdn.net/sinat_32512123/article/details/117219782

版权

深度学习同时被 2 个专栏收录

28 篇文章 3 订阅

订阅专栏

4 篇文章 0 订阅

订阅专栏

本文详细解读了Darknet库中的im2col函数，它用于将图像矩阵转换为便于矩阵相乘的列式布局，便于理解卷积过程。通过实例说明，展示了如何通过im2col进行元素提取和数组格式转换，以支持复杂卷积操作。

摘要由CSDN通过智能技术生成

目标

darknet中im2col.c详细解析

具体数值举例

目标

为了将稍复杂的卷积相乘转化为“平摊式”的矩阵相乘，先使用im2col完成数组格式的转化，便于之后的矩阵相乘计算。

darknet中im2col.c详细解析

按行存储。

#include "im2col.h"
#include <stdio.h>
/**
 * Fetch one element of a multi-channel image, addressed in padded coordinates.
 *
 * @param im       image data as a flat array: each channel is stored row by
 *                 row, and the channels are concatenated one after another
 * @param height   height of one channel (padding not included)
 * @param width    width of one channel (padding not included)
 * @param channels number of channels (not used by this function)
 * @param row      row of the requested element (padding included)
 * @param col      column of the requested element (padding included)
 * @param channel  channel of the requested element
 * @param pad      padding size
 * @return the element at (channel, row - pad, col - pad), or 0 when that
 *         position lies outside the stored image, i.e. in the zero padding
 */
float im2col_get_pixel(float *im, int height, int width, int channels,
                        int row, int col, int channel, int pad)
{
    (void)channels;

    /* row and col include the padding offset; shift them back so that they
     * address the image as it is actually stored. */
    row -= pad;
    col -= pad;

    /* A coordinate below 0, or at/after the image extent, belongs to the
     * padding border, whose value is 0. */
    const int row_inside = (0 <= row) && (row < height);
    const int col_inside = (0 <= col) && (col < width);
    if (!row_inside || !col_inside) {
        return 0;
    }

    /* Row-major lookup inside the selected channel. */
    return im[col + width*(row + height*channel)];
}


//From Berkeley Vision's Caffe!
//https://github.com/BVLC/caffe/blob/master/LICENSE
/**
 * Rearrange image data into a column matrix so that a convolution can be
 * carried out as a plain matrix multiplication.
 *
 * data_col is written as a row-major matrix with
 * channels * ksize * ksize rows and height_col * width_col columns, where
 *     height_col = (height + 2*pad - ksize) / stride + 1
 *     width_col  = (width  + 2*pad - ksize) / stride + 1
 * Positions that fall into the padding border are written as 0.
 *
 * @param data_im  input image data
 * @param channels number of input channels
 * @param height   input image height
 * @param width    input image width
 * @param ksize    kernel size (used for both height and width)
 * @param stride   convolution stride
 * @param pad      padding size
 * @param data_col output array that receives the rearranged data
 */
void im2col_cpu(float* data_im,
     int channels,  int height,  int width,
     int ksize,  int stride, int pad, float* data_col)
{
    /* Number of kernel positions along each axis. */
    const int height_col = (height + 2*pad - ksize) / stride + 1;
    const int width_col = (width + 2*pad - ksize) / stride + 1;

    /* Number of rows of the output matrix: one per
     * (channel, kernel row, kernel column) combination. */
    const int channels_col = channels * ksize * ksize;

    /* c walks over the rows of the output matrix. */
    for (int c = 0; c < channels_col; ++c) {
        /*
         * Split the output row number into a position inside the kernel
         * window and a source channel. With ksize = 3:
         *   c = 0  -> w_offset = 0, h_offset = 0, c_im = 0
         *   c = 5  -> w_offset = 2, h_offset = 1, c_im = 0
         *   c = 10 -> w_offset = 1, h_offset = 0, c_im = 1
         */
        const int c_im = c / ksize / ksize;         /* source channel */
        const int h_offset = (c / ksize) % ksize;   /* row inside the kernel window */
        const int w_offset = c % ksize;             /* column inside the kernel window */

        /* h and w walk over the kernel positions on the input image. */
        for (int h = 0; h < height_col; ++h) {
            /* Source row in padded coordinates: kernel position moved by the
             * stride, plus the row offset inside the window. It depends only
             * on h, so it is computed once per h. */
            const int im_row = h_offset + h * stride;

            for (int w = 0; w < width_col; ++w) {
                /* Source column in padded coordinates, built the same way. */
                const int im_col = w_offset + w * stride;

                /* Row-major position of (row c, kernel position (h, w)) in
                 * the output matrix. */
                const int col_index = (c * height_col + h) * width_col + w;

                /* Copy the source element; padding positions yield 0. */
                data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,
                                                       im_row, im_col, c_im, pad);
            }
        }
    }
}

具体数值举例

假设输入图像矩阵（data_im）：height=width=5，channels=1;

卷积核大小ksize=3，pad=1，stride=2。

data_im={0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}

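补0后（pad=1），图像变为7×7：

0  0  0  0  0  0  0
0  0  1  2  3  4  0
0  5  6  7  8  9  0
0 10 11 12 13 14  0
0 15 16 17 18 19  0
0 20 21 22 23 24  0
0  0  0  0  0  0  0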

输出矩阵大小：

高为 channels_col=channels * ksize * ksize=9，

宽为 height_col *width_col=9。

其中height_col=(height + 2*pad - ksize) / stride + 1=3，width_col=(width + 2*pad - ksize) / stride + 1=3.

——————进入循环——————

当c=0时

列偏移w_offset = c % ksize=0%3=0，

行偏移h_offset = (c / ksize) % ksize=（0/3）%3=0

所属通道c_im = c / ksize / ksize=0/3/3=0

h=0

w=0

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=0+0*2=0

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（0*3+0）*3+0=0

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[0] = im2col_get_pixel(data_im, 5, 5, 1,0,0, 0, 1)

row -= pad=-1;col -= pad=-1;return 0;

data_col[0]=0

w=1

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=0+1*2=2

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（0*3+0）*3+1=1

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[1] = im2col_get_pixel(data_im, 5, 5, 1,0,2, 0, 1)

row -= pad=-1;col -= pad=1;return 0;

data_col[1]=0

w=2

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=0+2*2=4

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（0*3+0）*3+2=2

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[2] = im2col_get_pixel(data_im, 5, 5, 1,0,4, 0, 1)

row -= pad=-1;col -= pad=3;return 0;

data_col[2]=0

==>

h=1

w=0

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=0+0*2=0

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（0*3+1）*3+0=3

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[3] = im2col_get_pixel(data_im, 5, 5, 1,2,0, 0, 1)

row -= pad=1;col -= pad=-1;return 0;

data_col[3]=0

w=1

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=0+1*2=2

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（0*3+1）*3+1=4

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[4] = im2col_get_pixel(data_im, 5, 5, 1,2,2, 0, 1)

row -= pad=1;col -= pad=1;return im[col + width*(row + height*channel)]=im[6]=6;

data_col[4]=6

w=2

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=0+2*2=4

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（0*3+1）*3+2=5

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[5] = im2col_get_pixel(data_im, 5, 5, 1,2,4, 0, 1)

row -= pad=1;col -= pad=3;return im[col + width*(row + height*channel)]=im[8]=8;

data_col[5]=8

==>

h=2

w=0

im_row = h_offset + h * stride=0+2*2=4

int im_col = w_offset + w * stride=0+0*2=0

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（0*3+2）*3+0=6

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[6] = im2col_get_pixel(data_im, 5, 5, 1,4,0, 0, 1)

row -= pad=3;col -= pad=-1;return 0;

data_col[6]=0

w=1

im_row = h_offset + h * stride=0+2*2=4

int im_col = w_offset + w * stride=0+1*2=2

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（0*3+2）*3+1=7

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[7] = im2col_get_pixel(data_im, 5, 5, 1,4,2, 0, 1)

row -= pad=3;col -= pad=1;return im[col + width*(row + height*channel)]=im[16]=16;

data_col[7]=16

w=2

im_row = h_offset + h * stride=0+2*2=4

int im_col = w_offset + w * stride=0+2*2=4

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（0*3+2）*3+2=8

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[8] = im2col_get_pixel(data_im, 5, 5, 1,4,4, 0, 1)

row -= pad=3;col -= pad=3;return im[col + width*(row + height*channel)]=im[18]=18;

data_col[8]=18

==>

当c=1时

列偏移w_offset = 1 % ksize=1%3=1，

行偏移h_offset = (c / ksize) % ksize=（1/3）%3=0

所属通道c_im = c / ksize / ksize=1/3/3=0

h=0

w=0

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=1+0*2=1

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（1*3+0）*3+0=9

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[9] = im2col_get_pixel(data_im, 5, 5, 1,0,1, 0, 1)

row -= pad=-1;col -= pad=0;return 0;

data_col[9]=0

w=1

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=1+1*2=3

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（1*3+0）*3+1=10

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[10] = im2col_get_pixel(data_im, 5, 5, 1,0,3, 0, 1)

row -= pad=-1;col -= pad=2;return 0;

data_col[10]=0

w=2

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=1+2*2=5

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（1*3+0）*3+2=11

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[11] = im2col_get_pixel(data_im, 5, 5, 1,0,5, 0, 1)

row -= pad=-1;col -= pad=4;return 0;

data_col[11]=0

==>

h=1

w=0

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=1+0*2=1

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（1*3+1）*3+0=12

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[12] = im2col_get_pixel(data_im, 5, 5, 1,2,1, 0, 1)

row -= pad=1;col -= pad=0;return im[col + width*(row + height*channel)]=im[5]=5;

data_col[12]=5

w=1

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=1+1*2=3

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（1*3+1）*3+1=13

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[13] = im2col_get_pixel(data_im, 5, 5, 1,2,3, 0, 1)

row -= pad=1;col -= pad=2;return im[col + width*(row + height*channel)]=im[7]=7;

data_col[13]=7

w=2

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=1+2*2=5

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=（1*3+1）*3+2=14

data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[14] = im2col_get_pixel(data_im, 5, 5, 1,2,5, 0, 1)

row -= pad=1;col -= pad=4;return im[col + width*(row + height*channel)]=im[9]=9;

data_col[14]=9

==>

以此类推……

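最后得到输出矩阵（data_col，9行×9列，按行存储；第c行对应上面循环中的c）：

c=0： 0  0  0  0  6  8  0 16 18
c=1： 0  0  0  5  7  9 15 17 19
c=2： 0  0  0  6  8  0 16 18  0
c=3： 0  1  3  0 11 13  0 21 23
c=4： 0  2  4 10 12 14 20 22 24
c=5： 1  3  0 11 13  0 21 23  0
c=6： 0  6  8  0 16 18  0  0  0
c=7： 5  7  9 15 17 19  0  0  0
c=8： 6  8  0 16 18  0  0  0  0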

ChunyeLi

关注

2
点赞
踩
1

收藏

觉得还不错? 一键收藏
2
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录