im2col总结

本文详细解读了Darknet库中的im2col函数,它用于将图像矩阵转换为便于矩阵相乘的列式布局,便于理解卷积过程。通过实例说明,展示了如何通过im2col进行元素提取和数组格式转换,以支持复杂卷积操作。
摘要由CSDN通过智能技术生成

 

目录

目标

darknet中im2col.c详细解析

具体数值举例


目标

为了将稍复杂的卷积相乘转化为“平摊式”的矩阵相乘,先使用im2col完成数组格式的转化,便于之后的矩阵相乘计算。

darknet中im2col.c详细解析

输入图像data_im与输出矩阵data_col都是按行存储(行优先)的一维数组。

#include "im2col.h"
#include <stdio.h>
/**
 * Read one pixel from a zero-padded view of a multi-channel image.
 *
 * The image is a flat array: every channel is stored row by row, and the
 * channels are placed one after another.
 *
 * @param im        flat image data
 * @param height    height of one channel (padding not included)
 * @param width     width of one channel (padding not included)
 * @param channels  number of channels (not used by the lookup itself)
 * @param row       row of the requested pixel, in padded coordinates
 * @param col       column of the requested pixel, in padded coordinates
 * @param channel   channel of the requested pixel
 * @param pad       padding amount subtracted from row and col
 * @return the stored pixel value, or 0 when the position lies outside the
 *         stored image (i.e. inside the zero padding)
 */
float im2col_get_pixel(float *im, int height, int width, int channels,
                        int row, int col, int channel, int pad)
{
    /* Convert padded coordinates to coordinates of the stored image. */
    const int y = row - pad;
    const int x = col - pad;

    const int inside = (y >= 0 && y < height) && (x >= 0 && x < width);
    if (!inside) {
        return 0;   /* padding area reads as zero */
    }

    /* Channel-major, then row-major addressing. */
    return im[(channel * height + y) * width + x];
}


//From Berkeley Vision's Caffe!
//https://github.com/BVLC/caffe/blob/master/LICENSE
/**
 * Rearrange an image into the "column" matrix used to express a convolution
 * as a matrix multiplication.
 *
 * The output is written row-major with
 *   rows    = channels * ksize * ksize
 *   columns = height_out * width_out
 * where height_out = (height + 2*pad - ksize) / stride + 1 and
 *       width_out  = (width  + 2*pad - ksize) / stride + 1.
 *
 * Output row r corresponds to one fixed position inside the kernel window:
 *   kernel column = r % ksize
 *   kernel row    = (r / ksize) % ksize
 *   image channel = r / ksize / ksize
 * Example with ksize = 3:
 *   r = 0  -> kernel column 0, kernel row 0, channel 0
 *   r = 5  -> kernel column 2, kernel row 1, channel 0
 *   r = 10 -> kernel column 1, kernel row 0, channel 1
 * Within a row, the columns enumerate the window positions over the padded
 * image, top to bottom and left to right.
 *
 * @param data_im   input image data
 * @param channels  number of input channels
 * @param height    input image height
 * @param width     input image width
 * @param ksize     kernel size (used for both height and width)
 * @param stride    step between consecutive window positions
 * @param pad       padding value
 * @param data_col  output array that receives the rearranged data
 */
void im2col_cpu(float* data_im,
     int channels,  int height,  int width,
     int ksize,  int stride, int pad, float* data_col) 
{
    /* Number of window positions along each axis of the padded image. */
    const int out_h = (height + 2*pad - ksize) / stride + 1;
    const int out_w = (width + 2*pad - ksize) / stride + 1;

    /* One output row per (channel, kernel row, kernel column) combination. */
    const int out_rows = channels * ksize * ksize;

    /*
     * The loops visit the output in exactly its storage order, so a running
     * index equals (r * out_h + oy) * out_w + ox at every write.
     */
    int dst = 0;

    for (int r = 0; r < out_rows; ++r) {
        const int k_col = r % ksize;              /* offset inside the window, x */
        const int k_row = (r / ksize) % ksize;    /* offset inside the window, y */
        const int src_channel = r / ksize / ksize;

        for (int oy = 0; oy < out_h; ++oy) {
            /* Row in padded coordinates: window origin plus kernel offset. */
            const int src_row = k_row + oy * stride;

            for (int ox = 0; ox < out_w; ++ox) {
                /* Column in padded coordinates. */
                const int src_col = k_col + ox * stride;

                data_col[dst++] = im2col_get_pixel(data_im, height, width, channels,
                        src_row, src_col, src_channel, pad);
            }
        }
    }
}

 

具体数值举例

假设输入图像矩阵(data_im):height=width=5,channels=1;

卷积核大小ksize=3,pad=1,stride=2。

data_im={0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}

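补0后(pad=1),得到7×7的矩阵:

 0  0  0  0  0  0  0
 0  0  1  2  3  4  0
 0  5  6  7  8  9  0
 0 10 11 12 13 14  0
 0 15 16 17 18 19  0
 0 20 21 22 23 24  0
 0  0  0  0  0  0  0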

输出矩阵大小:

高为 channels_col=channels * ksize * ksize=9,

宽为 height_col *width_col=9。

其中height_col=(height + 2*pad - ksize) / stride + 1=3,width_col=(width + 2*pad - ksize) / stride + 1=3.


——————进入循环——————




 

当c=0时

列偏移w_offset = c % ksize=0%3=0,

行偏移h_offset = (c / ksize) % ksize=(0/3)%3=0

所属通道c_im = c / ksize / ksize=0/3/3=0

h=0

w=0

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=0+0*2=0

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+0)*3+0=0

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[0] = im2col_get_pixel(data_im, 5, 5, 1,0,0, 0, 1)

 row -= pad=-1;col -= pad=-1;return 0;

data_col[0]=0


w=1

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=0+1*2=2

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+0)*3+1=1

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[1] = im2col_get_pixel(data_im, 5, 5, 1,0,2, 0, 1)

 row -= pad=-1;col -= pad=1;return 0;

data_col[1]=0


w=2

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=0+2*2=4

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+0)*3+2=2

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[2] = im2col_get_pixel(data_im, 5, 5, 1,0,4, 0, 1)

 row -= pad=-1;col -= pad=3;return 0;

data_col[2]=0

    ==>

 



h=1

w=0

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=0+0*2=0

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+1)*3+0=3

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[3] = im2col_get_pixel(data_im, 5, 5, 1,2,0, 0, 1)

 row -= pad=1;col -= pad=-1;return 0;

data_col[3]=0


w=1

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=0+1*2=2

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+1)*3+1=4

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[4] = im2col_get_pixel(data_im, 5, 5, 1,2,2, 0, 1)

 row -= pad=1;col -= pad=1;return  im[col + width*(row + height*channel)]=im[6]=6;

data_col[4]=6


w=2

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=0+2*2=4

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+1)*3+2=5

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[5] = im2col_get_pixel(data_im, 5, 5, 1,2,4, 0, 1)

 row -= pad=1;col -= pad=3;return  im[col + width*(row + height*channel)]=im[8]=8;

data_col[5]=8

==>



h=2

w=0

im_row = h_offset + h * stride=0+2*2=4

int im_col = w_offset + w * stride=0+0*2=0

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+2)*3+0=6

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[6] = im2col_get_pixel(data_im, 5, 5, 1,4,0, 0, 1)

 row -= pad=3;col -= pad=-1;return 0;

data_col[6]=0


w=1

im_row = h_offset + h * stride=0+2*2=4

int im_col = w_offset + w * stride=0+1*2=2

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+2)*3+1=7

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[7] = im2col_get_pixel(data_im, 5, 5, 1,4,2, 0, 1)

 row -= pad=3;col -= pad=1;return im[col + width*(row + height*channel)]=im[16]=16;

data_col[7]=16


w=2

im_row = h_offset + h * stride=0+2*2=4

int im_col = w_offset + w * stride=0+2*2=4

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+2)*3+2=8

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[8] = im2col_get_pixel(data_im, 5, 5, 1,4,4, 0, 1)

 row -= pad=3;col -= pad=3;return im[col + width*(row + height*channel)]=im[18]=18;

data_col[8]=18

==>




当c=1时

列偏移w_offset = 1 % ksize=1%3=1,

行偏移h_offset = (c / ksize) % ksize=(1/3)%3=0

所属通道c_im = c / ksize / ksize=1/3/3=0

h=0

w=0

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=1+0*2=1

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+0)*3+0=9

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[9] = im2col_get_pixel(data_im, 5, 5, 1,0,1, 0, 1)

 row -= pad=-1;col -= pad=0;return 0;

data_col[9]=0


w=1

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=1+1*2=3

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+0)*3+1=10

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[10] = im2col_get_pixel(data_im, 5, 5, 1,0,3, 0, 1)

 row -= pad=-1;col -= pad=2;return 0;

data_col[10]=0


w=2

im_row = h_offset + h * stride=0+0*2=0

int im_col = w_offset + w * stride=1+2*2=5

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+0)*3+2=11

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[11] = im2col_get_pixel(data_im, 5, 5, 1,0,5, 0, 1)

 row -= pad=-1;col -= pad=4;return 0;

data_col[11]=0

==>



h=1

w=0

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=1+0*2=1

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+1)*3+0=12

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[12] = im2col_get_pixel(data_im, 5, 5, 1,2,1, 0, 1)

 row -= pad=1;col -= pad=0;return im[col + width*(row + height*channel)]=im[5]=5;

data_col[12]=5


w=1

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=1+1*2=3

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+1)*3+1=13

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[13] = im2col_get_pixel(data_im, 5, 5, 1,2,3, 0, 1)

 row -= pad=1;col -= pad=2;return im[col + width*(row + height*channel)]=im[7]=7;

data_col[13]=7


w=2

im_row = h_offset + h * stride=0+1*2=2

int im_col = w_offset + w * stride=1+2*2=5

输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+1)*3+2=14

 data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)

==>data_col[14] = im2col_get_pixel(data_im, 5, 5, 1,2,5, 0, 1)

 row -= pad=1;col -= pad=4;return im[col + width*(row + height*channel)]=im[9]=9;

data_col[14]=9

==>

以此类推……

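最后得到输出矩阵(9行×9列;第c行对应上文的c,列索引为h*width_col+w):

c=0:  0  0  0  0  6  8  0 16 18
c=1:  0  0  0  5  7  9 15 17 19
c=2:  0  0  0  6  8  0 16 18  0
c=3:  0  1  3  0 11 13  0 21 23
c=4:  0  2  4 10 12 14 20 22 24
c=5:  1  3  0 11 13  0 21 23  0
c=6:  0  6  8  0 16 18  0  0  0
c=7:  5  7  9 15 17 19  0  0  0
c=8:  6  8  0 16 18  0  0  0  0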

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值