目录
目标
为了将稍复杂的卷积相乘转化为“平摊式”的矩阵相乘,先使用im2col完成数组格式的转化,便于之后的矩阵相乘计算。
darknet中im2col.c详细解析
按行存储。
#include "im2col.h"
#include <stdio.h>
/**
 * Read one element of a zero-padded, multi-channel image.
 *
 * The image is stored as a flat array: each channel is laid out row by row,
 * and the channels follow one another.
 *
 * @param im       flat image data
 * @param height   height of one channel, without padding
 * @param width    width of one channel, without padding
 * @param channels number of channels (not used by the lookup itself)
 * @param row      row of the requested element, in padded coordinates
 * @param col      column of the requested element, in padded coordinates
 * @param channel  channel of the requested element
 * @param pad      amount of padding on each border
 * @return the stored value at (channel, row - pad, col - pad), or 0 when
 *         that position lies in the padding border
 */
float im2col_get_pixel(float *im, int height, int width, int channels,
                       int row, int col, int channel, int pad)
{
    /* Translate padded coordinates back into coordinates of the stored image. */
    const int src_row = row - pad;
    const int src_col = col - pad;
    const int inside = src_row >= 0 && src_row < height &&
                       src_col >= 0 && src_col < width;

    (void)channels; /* accepted for interface compatibility only */

    /* Anything outside the stored image belongs to the zero padding. */
    if (!inside) {
        return 0;
    }
    return im[(channel * height + src_row) * width + src_col];
}
//From Berkeley Vision's Caffe!
//https://github.com/BVLC/caffe/blob/master/LICENSE
/**
 * Rearrange image data into the column layout used for matrix-multiply
 * convolution.
 *
 * The output is a matrix with channels * ksize * ksize rows and
 * height_col * width_col columns, stored row by row, where
 * height_col = (height + 2*pad - ksize) / stride + 1 and
 * width_col  = (width  + 2*pad - ksize) / stride + 1.
 *
 * @param data_im  input image data
 * @param channels number of input channels
 * @param height   input image height
 * @param width    input image width
 * @param ksize    kernel size (used for both height and width)
 * @param stride   convolution stride
 * @param pad      padding value
 * @param data_col output array that receives the rearranged data
 */
void im2col_cpu(float* data_im,
     int channels, int height, int width,
     int ksize, int stride, int pad, float* data_col)
{
    /* Number of kernel positions along each image axis. */
    const int out_h = (height + 2 * pad - ksize) / stride + 1;
    const int out_w = (width + 2 * pad - ksize) / stride + 1;
    /* One output row per (channel, kernel row, kernel column) triple. */
    const int out_rows = channels * ksize * ksize;

    for (int r = 0; r < out_rows; ++r) {
        /*
         * Decode the output row into a position inside the kernel window and
         * a source channel. With ksize = 3: r = 0 -> (kx 0, ky 0, channel 0),
         * r = 5 -> (kx 2, ky 1, channel 0), r = 10 -> (kx 1, ky 0, channel 1).
         */
        const int kx = r % ksize;
        const int ky = (r / ksize) % ksize;
        const int src_channel = r / ksize / ksize;

        for (int oy = 0; oy < out_h; ++oy) {
            /* Row in padded image coordinates: window start plus kernel row. */
            const int src_row = ky + oy * stride;
            /* Index of the first output element for this (r, oy) pair. */
            const int dst_base = (r * out_h + oy) * out_w;

            for (int ox = 0; ox < out_w; ++ox) {
                /* Column in padded image coordinates. */
                const int src_col = kx + ox * stride;

                data_col[dst_base + ox] =
                    im2col_get_pixel(data_im, height, width, channels,
                                     src_row, src_col, src_channel, pad);
            }
        }
    }
}
具体数值举例
假设输入图像矩阵(data_im):height=width=5,channels=1;
卷积核大小ksize=3,pad=1,stride=2。
data_im={0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}
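补0后(pad=1):
0  0  0  0  0  0  0
0  0  1  2  3  4  0
0  5  6  7  8  9  0
0 10 11 12 13 14  0
0 15 16 17 18 19  0
0 20 21 22 23 24  0
0  0  0  0  0  0  0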
输出矩阵大小:
高为 channels_col=channels * ksize * ksize=9,
宽为 height_col *width_col=9。
其中height_col=(height + 2*pad - ksize) / stride + 1=3,width_col=(width + 2*pad - ksize) / stride + 1=3.
——————进入循环——————
当c=0时
列偏移w_offset = c % ksize=0%3=0,
行偏移h_offset = (c / ksize) % ksize=(0/3)%3=0
所属通道c_im = c / ksize / ksize=0/3/3=0
h=0
w=0
im_row = h_offset + h * stride=0+0*2=0
int im_col = w_offset + w * stride=0+0*2=0
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+0)*3+0=0
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[0] = im2col_get_pixel(data_im, 5, 5, 1,0,0, 0, 1)
row -= pad=-1;col -= pad=-1;return 0;
data_col[0]=0
w=1
im_row = h_offset + h * stride=0+0*2=0
int im_col = w_offset + w * stride=0+1*2=2
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+0)*3+1=1
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[1] = im2col_get_pixel(data_im, 5, 5, 1,0,2, 0, 1)
row -= pad=-1;col -= pad=1;return 0;
data_col[1]=0
w=2
im_row = h_offset + h * stride=0+0*2=0
int im_col = w_offset + w * stride=0+2*2=4
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+0)*3+2=2
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[2] = im2col_get_pixel(data_im, 5, 5, 1,0,4, 0, 1)
row -= pad=-1;col -= pad=3;return 0;
data_col[2]=0
==>
h=1
w=0
im_row = h_offset + h * stride=0+1*2=2
int im_col = w_offset + w * stride=0+0*2=0
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+1)*3+0=3
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[3] = im2col_get_pixel(data_im, 5, 5, 1,2,0, 0, 1)
row -= pad=1;col -= pad=-1;return 0;
data_col[3]=0
w=1
im_row = h_offset + h * stride=0+1*2=2
int im_col = w_offset + w * stride=0+1*2=2
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+1)*3+1=4
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[4] = im2col_get_pixel(data_im, 5, 5, 1,2,2, 0, 1)
row -= pad=1;col -= pad=1;return im[col + width*(row + height*channel)]=im[6]=6;
data_col[4]=6
w=2
im_row = h_offset + h * stride=0+1*2=2
int im_col = w_offset + w * stride=0+2*2=4
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+1)*3+2=5
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[5] = im2col_get_pixel(data_im, 5, 5, 1,2,4, 0, 1)
row -= pad=1;col -= pad=3;return im[col + width*(row + height*channel)]=im[8]=8;
data_col[5]=8
==>
h=2
w=0
im_row = h_offset + h * stride=0+2*2=4
int im_col = w_offset + w * stride=0+0*2=0
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+2)*3+0=6
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[6] = im2col_get_pixel(data_im, 5, 5, 1,4,0, 0, 1)
row -= pad=3;col -= pad=-1;return 0;
data_col[6]=0
w=1
im_row = h_offset + h * stride=0+2*2=4
int im_col = w_offset + w * stride=0+1*2=2
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+2)*3+1=7
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[7] = im2col_get_pixel(data_im, 5, 5, 1,4,2, 0, 1)
row -= pad=3;col -= pad=1;return im[col + width*(row + height*channel)]=im[16]=16;
data_col[7]=16
w=2
im_row = h_offset + h * stride=0+2*2=4
int im_col = w_offset + w * stride=0+2*2=4
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(0*3+2)*3+2=8
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[8] = im2col_get_pixel(data_im, 5, 5, 1,4,4, 0, 1)
row -= pad=3;col -= pad=3;return im[col + width*(row + height*channel)]=im[18]=18;
data_col[8]=18
==>
当c=1时
列偏移w_offset = 1 % ksize=1%3=1,
行偏移h_offset = (c / ksize) % ksize=(1/3)%3=0
所属通道c_im = c / ksize / ksize=1/3/3=0
h=0
w=0
im_row = h_offset + h * stride=0+0*2=0
int im_col = w_offset + w * stride=1+0*2=1
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+0)*3+0=9
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[9] = im2col_get_pixel(data_im, 5, 5, 1,0,1, 0, 1)
row -= pad=-1;col -= pad=0;return 0;
data_col[9]=0
w=1
im_row = h_offset + h * stride=0+0*2=0
int im_col = w_offset + w * stride=1+1*2=3
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+0)*3+1=10
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[10] = im2col_get_pixel(data_im, 5, 5, 1,0,3, 0, 1)
row -= pad=-1;col -= pad=2;return 0;
data_col[10]=0
w=2
im_row = h_offset + h * stride=0+0*2=0
int im_col = w_offset + w * stride=1+2*2=5
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+0)*3+2=11
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[11] = im2col_get_pixel(data_im, 5, 5, 1,0,5, 0, 1)
row -= pad=-1;col -= pad=4;return 0;
data_col[11]=0
==>
h=1
w=0
im_row = h_offset + h * stride=0+1*2=2
int im_col = w_offset + w * stride=1+0*2=1
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+1)*3+0=12
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[12] = im2col_get_pixel(data_im, 5, 5, 1,2,1, 0, 1)
row -= pad=1;col -= pad=0;return im[col + width*(row + height*channel)]=im[5]=5;
data_col[12]=5
w=1
im_row = h_offset + h * stride=0+1*2=2
int im_col = w_offset + w * stride=1+1*2=3
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+1)*3+1=13
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[13] = im2col_get_pixel(data_im, 5, 5, 1,2,3, 0, 1)
row -= pad=1;col -= pad=2;return im[col + width*(row + height*channel)]=im[7]=7;
data_col[13]=7
w=2
im_row = h_offset + h * stride=0+1*2=2
int im_col = w_offset + w * stride=1+2*2=5
输出矩阵的索引col_index = (c * height_col + h) * width_col + w=(1*3+1)*3+2=14
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,im_row, im_col, c_im, pad)
==>data_col[14] = im2col_get_pixel(data_im, 5, 5, 1,2,5, 0, 1)
row -= pad=1;col -= pad=4;return im[col + width*(row + height*channel)]=im[9]=9;
data_col[14]=9
==>
以此类推……
最后得到输出矩阵: