通过摆脱pytorch和tensorflow完全搭建神经网络来夯实一下基础,害,双非的卑微努力
本篇搭建了卷积部分和池化的前向传播
首先需要复习卷积的基础概念
输入(feature), 卷积核(filter)中对应点的相乘
上图很清楚的表示了单通道feature的卷积计算,在多通道下,需要注意的是一个卷积核的shape(c, h, w),在每个卷积核的对应通道和feature的对应通道相乘后加起来,具体过程如下图所示
在理解上面的卷积过程后,如果自己尝试卷积,可能需要至少6个for循环,极大拖延效率,所以此次使用im2col来实现卷积的过程。
im2col原理
通过把每次卷积的区域保存成为一维数组,卷积的操作次数就是我们需要保存的所有个数, 构成了二维数组[h_out * w_out * n, f * f * c ],同样卷积核也变换成了这样的二维数组,这样就可以通过矩阵的乘法一次得出结果,速度比起原始的for循环快很多,这也是caffe的实现方式。
def Im2col(feature, w_shape, shape, stride):
    """Unroll every convolution window of an NHWC batch into one row.

    feature : padded input, shape (n, h, w, c_in).
    w_shape : kernel shape tuple; only the spatial size f (index 1/2) is used.
    shape   : output shape (n, h_out, w_out, c_out); c_out is unused here.
    stride  : convolution stride.

    Returns a 2-D array of shape (n * h_out * w_out, f * f * c_in), rows
    ordered batch-major (all positions of sample 0, then sample 1, ...).
    """
    c_in, f, f, c_out = w_shape
    n, h_out, w_out, c_out = shape
    # Collect one (f, f, c_in) patch per output position; np.array stacks
    # them and the reshape flattens each patch into a single row.
    patches = [
        feature[b, r * stride: r * stride + f, col * stride: col * stride + f, :]
        for b in range(n)
        for r in range(h_out)
        for col in range(w_out)
    ]
    return np.array(patches).reshape(n * h_out * w_out, -1)
池化部分如下
def Pool_Forward(image, f, stride, mode='max'):
    """Pooling forward pass for an NHWC batch.

    image  : input, shape (n, h, w, c).
    f      : pooling window size (f x f).
    stride : window step.
    mode   : 'max' for max pooling, 'ave' for average pooling.

    Returns an array of shape (n, h_out, w_out, c).

    Fixes over the original: the window start must advance by `stride`
    (the original indexed with the output coordinate directly, so any
    stride != 1 pooled the wrong windows), and the output width is now
    computed from w instead of assuming a square input.
    """
    n, h, w, c = image.shape
    h_out = (h - f) // stride + 1
    w_out = (w - f) // stride + 1
    out = np.zeros((n, h_out, w_out, c))
    for i in range(n):
        for ii in range(h_out):
            r0 = ii * stride          # top edge of the current window
            for iii in range(w_out):
                c0 = iii * stride     # left edge of the current window
                for iiii in range(c):
                    win = image[i, r0:r0 + f, c0:c0 + f, iiii]
                    if mode == 'max':
                        out[i, ii, iii, iiii] = np.max(win)
                    elif mode == 'ave':
                        out[i, ii, iii, iiii] = np.mean(win)
    return out
总体部分如下
import numpy as np
# N H W C
def Padding(image, pad):
    """Zero-pad the spatial dims of an NHWC batch by `pad` on each side.

    image : input, shape (n, h, w, c).
    pad   : number of zero rows/cols added on every spatial edge.

    Returns an array of shape (n, h + 2*pad, w + 2*pad, c).

    Fix: the original dropped `image` entirely and passed the per-axis pad
    widths as separate positional arguments; np.pad's signature is
    (array, pad_width, mode) with pad_width a tuple of per-axis pairs.
    """
    return np.pad(image, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant')
def Pool_Forward(image, f, stride, mode='max'):
    """Pooling forward pass for an NHWC batch.

    image  : input, shape (n, h, w, c).
    f      : pooling window size (f x f).
    stride : window step.
    mode   : 'max' for max pooling, 'ave' for average pooling.

    Returns an array of shape (n, h_out, w_out, c).

    Fixes over the original: the window start must advance by `stride`
    (the original indexed with the output coordinate directly, so any
    stride != 1 pooled the wrong windows), and the output width is now
    computed from w instead of assuming a square input.
    """
    n, h, w, c = image.shape
    h_out = (h - f) // stride + 1
    w_out = (w - f) // stride + 1
    out = np.zeros((n, h_out, w_out, c))
    for i in range(n):
        for ii in range(h_out):
            r0 = ii * stride          # top edge of the current window
            for iii in range(w_out):
                c0 = iii * stride     # left edge of the current window
                for iiii in range(c):
                    win = image[i, r0:r0 + f, c0:c0 + f, iiii]
                    if mode == 'max':
                        out[i, ii, iii, iiii] = np.max(win)
                    elif mode == 'ave':
                        out[i, ii, iii, iiii] = np.mean(win)
    return out
def Im2col(image, w_shape, shape, stride):
    """Unroll every convolution window of an NHWC batch into one row.

    image   : padded input, shape (n, h, w, c_in).
    w_shape : kernel shape (c_in, f, f, c_out) — matches the layout
              Conv_Forward unpacks from weight.shape; only f is used.
    shape   : output shape (n, h_out, w_out, c_out); c_out is unused here.
    stride  : convolution stride.

    Returns a 2-D array of shape (n * h_out * w_out, f * f * c_in), rows
    ordered batch-major so Conv_Forward's final reshape to
    (n, h_out, w_out, c_out) is correct.

    Fixes over the original: `w_shape[]` was a syntax error; `out` was a
    plain list but `.reshape` was called on it; the unpack order disagreed
    with Conv_Forward's (c_in, f, f, c_out); and the loop skipped the batch
    axis, producing position-major rows that scrambled batches after the
    downstream reshape.
    """
    c_in, f, f, c_out = w_shape
    n, h_out, w_out, c_out = shape
    out = []
    for i in range(n):
        for ii in range(h_out):
            for iii in range(w_out):
                out.append(image[i,
                                 ii * stride: ii * stride + f,
                                 iii * stride: iii * stride + f, :])
    return np.array(out).reshape(n * h_out * w_out, -1)
def Conv_Forward(feature_in, weight, bias, stride, pad):
    """Convolution forward pass via im2col.

    feature_in : input batch, shape (n, h_in, w_in, c_in), NHWC.
    weight     : kernels, shape (c_in, f, f, c_out).
    bias       : per-output-channel bias, shape (c_out,).
    stride     : convolution stride.
    pad        : 'same' (output spatial size = ceil(in / stride)) or
                 'valid' (no padding).

    Returns (feature_out, cache) where feature_out has shape
    (n, h_out, w_out, c_out) and cache holds what backprop needs.

    Fixes over the original: output sizes are now integers (true division
    left floats, which breaks np.reshape); 'same' padding is half the
    required total per side, not `f - stride` per side (which doubled it);
    the weight is transposed to (f, f, c_in, c_out) before flattening so
    its column order matches the NHWC patch layout produced by Im2col;
    and `bias` is actually added (it was accepted but never used).
    """
    # prepare
    (n, h_in, w_in, c_in) = feature_in.shape
    (c_in, f, f, c_out) = weight.shape
    if pad == 'same':
        # ceil division without math.ceil: -(-a // b)
        h_out = -(-h_in // stride)
        w_out = -(-w_in // stride)
        # symmetric padding per side; NOTE(review): assumes the required
        # total padding is even — odd totals would need asymmetric padding
        padding = max((h_out - 1) * stride + f - h_in, 0) // 2
    elif pad == 'valid':
        h_out = (h_in - f) // stride + 1
        w_out = (w_in - f) // stride + 1
        padding = 0
    shape = (n, h_out, w_out, c_out)
    feature_in_pad = Padding(feature_in, padding)
    # (n * h_out * w_out, f * f * c_in), one row per output position
    col_feature_in = Im2col(feature_in_pad, weight.shape, shape, stride)
    # reorder axes (c_in, f, f, c_out) -> (f, f, c_in, c_out) so the C-order
    # flatten matches the (f, f, c_in) layout of each im2col row
    col_weight = weight.transpose(1, 2, 0, 3).reshape(f * f * c_in, -1)
    feature_out = np.dot(col_feature_in, col_weight).reshape(n, h_out, w_out, c_out) + bias
    cache = [col_feature_in, weight, bias, stride, pad, feature_in.shape]
    return feature_out, cache