1.卷积层(Convolutional)
1) 填充(padding)
在进行卷积层的处理之前,有时要向输入数据的周围填入固定的数据(比如0),这称为填充。
使用填充主要是为了调整输出的大小。
2)步幅
应用滤波器(filter)的位置间隔称为步幅(stride)。
输出大小的计算:
假设输入大小为(H,W),滤波器大小为(FH,FW),输出大小为(OH,OW),填充为P,步幅为S(横纵轴都为S),则:
OH = (H+2P-FH)/S +1
OW = (W+2P-FW)/S +1
tensorflow中:
padding = valid
OH = floor((H-FH)/S) + 1(不能整除时向下取整)
OW = floor((W-FW)/S) + 1
padding = same
OH = ceil(H/S)(向上取整)
OW = ceil(W/S)
具体是怎么加padding的呢?
pad_H = max((OH-1)*S + FH - H ,0)
pad_W = max((OW-1)*S + FW - W ,0)
pad_top = pad_H // 2(整除,向下取整)
pad_bottom = pad_H - pad_top
pad_left = pad_W // 2
pad_right = pad_W - pad_left
3)三维卷积运算
滤波器的通道数只能设定为和输入数据的通道数相同的值。
(C,H,W) * (C,FH,FW) ----> (1,OH,OW)
输入数据 卷积运算 过滤器 输出数据
N个卷积核(过滤器)
(C,H,W) * (FN,C,FH,FW) ----> (FN,OH,OW) + (FN,1,1) ----> (FN,OH,OW)
输入数据 卷积运算 过滤器 偏置 输出数据
批处理
(N,C,H,W) * (FN,C,FH,FW) ----> (N,FN,OH,OW) + (FN,1,1) ----> (N,FN,OH,OW)
输入数据 卷积运算 过滤器 偏置 输出数据
实现:
class Convolution:
    """Convolution layer evaluated as one matrix product over im2col columns.

    ``forward`` unfolds the input with ``im2col``, multiplies it by the
    flattened filters and adds the bias. ``backward`` computes the gradients
    from the values cached by the most recent ``forward`` call.

    ``im2col`` and ``col2im`` are helpers defined outside this class.

    Attributes:
        W: Filter weights; ``W.shape`` is unpacked as (FN, C, FH, FW).
        b: Bias added to the (rows, FN) product (presumably shape (FN,) --
            confirm against the caller).
        stride: Step between filter positions.
        pad: Padding width forwarded to ``im2col`` / ``col2im``.
        x, col, col_W: Input, unfolded input and flattened filters cached
            by ``forward`` for use in ``backward``.
        dW, db: Gradients of ``W`` and ``b`` written by ``backward``.
    """

    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

        # Intermediate values cached by forward() for backward().
        self.x = None
        self.col = None
        self.col_W = None

        # Parameter gradients, filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Convolve a batch ``x`` of shape (N, C, H, W) with the filters.

        Returns:
            Array of shape (N, FN, out_h, out_w).

        Raises:
            ValueError: If the channel count of ``x`` differs from the
                channel count of the filters.
        """
        FN, C, FH, FW = self.W.shape
        N, in_c, in_h, in_w = x.shape
        # The filter must have exactly as many channels as the input.
        # Fail early with a clear message instead of an opaque shape error
        # from the matrix product below.
        if in_c != C:
            raise ValueError(
                "input has {} channels but the filters expect {}".format(in_c, C)
            )

        # Floor division matches int(a / b) for the non-negative integer
        # sizes used here, without a round trip through float.
        out_h = (in_h + 2 * self.pad - FH) // self.stride + 1
        out_w = (in_w + 2 * self.pad - FW) // self.stride + 1

        col = im2col(x, FH, FW, self.stride, self.pad)
        # One column per filter: (C*FH*FW, FN).
        col_W = self.W.reshape(FN, -1).T

        out = np.dot(col, col_W) + self.b
        # Rows are output positions; restore (N, out_h, out_w, FN) and move
        # the filter axis in front of the spatial axes.
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.col = col
        self.col_W = col_W
        return out

    def backward(self, dout):
        """Back-propagate ``dout`` (same layout as the forward output).

        Stores the parameter gradients in ``self.dW`` and ``self.db`` and
        returns the gradient with respect to the forward input.
        """
        FN, C, FH, FW = self.W.shape
        # Undo the forward transpose so each row is one output position.
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

        self.db = np.sum(dout, axis=0)
        grad_w = np.dot(self.col.T, dout)
        self.dW = grad_w.transpose(1, 0).reshape(FN, C, FH, FW)

        dcol = np.dot(dout, self.col_W.T)
        return col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
2.池化层(Pooling)
特征:
1) 没有要学习的参数
2) 通道数不发生变化
3) 对微小的位置变化具有鲁棒性
实现:
class Pooling:
    """Max-pooling layer built on im2col.

    Each pooling window is unfolded into one row, the row maximum is taken,
    and the position of that maximum is remembered so ``backward`` can send
    the gradient to the element that produced the output.

    ``im2col`` and ``col2im`` are helpers defined outside this class.

    Attributes:
        pool_h, pool_w: Height and width of the pooling window.
        stride: Step between window positions.
        pad: Padding width forwarded to ``im2col`` / ``col2im``.
        x: Input cached by ``forward``.
        arg_max: Index of the maximum inside every window, cached by
            ``forward``.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        # Cached by forward() for backward().
        self.x = None
        self.arg_max = None

    def forward(self, x):
        """Max-pool a batch ``x`` of shape (N, C, H, W).

        Returns:
            Array of shape (N, C, out_h, out_w).
        """
        N, C, H, W = x.shape
        # Padding is part of the output size: im2col receives self.pad, so
        # leaving it out here would make the reshape below disagree with
        # the column matrix whenever pad > 0.
        out_h = (H + 2 * self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2 * self.pad - self.pool_w) // self.stride + 1

        # NOTE(review): with pad > 0 the padded cells take part in the max;
        # whether that is acceptable depends on the fill value used by
        # im2col, which is not visible here.
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        # One row per (position, channel) window.
        col = col.reshape(-1, self.pool_h * self.pool_w)

        self.x = x
        self.arg_max = np.argmax(col, axis=1)

        out = np.max(col, axis=1)
        return out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

    def backward(self, dout):
        """Route ``dout`` back to the elements that were the window maxima.

        Returns the gradient with respect to the forward input.
        """
        # Channel-last layout, matching the row order used in forward().
        dout = dout.transpose(0, 2, 3, 1)
        n, out_h, out_w = dout.shape[0], dout.shape[1], dout.shape[2]

        pool_size = self.pool_h * self.pool_w
        # Every window gets a row of zeros except at its arg-max slot.
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()

        # Regroup the per-channel windows so each row is one output position.
        dcol = dmax.reshape(n * out_h * out_w, -1)
        return col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
3.具有代表性的CNN
LeNet、AlexNet