文章目录
Convolution
Naive forward pass
实现卷积操作,按照卷积的流程,写出最容易理解的代码。
# Naive convolution forward pass: slide every filter over every padded
# image and take the dot product of filter and window at each location.
N, C, H, W = x.shape
F, _, HH, WW = w.shape
stride, pad = conv_param['stride'], conv_param['pad']
# Output spatial size (integer division).
H_out = 1 + (H + 2 * pad - HH) // stride
W_out = 1 + (W + 2 * pad - WW) // stride
# Preallocate the output.
out = np.zeros((N, F, H_out, W_out))
# Zero-pad only the spatial axes (2 and 3); batch and channel axes untouched.
x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant', constant_values=0)
for n in range(N):                  # n-th image
    for f in range(F):              # f-th filter
        for r in range(H_out):      # output row
            for c in range(W_out):  # output column
                h0, w0 = r * stride, c * stride
                window = x_pad[n, :, h0:h0 + HH, w0:w0 + WW]
                out[n, f, r, c] = np.sum(window * w[f]) + b[f]
Naive backward pass
# Naive convolution backward pass: recompute each forward-pass window and
# accumulate gradients w.r.t. the input (dx), filters (dw) and biases (db).
x, w, b, conv_param = cache
N, C, H, W = x.shape
F, _, HH, WW = w.shape
stride, pad = conv_param['stride'], conv_param['pad']
# BUG FIX: use integer division (//); with true division H_out/W_out are
# floats and range(H_out) raises TypeError.
H_out = 1 + (H + 2 * pad - HH) // stride
W_out = 1 + (W + 2 * pad - WW) // stride
# Zero-pad the spatial axes exactly as the forward pass did.
x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant', constant_values=0)
# Preallocate gradient accumulators (removed the unused dx preallocation;
# dx is derived from dx_pad below).
dx_pad = np.zeros_like(x_pad)
dw = np.zeros_like(w)
db = np.zeros_like(b)
for i in range(N):      # i-th image
    for f in range(F):  # f-th filter
        for j in range(H_out):
            for k in range(W_out):
                window = x_pad[i, :, j * stride: HH + j * stride, k * stride: WW + k * stride]
                db[f] += dout[i, f, j, k]
                dw[f] += window * dout[i, f, j, k]
                dx_pad[i, :, j * stride:HH + j * stride, k * stride: WW + k * stride] += w[f] * dout[i, f, j, k]
# BUG FIX: slice with explicit end indices so pad == 0 also works
# (pad:-pad would become 0:-0, an empty slice).
dx = dx_pad[:, :, pad:pad + H, pad:pad + W]
Max-Pooling
Naive forward
# Naive max-pool forward pass: each output cell is the max over its pooling
# window, computed at once for all images and channels via axis reduction.
N, C, H, W = x.shape
pool_height, pool_width, stride = pool_param['pool_height'], pool_param['pool_width'], pool_param['stride']
H_out = 1 + (H - pool_height) // stride
W_out = 1 + (W - pool_width) // stride
out = np.zeros((N, C, H_out, W_out))
for row in range(H_out):
    top = row * stride
    for col in range(W_out):
        left = col * stride
        window = x[:, :, top:top + pool_height, left:left + pool_width]
        out[:, :, row, col] = window.max(axis=(2, 3))
Naive backward
# Naive max-pool backward pass: route each upstream gradient entry back to
# the position(s) that achieved the max inside its forward pooling window.
x, pool_param = cache
N, C, H, W = x.shape
pool_height, pool_width, stride = pool_param['pool_height'], pool_param['pool_width'], pool_param['stride']
H_out = 1 + (H - pool_height) // stride
W_out = 1 + (W - pool_width) // stride
dx = np.zeros_like(x)
for i in range(H_out):
    for j in range(W_out):
        # BUG FIX: the original sliced the height axis by pool_width and the
        # width axis by pool_height; wrong whenever the window is non-square
        # and inconsistent with the dx slice below.
        x_masked = x[:, :, i * stride:i * stride + pool_height, j * stride:j * stride + pool_width]
        max_x_masked = np.max(x_masked, axis=(2, 3))[:, :, None, None]
        # Boolean mask selects the argmax position(s); on ties every maximal
        # position receives the full gradient.
        dx[:, :, i * stride:i * stride + pool_height, j * stride:j * stride + pool_width] += \
            (x_masked == max_x_masked) * dout[:, :, i, j][:, :, None, None]
Three-layer ConvNet
cnn.py
直接使用已经封装好的方法实现三层网络
首先计算scores
# Forward pass of the three-layer ConvNet: conv-relu-pool -> affine-relu -> affine.
out_conv, cache_conv = conv_relu_pool_forward(X, W1, b1, conv_param, pool_param)
out_fc1, cache_fc1 = affine_relu_forward(out_conv, W2, b2)
# The last affine layer has no nonlinearity: it produces the class scores.
scores, cache_fc2 = affine_forward(out_fc1, W3, b3)
接着计算loss和梯度,注意需要添加正则项
# Softmax data loss plus L2 regularization over all weight matrices.
loss, dout = softmax_loss(scores, y)
loss += 0.5 * self.reg * sum(np.sum(W * W) for W in (W1, W2, W3))
# Backpropagate through the three layers in reverse order.
dx3, dW3, db3 = affine_backward(dout, cache_fc2)
dx2, dW2, db2 = affine_relu_backward(dx3, cache_fc1)
dx1, dW1, db1 = conv_relu_pool_backward(dx2, cache_conv)
# Weight gradients pick up the regularization term; biases are not regularized.
grads['W3'], grads['b3'] = dW3 + self.reg * W3, db3
grads['W2'], grads['b2'] = dW2 + self.reg * W2, db2
grads['W1'], grads['b1'] = dW1 + self.reg * W1, db1
Spatial Batch Normalization
Batchnorm对于全连接网络来讲有着巨大的作用,可以帮助网络实现正则化,防止过拟合。对于卷积神经网络需要对之前的Batchnorm做些小修改,主要表现在数据维度层面。之前的batchnorm是两维的,现在的卷积神经网络中数据成了4维,故需要用transpose和reshape函数对数据进行变换。
前向传播
layer.py中的spatial_batchnorm_forward()
# Spatial batchnorm forward: treat every spatial position of every image as
# one sample so the 2-D per-channel batchnorm can be reused unchanged.
N, C, H, W = x.shape
x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C)  # (N*H*W, C)
out, cache = batchnorm_forward(x_flat, gamma, beta, bn_param)
# Restore the original (N, C, H, W) layout.
out = out.reshape(N, H, W, C).transpose(0, 3, 1, 2)
layers.py中的spatial_batchnorm_backward()
# Spatial batchnorm backward: apply the same layout trick as the forward
# pass, reuse the 2-D batchnorm backward, then restore the 4-D layout.
N, C, H, W = dout.shape
dout_flat = dout.transpose(0, 2, 3, 1).reshape(-1, C)  # (N*H*W, C)
dx, dgamma, dbeta = batchnorm_backward_alt(dout_flat, cache)
dx = dx.reshape(N, H, W, C).transpose(0, 3, 1, 2)
Group Normalization
Group Normalization是2018年发表至ECCV的一篇论文提出来的。主要思想是折衷Layer Normalization和Instance Normalization的一种方法,既克服了batch的约束,又保证了收敛速度。
其主要思想是将特征通道进行分组,然后以[N,G,C//G,H,W]的维度进行归一化。
forward
# Group normalization forward: split the C channels into G groups and
# normalize each group over its (C//G, H, W) sub-volume, per sample.
N, C, H, W = x.shape
grouped = x.reshape((N, G, C // G, H, W))
mu = grouped.mean(axis=(2, 3, 4), keepdims=True)
sigma2 = grouped.var(axis=(2, 3, 4), keepdims=True)
# Normalize in the grouped layout, then restore (N, C, H, W).
x_norm = ((grouped - mu) / np.sqrt(sigma2 + eps)).reshape((N, C, H, W))
out = x_norm * gamma + beta
# Cache the group-shaped statistics for the backward pass.
cache = (x, gamma, beta, G, eps, mu, sigma2, x_norm)
backward
反向传播代码参考他人博客,博客地址见[1]
# Group normalization backward pass: the standard batchnorm chain-rule
# derivation applied per (sample, group). `mean` and `var` in the cache are
# the group-shaped statistics from the forward pass.
N, C, H, W = dout.shape
x, gamma, beta, G, eps, mean, var, x_norm = cache
# gamma/beta are broadcast over (N, H, W), so sum their gradients over those axes.
dbeta = np.sum(dout, axis=(0, 2, 3), keepdims=True)
dgamma = np.sum(dout * x_norm, axis=(0, 2, 3), keepdims=True)
dx_norm = dout * gamma
# Reshape to the grouped layout the statistics were computed in.
dx_groupnorm = dx_norm.reshape((N, G, C // G, H, W))
x_group = x.reshape((N, G, C // G, H, W))
# Gradient w.r.t. the per-group variance.
dvar = np.sum(dx_groupnorm * -1.0 / 2 * (x_group - mean) / (var + eps) ** (3.0 / 2), axis=(2, 3, 4), keepdims=True)
# Number of elements each group statistic was computed over.
N_GROUP = C // G * H * W
# Gradient w.r.t. the per-group mean: direct path plus the path through var.
dmean1 = np.sum(dx_groupnorm * -1.0 / np.sqrt(var + eps), axis=(2, 3, 4), keepdims=True)
dmean2_var = dvar * -2.0 / N_GROUP * np.sum(x_group - mean, axis=(2, 3, 4), keepdims=True)
dmean = dmean1 + dmean2_var
# dx is the sum of three paths: direct, through the mean, through the variance.
dx_group1 = dx_groupnorm * 1.0 / np.sqrt(var + eps)
dx_group2_mean = dmean * 1.0 / N_GROUP
dx_group3_var = dvar * 2.0 / N_GROUP * (x_group - mean)
dx_group = dx_group1 + dx_group2_mean + dx_group3_var
# Restore the original (N, C, H, W) layout.
dx = dx_group.reshape((N, C, H, W))
参考文章
[1] https://blog.csdn.net/weixin_42880443/article/details/81589745
[2] https://blog.csdn.net/u013832707/article/details/83059540