import numpy as np
import h5py
import matplotlib.pyplot as plt
np.random.seed(1)
def zero_pad(X, pad):
"""
给样本集X的所有样本进行零填补
:param X: (m,n_H,n_W,n_C),样本数量、图片的高、宽、深度
:param pad: padding个数
:return: X_pad: 填补后的结果(m, n-H+2*pad,n_W+2*pad,n_C)
"""
X_pad = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0)
return X_pad
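# For reference (matches test1 below): an input of shape (4, 3, 3, 2) padded with
# pad=2 becomes (4, 7, 7, 2) -- only the height and width axes grow.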
def conv_single_step(a_slice_prev, W, b):
"""
单步卷积(这个函数只执行一步卷积)
:param a_slice_prev:输入矩阵的一小块数据,这里输入矩阵也就是上一层的输出矩阵,维度(f,f,n_C_prev)
:param W: 权重参数,指过滤器,维度是(f,f,n_C_prev), 因为是对a_slice_prev的卷积,故维度也是一样的
:param b: 每个过滤器都有1个b,维度是(1,1,1)
:return: Z- 卷积一步后得到的一个数值,作为输出矩阵中的一个元素
"""
    # Element-wise product of the slice and the filter, summed; the bias is added
    # once to the scalar result, not broadcast into every element before the sum
    s = np.multiply(a_slice_prev, W)
    Z = np.sum(s) + float(b)
return Z
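# Quick illustration (not part of the original exercise): with all-ones inputs the
# result is simply the number of elements plus the bias, e.g.
#   conv_single_step(np.ones((3, 3, 2)), np.ones((3, 3, 2)), np.zeros((1, 1, 1))) == 18.0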
def conv_forward(A_prev, W, b, hparameters):
"""
卷积网络前向传播
:param A_prev: 本层输入矩阵,也是上一层的输出矩阵 (m, n_H_prev, n_W_prev, n_C_prev)
:param W: 权重,即过滤器参数 (f, f, n_C_prev, n_C),n_C代表过滤器个数
:param b: 阈值 (1,1,1,n_C),一个过滤器一个阈值
:param hparameters: 超参数步长s和padding数p
:return: Z 卷积结果 (m,n_H,n_W,n_C)
cache 缓存,用于反向传播使用
"""
(m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
(f, f, n_C_prev, n_C) = W.shape
stride = hparameters['stride']
pad = hparameters['pad']
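    # Output spatial size follows the standard convolution formula:
    # n_H = floor((n_H_prev + 2*pad - f) / stride) + 1, and likewise for n_W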
n_H = int((n_H_prev + 2 * pad - f) / stride) + 1
n_W = int((n_W_prev + 2 * pad - f) / stride) + 1
Z = np.zeros((m, n_H, n_W, n_C))
A_prev_pad = zero_pad(A_prev, pad)
for i in range(m):
a_prev_pad = A_prev_pad[i]
for h in range(n_H):
for w in range(n_W):
for c in range(n_C):
vert_start = h * stride
vert_end = vert_start + f
horiz_start = w * stride
horiz_end = horiz_start + f
                    a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
Z[i, h, w, c] = conv_single_step(a_slice_prev, W[..., c], b[..., c])
assert (Z.shape == (m, n_H, n_W, n_C))
cache = (A_prev, W, b, hparameters)
return Z, cache
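# A minimal helper sketch (an assumption, not part of the original exercise): the padding
# that keeps the spatial size unchanged (a "same" convolution) when stride == 1 and the
# filter size f is odd, derived from the output-size formula used in conv_forward.
def same_pad(f):
    # With stride 1, n_H == n_H_prev requires 2*pad == f - 1
    return (f - 1) // 2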
def pool_forward(A_prev, hparameters, mode="max"):
"""池化
参数
:param A_prev: 输入矩阵,也是上一层的输出矩阵 (m,n_H_prev,n_W_prev,n_C_prev)
:param hparameters: 窗口大小f和步长s
:param mode: max、average
:return: A-池化层的输出矩阵,维度是(m,n_H,n_W,n_C)
cache-缓存一些数据
"""
(m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
f = hparameters["f"]
stride = hparameters["stride"]
n_H = int((n_H_prev - f) / stride) + 1
n_W = int((n_W_prev - f) / stride) + 1
n_C = n_C_prev
A = np.zeros((m, n_H, n_W, n_C))
for i in range(m):
a_prev = A_prev[i]
for h in range(n_H):
for w in range(n_W):
for c in range(n_C):
vert_start = h * stride
vert_end = vert_start + f
horiz_start = w * stride
horiz_end = horiz_start + f
a_prev_slice = a_prev[vert_start:vert_end, horiz_start: horiz_end, c]
if mode == "max":
A[i, h, w, c] = np.max(a_prev_slice)
elif mode == "average":
A[i, h, w, c] = np.mean(a_prev_slice)
                    else:
                        raise ValueError("Unsupported pooling mode: " + str(mode))
cache = (A_prev, hparameters)
assert (A.shape == (m, n_H, n_W, n_C))
return A, cache
def conv_backward(dZ, cache):
"""
:param dZ: 后一层相关的dZ,维度是 (m, n_H,n_W,n_C)
:param cache: 缓存数据
:return:
dA_prev -- 本卷积层输入矩阵的dA,维度(m, n_H_prev, n_W_prev, n_C_prev)
dW -- 本卷积层的dW, 维度是(f, f, n_C_prev, n_C)
db -- 本卷积层的db,维度(1, 1, 1, n_C)
"""
(A_prev, W, b, hparameters) = cache
(m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
(f, f, n_C_prev, n_C) = W.shape
stride = hparameters["stride"]
pad = hparameters["pad"]
(m, n_H, n_W, n_C) = dZ.shape
dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))
dW = np.zeros((f, f, n_C_prev, n_C))
db = np.zeros((1, 1, 1, n_C))
A_prev_pad = zero_pad(A_prev, pad)
dA_prev_pad = zero_pad(dA_prev, pad)
for i in range(m):
a_prev_pad = A_prev_pad[i]
da_prev_pad = dA_prev_pad[i]
for h in range(n_H):
for w in range(n_W):
for c in range(n_C):
                    # Locate the window that produced Z[i, h, w, c]; the stride must be
                    # applied here exactly as in the forward pass
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                    # Accumulate the gradients: dA gets the filter scaled by dZ, dW gets
                    # the input slice scaled by dZ, and db sums dZ over all positions
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:, :, :, c] * dZ[i, h, w, c]
                    dW[:, :, :, c] += a_slice * dZ[i, h, w, c]
                    db[:, :, :, c] += dZ[i, h, w, c]
        # Strip the padding to recover the gradient of the unpadded input
        # (this slice assumes pad > 0, which holds for the tests in this file)
        dA_prev[i, :, :, :] = da_prev_pad[pad:-pad, pad:-pad, :]
assert (dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))
return dA_prev, dW, db
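# A minimal numerical gradient check (a sketch, not part of the original exercise): since
# Z is linear in W, perturbing one weight and re-running conv_forward should reproduce the
# corresponding entry of dW when dZ is all ones.
def check_conv_dW(eps=1e-5):
    np.random.seed(1)
    A_prev = np.random.randn(2, 4, 4, 3)
    W = np.random.randn(2, 2, 3, 4)
    b = np.random.randn(1, 1, 1, 4)
    hparameters = {"pad": 1, "stride": 1}
    Z, cache = conv_forward(A_prev, W, b, hparameters)
    _, dW, _ = conv_backward(np.ones_like(Z), cache)
    # Central finite difference on a single weight
    W_plus, W_minus = W.copy(), W.copy()
    W_plus[0, 0, 0, 0] += eps
    W_minus[0, 0, 0, 0] -= eps
    Z_plus, _ = conv_forward(A_prev, W_plus, b, hparameters)
    Z_minus, _ = conv_forward(A_prev, W_minus, b, hparameters)
    numeric = (np.sum(Z_plus) - np.sum(Z_minus)) / (2 * eps)
    print("analytic dW[0,0,0,0] =", dW[0, 0, 0, 0], "numeric =", numeric)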
def create_mask_from_window(x):
    """
    Return a boolean mask that is True at the position(s) holding the maximum of x.
    Used by max-pooling backward: the gradient flows only to the entry that was the max.
    """
    mask = (x == np.max(x))
    return mask
def distribute_value(dz, shape):
"""
参数
:param dz: 一个 数值
:param shape: 输出矩阵的维度
:return: a, a的维度就是shape,里面的值是dz平分而来的
"""
(n_H, n_W) = shape
average = dz / (n_H * n_W)
a = np.ones(shape) * average
return a
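# Example (matches test7 below): distribute_value(2, (2, 2)) returns
# [[0.5, 0.5], [0.5, 0.5]] -- each cell receives an equal share of the gradient.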
def pool_backward(dA, cache, mode="max"):
"""
参数
:param dA: 本池化层的输出矩阵对应的偏导数
:param cache: 前向传播缓存的值
:param mode:最大池化还是平均池化, max 或 average
:return: dA_prev 本池化的输入矩阵对应的偏导数
"""
(A_prev, hparameters) = cache
stride = hparameters["stride"]
f = hparameters["f"]
m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape
m, n_H, n_W, n_C = dA.shape
dA_prev = np.zeros(A_prev.shape)
for i in range(m):
a_prev = A_prev[i]
for h in range(n_H):
for w in range(n_W):
for c in range(n_C):
                    # Locate the window used in the forward pass (the stride applies here too)
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
if mode == "max":
a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
mask = create_mask_from_window(a_prev_slice)
dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += np.multiply(mask, dA[i, h, w, c])
elif mode == "average":
da = dA[i, h, w, c]
shape = (f, f)
dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += distribute_value(da, shape)
assert (dA_prev.shape == A_prev.shape)
return dA_prev
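# Sanity check sketch (an assumption, not part of the original exercise): average pooling
# spreads each upstream gradient evenly over its window, so the total gradient mass is
# preserved and np.sum(dA_prev) should equal np.sum(dA).
def check_pool_average_backward():
    np.random.seed(1)
    A_prev = np.random.randn(3, 6, 6, 2)
    hparameters = {"stride": 2, "f": 2}
    A, cache = pool_forward(A_prev, hparameters, mode="average")
    dA = np.random.randn(*A.shape)
    dA_prev = pool_backward(dA, cache, mode="average")
    print("gradient mass preserved:", np.isclose(np.sum(dA_prev), np.sum(dA)))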
def test1():
np.random.seed(1)
x = np.random.randn(4, 3, 3, 2)
x_pad = zero_pad(x, 2)
print("x.shape:", x.shape)
print("x_pad.shape:", x_pad.shape)
print("x[1,1]=", x[1, 1])
print("x_pad[1,1]=", x_pad[1, 1])
fig, axarr = plt.subplots(1, 2)
axarr[0].set_title('x')
    print("x =", x)
    print("x[0, :, :, 0] =", x[0, :, :, 0])
axarr[0].imshow(x[0, :, :, 0])
axarr[1].set_title('x_pad')
print("xxxx", x_pad)
axarr[1].imshow(x_pad[0, :, :, 0])
plt.show()
def test2():
a_slice_prev = np.random.randn(4, 4, 3)
W = np.random.randn(4, 4, 3)
b = np.random.randn(1, 1, 1)
Z = conv_single_step(a_slice_prev, W, b)
print("Z=", Z)
def test3():
np.random.seed(1)
A_prev = np.random.randn(10, 4, 4, 3)
W = np.random.randn(2, 2, 3, 8)
b = np.random.randn(1, 1, 1, 8)
hparameters = {"pad": 2, "stride": 1}
Z, cache_conv = conv_forward(A_prev, W, b, hparameters)
print("Z's mean =", np.mean(Z))
print("cache_conv[0][1][2][3]=", cache_conv[0][1][2][3])
return Z, cache_conv
def test4():
np.random.seed(1)
A_prev = np.random.randn(2, 4, 4, 3)
hparameters = {"stride": 1, "f": 4}
A, cache = pool_forward(A_prev, hparameters)
print("mode=max")
print("A=", A)
print()
A, cache = pool_forward(A_prev, hparameters, mode="average")
print("mode=average")
print("A=", A)
def test5():
np.random.seed(1)
Z, cache_conv = test3()
dA, dW, db = conv_backward(Z, cache_conv)
print("dA_mean =", np.mean(dA))
print("dW_mean =", np.mean(dW))
print("db_mean =", np.mean(db))
def test6():
np.random.seed(1)
x = np.random.randn(2, 3)
mask = create_mask_from_window(x)
print("x=", x)
print("mask=", mask)
def test7():
a = distribute_value(2, (2, 2))
print("distributed value = ", a)
def test8():
np.random.seed(1)
A_prev = np.random.randn(5, 5, 3, 2)
hparameters = {"stride": 1, "f": 2}
A, cache = pool_forward(A_prev, hparameters)
dA = np.random.randn(5, 4, 2, 2)
dA_prev = pool_backward(dA, cache, mode="max")
print("mode=max")
print("mean of dA = ", np.mean(dA))
print("dA_prev[1,1]=", dA_prev[1, 1])
print()
    dA_prev = pool_backward(dA, cache, mode="average")
    print("mode=average")
    print("mean of dA = ", np.mean(dA))
    print("dA_prev[1,1] = ", dA_prev[1, 1])
if __name__ == "__main__":
test8()
