Building your Deep Neural Network: Step by Step
- Import Packages
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases import *
from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward
from public_tests import *
%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
%load_ext autoreload
%autoreload 2
np.random.seed(1)
- Outline
The steps below: initialize the parameters for an L-layer network, implement the forward propagation module, compute the cost, implement the backward propagation module, and update the parameters.
- Initialize the parameters of an L-layer neural network (Initialization)
Each weight matrix is initialized with random values drawn from a standard normal distribution and scaled by 0.01; each bias vector is initialized to zeros.
def initialize_parameters_deep(layer_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)    # number of layers in the network, including the input layer
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l - 1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))
    return parameters
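A quick sanity check of the shapes (assuming the function above has been run; the layer sizes here are made up for illustration, not the assignment's test case):
parameters = initialize_parameters_deep([5, 4, 3])    # hypothetical network: 5 inputs, 4 hidden units, 3 outputs
for l in range(1, 3):
    print("W" + str(l), parameters["W" + str(l)].shape)    # (4, 5), then (3, 4)
    print("b" + str(l), parameters["b" + str(l)].shape)    # (4, 1), then (3, 1)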
- Forward Propagation Module
4.1 The linear part (Linear Forward)
def linear_forward(A, W, b):
    # Z = W·A + b for the current layer
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache
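A small shape check, assuming the function above is defined; the arrays are arbitrary random values, not the official test case:
A = np.random.randn(3, 2)    # hypothetical: 3 units in the previous layer, 2 examples
W = np.random.randn(1, 3)
b = np.zeros((1, 1))
Z, cache = linear_forward(A, W, b)
print(Z.shape)    # (1, 2): one linear output per example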
4.2 Forward propagation for the LINEAR -> ACTIVATION layer (Linear-Activation Forward)
def linear_activation_forward(A_prev, W, b, activation):
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    cache = (linear_cache, activation_cache)
    return A, cache
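To illustrate the interface only (again with made-up inputs, not the graded test values), the same pre-activation can be pushed through either activation:
np.random.seed(2)
A_prev = np.random.randn(3, 2)    # hypothetical activations from the previous layer
W, b = np.random.randn(1, 3), np.zeros((1, 1))
A_relu, _ = linear_activation_forward(A_prev, W, b, activation="relu")
A_sig, _ = linear_activation_forward(A_prev, W, b, activation="sigmoid")
print(A_relu.min() >= 0)    # True: ReLU clips negative values to 0
print(((0 < A_sig) & (A_sig < 1)).all())    # True: sigmoid outputs lie strictly in (0, 1)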
4.3 L-Layer Model
Forward propagation for the [LINEAR -> RELU] × (L-1) -> LINEAR -> SIGMOID model
def L_model_forward(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2    # number of layers in the network
    # the loop starts at 1 because layer 0 is the input
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], "relu")
        caches.append(cache)
    # output layer: LINEAR -> SIGMOID
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], "sigmoid")
    caches.append(cache)
    return AL, caches
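Putting initialization and the layer functions together on random data (sizes chosen only for illustration):
np.random.seed(1)
X = np.random.randn(4, 5)    # hypothetical: 4 features, 5 examples
parameters = initialize_parameters_deep([4, 3, 1])
AL, caches = L_model_forward(X, parameters)
print(AL.shape)       # (1, 5): one prediction per example
print(len(caches))    # 2: one cache per layer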
- Cross-Entropy Cost Function
def compute_cost(AL, Y):
    m = Y.shape[1]
    logprobs = np.multiply(np.log(AL), Y) + np.multiply(np.log(1 - AL), 1 - Y)
    cost = -1 / m * np.sum(logprobs)
    cost = np.squeeze(cost)    # remove size-1 dimensions, e.g. turn [[17]] into 17
    return cost
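A tiny worked example with hand-picked predictions rather than real network output:
Y = np.array([[1, 0]])
AL = np.array([[0.9, 0.2]])    # hypothetical predictions for the two labels
print(compute_cost(AL, Y))     # -(log(0.9) + log(0.8)) / 2 ≈ 0.164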
- Backward Propagation Module
6.1 Linear Backward
def linear_backward(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = 1 / m * np.dot(dZ, A_prev.T)
    db = 1 / m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db
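A shape check with random gradients, to confirm that each output matches the shape of the corresponding forward-pass quantity (values are made up):
dZ = np.random.randn(1, 2)
A_prev, W, b = np.random.randn(3, 2), np.random.randn(1, 3), np.zeros((1, 1))
dA_prev, dW, db = linear_backward(dZ, (A_prev, W, b))
print(dA_prev.shape, dW.shape, db.shape)    # (3, 2) (1, 3) (1, 1), matching A_prev, W and b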
6.2 Linear-Activation Backward
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
6.3 Backward propagation for the whole network (L-Model Backward)
def L_model_backward(AL, Y, caches):
    grads = {}
    L = len(caches)    # number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    # initialize the backward pass with the derivative of the cost with respect to AL
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    # Lth layer (SIGMOID -> LINEAR) gradients
    current_cache = caches[L - 1]
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dAL, current_cache, "sigmoid")
    grads["dA" + str(L - 1)] = dA_prev_temp
    grads["dW" + str(L)] = dW_temp
    grads["db" + str(L)] = db_temp
    # loop from l = L-2 down to l = 0
    for l in reversed(range(L - 1)):
        # lth layer: (RELU -> LINEAR) gradients
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, "relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads
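End to end on hypothetical data, the backward pass returns one gradient entry per parameter plus the dA terms (this sketch assumes the forward-pass helpers above have been run):
np.random.seed(1)
X = np.random.randn(4, 5)                          # hypothetical inputs
Y = (np.random.rand(1, 5) > 0.5).astype(float)     # hypothetical binary labels
parameters = initialize_parameters_deep([4, 3, 1])
AL, caches = L_model_forward(X, parameters)
grads = L_model_backward(AL, Y, caches)
print(sorted(grads.keys()))    # ['dA0', 'dA1', 'dW1', 'dW2', 'db1', 'db2']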
6.4 Update Parameters
def update_parameters(params, grads, learning_rate):
    parameters = params.copy()
    L = len(parameters) // 2    # number of layers in the network
    # one step of gradient descent for every W and b
    for l in range(L):
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]
    return parameters
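The helpers above are meant to be chained into a training loop. A minimal sketch, assuming hypothetical data X (features × examples) and Y (1 × examples); the default hyperparameters are placeholders, not values prescribed by this write-up:
def L_layer_model(X, Y, layer_dims, learning_rate=0.0075, num_iterations=2500):
    # end-to-end gradient descent built from the functions defined above
    parameters = initialize_parameters_deep(layer_dims)
    for i in range(num_iterations):
        AL, caches = L_model_forward(X, parameters)                    # forward pass
        cost = compute_cost(AL, Y)                                     # cross-entropy cost
        grads = L_model_backward(AL, Y, caches)                        # backward pass
        parameters = update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0:
            print("Cost after iteration {}: {}".format(i, cost))
    return parameters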