Initialization:
def model(..., initialization = 'he'):
    ...
    if initialization == 'he':
        parameters = initialize_parameters_he(layers_dims)
-------------------------------------------------
import numpy as np

def initialize_parameters_he(layers_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layers_dims) - 1            # number of layers, excluding the input layer
    for l in range(1, L + 1):
        # He initialization: scale Gaussian weights by sqrt(2 / n_prev),
        # which keeps activation variance roughly stable with ReLU units.
        parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l-1]) * np.sqrt(2. / layers_dims[l-1])
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
    return parameters
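A quick sanity check (the layer sizes below are an arbitrary example, not from the notes): with He initialization, the weights of layer l are Gaussian with standard deviation sqrt(2 / layers_dims[l-1]).
layers_dims = [2, 4, 1]                  # hypothetical sizes: 2 inputs, 4 hidden units, 1 output
parameters = initialize_parameters_he(layers_dims)
print(parameters['W1'].shape)            # (4, 2)
print(parameters['W1'].std())            # roughly sqrt(2 / 2) = 1.0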
Regularization:
def model(..., lambd = 0, keep_prob = 1):
    ...
    for i in range(0, num_iterations):
        # Forward propagation, with or without dropout.
        if keep_prob == 1:
            a3, cache = forward_propagation(X, parameters)
        elif keep_prob < 1:
            a3, cache = forward_propagation_with_dropout(X, parameters, keep_prob)
        # Cost, with or without the L2 regularization term.
        if lambd == 0:
            cost = compute_cost(a3, Y)
        else:
            cost = compute_cost_with_regularization(a3, Y, parameters, lambd)
        ...
    return parameters
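----------------------------------------------------------
The model above calls forward_propagation_with_dropout, which these notes don't show. Here is a minimal sketch, assuming the same 3-layer ReLU/ReLU/sigmoid network implied by the cache in backward_propagation_with_regularization below; it uses inverted dropout, i.e. hidden units are shut down with probability 1 - keep_prob and the survivors are rescaled by keep_prob so expected activations are unchanged:
def forward_propagation_with_dropout(X, parameters, keep_prob = 0.5):
    W1, b1 = parameters['W1'], parameters['b1']
    W2, b2 = parameters['W2'], parameters['b2']
    W3, b3 = parameters['W3'], parameters['b3']
    Z1 = np.dot(W1, X) + b1
    A1 = np.maximum(0, Z1)                        # ReLU
    D1 = np.random.rand(*A1.shape) < keep_prob    # dropout mask for layer 1
    A1 = A1 * D1 / keep_prob                      # shut down units, rescale the rest
    Z2 = np.dot(W2, A1) + b2
    A2 = np.maximum(0, Z2)                        # ReLU
    D2 = np.random.rand(*A2.shape) < keep_prob    # dropout mask for layer 2
    A2 = A2 * D2 / keep_prob
    Z3 = np.dot(W3, A2) + b3
    A3 = 1. / (1. + np.exp(-Z3))                  # sigmoid output
    # Masks are kept in the cache because the backward pass needs them.
    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    return A3, cache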
----------------------------------------------------------
def compute_cost_with_regularization(A3, Y, parameters, lambd):
    m = Y.shape[1]
    W1 = parameters['W1']
    W2 = parameters['W2']
    W3 = parameters['W3']
    cross_entropy_cost = compute_cost(A3, Y)    # the usual cross-entropy part
    # L2 penalty: (lambda / 2m) * sum of squared weights over all layers.
    L2_regularization_cost = lambd / (2 * m) * (np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3)))
    cost = cross_entropy_cost + L2_regularization_cost
    return cost
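In formula form, matching the code above:

$$J_{regularized} = \underbrace{-\frac{1}{m}\sum_{i=1}^{m}\left(y^{(i)}\log a^{[3](i)} + (1-y^{(i)})\log\left(1-a^{[3](i)}\right)\right)}_{\text{cross-entropy cost}} + \underbrace{\frac{\lambda}{2m}\sum_{l}\sum_{k}\sum_{j}\left(W_{k,j}^{[l]}\right)^2}_{\text{L2 regularization cost}}$$

Differentiating the penalty with respect to each W is what adds the extra (lambd / m) * W term to every dW in the backward pass below.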
-----------------------------------------------------------
def backward_propagation_with_regularization(X, Y, cache, lambd):
    m = X.shape[1]
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache
    dZ3 = A3 - Y                                     # sigmoid output + cross-entropy gradient
    # Each dW picks up an extra (lambd / m) * W term from the L2 penalty.
    dW3 = 1. / m * np.dot(dZ3, A2.T) + lambd / m * W3
    db3 = 1. / m * np.sum(dZ3, axis=1, keepdims=True)
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))         # ReLU derivative (assumes ReLU hidden units)