import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v3 import *
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward
# plt.rcParams["figure.figsize"] = (5.0, 4.0)
# plt.rcParams['image.interpolation'] = 'nearest'
# plt.rcParams['image.cmap'] = 'gray'
np.random.seed(1) # keep all the random function calls consistent
# sigmoid()          takes Z, returns (A, cache) where cache = Z and A = sigmoid(Z)
# sigmoid_backward() takes (dA, cache), returns dZ = dA * s * (1 - s)   # backward pass through a single sigmoid unit
# relu()             takes Z, returns (A, cache)
# relu_backward()    takes (dA, cache), returns dZ
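# For reference, a minimal sketch of what the dnn_utils_v2 helpers are assumed to do
# (the real implementations live in dnn_utils_v2; the bodies below are an assumption
# consistent with how the helpers are used in this file, kept commented out so they
# do not shadow the imported versions):
#
# def sigmoid(Z):
#     A = 1 / (1 + np.exp(-Z))
#     return A, Z                       # cache is Z
#
# def sigmoid_backward(dA, cache):
#     s = 1 / (1 + np.exp(-cache))
#     return dA * s * (1 - s)           # dZ
#
# def relu(Z):
#     return np.maximum(0, Z), Z        # cache is Z
#
# def relu_backward(dA, cache):
#     dZ = np.array(dA, copy=True)
#     dZ[cache <= 0] = 0                # gradient is 0 where Z <= 0
#     return dZ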
# Initialization
# L-layer Neural Network
def initialize_parameters_deep(layer_dims):
    # layer_dims stores n[l], i.e. the number of units in each layer
"""
Arguments:
layer_dims -- python array (list) containing the dimensions of each layer in our network
Returns:
parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
bl -- bias vector of shape (layer_dims[l], 1)
"""
np.random.seed(3)
parameters ={}
L = len(layer_dims)
for l in range(1, L):
parameters["W"+str(l)] = np.random.rand(layer_dims[l],layer_dims[l-1])*0.01
parameters["b"+str(l)] = np.zeros((layer_dims[l], 1))
assert (parameters["W"+str(l)].shape == (layer_dims[l],layer_dims[l-1]))
assert (parameters["b"+str(l)].shape == (layer_dims[l],1))
return parameters
#
# parameters = initialize_parameters_deep([5,4,3])
# print("W1 = " + str(parameters["W1"]))
# print("b1 = " + str(parameters["b1"]))
# print("W2 = " + str(parameters["W2"]))
# print("b2 = " + str(parameters["b2"]))
# Linear Forward
def linear_forward(A, W, b):
"""
Implement the linear part of a layer's forward propagation.
Arguments:
A -- activations from previous layer (or input data): ( n(l-1), number of examples )
W -- weights matrix: numpy array of shape ( n(l), n(l-1) )
b -- bias vector, numpy array of shape ( n(l), 1 )
Returns:
Z -- the input of the activation function, also called pre-activation parameter
cache -- a python list containing "A", "W" and "b" ; stored for computing the backward pass efficiently
"""
Z = np.dot(W, A) + b
assert (Z.shape == (W.shape[0], A.shape[1]))
cache= (A, W, b)
return Z, cache
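# A quick shape sanity check for linear_forward (toy values, not the graded test case):
# A_ex = np.random.randn(3, 2)     # n(l-1) = 3 units in the previous layer, 2 examples
# W_ex = np.random.randn(4, 3)     # n(l)   = 4 units in the current layer
# b_ex = np.zeros((4, 1))
# Z_ex, _ = linear_forward(A_ex, W_ex, b_ex)
# print(Z_ex.shape)                # (4, 2)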
# Linear-Activation Forward
def linear_activation_forward(A_prev, W, b, activation):
"""
Implement the forward propagation for the LINEAR->ACTIVATION layer
Arguments:
A_prev -- activations from previous layer (or input data): ( n(l-1) , number of examples)
W -- weights matrix: numpy array of shape ( n(l) , n(l-1) )
b -- bias vector, numpy array of shape ( n(l) , 1 )
activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    # Note: sigmoid(Z) and relu(Z) each return (A, activation_cache) with activation_cache == Z
Returns:
A -- the output of the activation function, also called the post-activation value
cache -- a python list containing "linear_cache" and "activation_cache";
stored for computing the backward pass efficiently
"""
    Z, linear_cache = linear_forward(A_prev, W, b)
if activation == "sigmoid":
A, activation_cache = sigmoid(Z) # Z == activation_cache
elif activation == "relu":
A, activation_cache = relu(Z)
    assert (A.shape == (W.shape[0], A_prev.shape[1]))
cache = (linear_cache, activation_cache)
return A, cache
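# Quick check (toy values): with activation="relu", A has shape (n(l), number of examples)
# and every entry is non-negative:
# A_ex, _ = linear_activation_forward(np.random.randn(3, 2), np.random.randn(4, 3), np.zeros((4, 1)), activation="relu")
# print(A_ex.shape, (A_ex >= 0).all())   # (4, 2) True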
# L-Layer forward Model
def L_model_forward(X, parameters):
"""
Implement forward propagation for the [LINEAR->RELU]*(L-1) -> LINEAR->SIGMOID computation
Arguments:
X -- data, numpy array of shape (input size, number of examples)
parameters -- output of initialize_parameters_deep()
Returns:
    AL -- last post-activation value, shape (1, number of examples)
caches -- list of caches containing:
every cache of linear_relu_forward() (there are L-1 of them, indexed from 0 to L-2)
the cache of linear_sigmoid_forward() (there is one, indexed L-1)
"""
    L = len(parameters) // 2  # the number of layers is recovered from parameters: one W and one b per layer
A = X
caches =[]
for l in range(1, L):
W = parameters["W"+str(l)]
b = parameters["b"+str(l)]
        A, cache = linear_activation_forward(A, W, b, activation="relu")  # cache = (linear_cache, activation_cache) = ((A_prev, W, b), Z)
        caches.append(cache)  # caches are appended in order, indexed from 0
    # the loop above has run the L-1 RELU layers and stored one cache per layer
    AL, cache = linear_activation_forward(A, parameters["W"+str(L)], parameters["b"+str(L)], activation="sigmoid")  # final LINEAR->SIGMOID output layer; its cache stores (A, W{L}, b{L}) and Z{L}
caches.append(cache)
    assert (AL.shape == (1, X.shape[1]))  # the output layer has one unit; X.shape[1] is the number of examples
return AL, caches
# X, parameters = L_model_forward_test_case_2hidden()
# AL, caches = L_model_forward(X, parameters)
# print("AL = " + str(AL))
# print("Length of caches list = " + str(len(caches)))
# Cost function
def compute_cost(AL, Y):
    """
    Cross-entropy cost: J = -(1/m) * sum( Y*log(AL) + (1-Y)*log(1-AL) ).
    AL -- predictions, shape (1, m); Y -- true labels, shape (1, m).
    """
    m = Y.shape[1]
    cost = -(1.0/m)*np.sum(Y*np.log(AL) + (1-Y)*np.log(1-AL))
cost= np.squeeze(cost)
assert (cost.shape == ())
return cost
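# Worked example (toy values): for AL = [[0.8, 0.9, 0.4]] and Y = [[1, 1, 0]],
# cost = -(1/3) * (log 0.8 + log 0.9 + log 0.6) ≈ 0.2798
# print(compute_cost(np.array([[0.8, 0.9, 0.4]]), np.array([[1, 1, 0]])))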
# Backward propagation module
def linear_backward(dZ, cache):
"""
Implement the linear portion of backward propagation for a single layer (layer l)
Arguments:
    dZ -- Gradient of the cost with respect to the linear output Z of the current layer l
    cache -- tuple (A_prev, W, b) stored by linear_forward() for the current layer
Returns:
dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
dW -- Gradient of the cost with respect to W (current layer l), same shape as W
db -- Gradient of the cost with respect to b (current layer l), same shape as b
"""
A_prev, W, b = cache
m = A_prev.shape[1] #A.shape[1] == X.shape[1] == numbers
dW = (1.0/m)*np.dot(dZ, A_prev.T)
db = (1.0/m)*np.sum(dZ, axis=1, keepdims=True)
dA_prev = np.dot(W.T, dZ)
assert (dA_prev.shape == A_prev.shape)
assert (dW.shape == W.shape)
assert (db.shape == b.shape)
return dA_prev, dW, db
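# For Z = W·A_prev + b, the gradients computed above are:
#   dW      = (1/m) * dZ · A_prev.T
#   db      = (1/m) * sum of dZ over the examples (axis=1)
#   dA_prev = W.T · dZ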
# Linear-Activation backward
def linear_activation_backward(dA, cache, activation):
"""
Implement the backward propagation for the LINEAR->ACTIVATION layer.
Arguments:
dA -- post-activation gradient for current layer l
cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
Returns:
dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
dW -- Gradient of the cost with respect to W (current layer l), same shape as W
db -- Gradient of the cost with respect to b (current layer l), same shape as b
"""
    # relu_backward() / sigmoid_backward() take dA and the activation cache (Z) and return dZ
linear_cache, activation_cache = cache
if activation=="sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)  # activation_cache is Z
dA_prev, dW, db = linear_backward(dZ, linear_cache)
elif activation=="relu":
dZ = relu_backward(dA, activation_cache)
dA_prev, dW, db = linear_backward(dZ, linear_cache)
return dA_prev, dW, db
# L-Layer Backward Model
def L_model_backward(AL, Y, caches):
"""
Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group
Arguments:
AL -- output of the forward propagation (L_model_forward())
Y -- label (containing 0 if non-cat, 1 if cat)
caches -- list of caches containing:
(caches[l], for l in range(L-1) i.e l = 0...L-2) every cache of linear_activation_forward() with "relu"
(caches[L-1]) the cache of linear_activation_forward() with "sigmoid"
caches[l] = (linear_cache, activation_cache) (A, W, b) , Z
Returns:
grads -- A dictionary with the gradients
grads["dA" + str(l)] = ...
grads["dW" + str(l)] = ...
grads["db" + str(l)] = ...
"""
grads = {}
m = AL.shape[1]
L = len(caches)
    Y = Y.reshape(AL.shape)  # make sure Y has the same shape as AL
# Initializing the backpropagation
# dAL = -np.divide(Y, AL)+np.divide(1-Y, 1-AL)
    dAL = -Y/AL + (1-Y)/(1-AL)  # derivative of the cross-entropy cost with respect to AL
    # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: dAL, caches[L-1]. Outputs: grads["dA"+str(L)], grads["dW"+str(L)], grads["db"+str(L)]
    # computed with linear_activation_backward using the "sigmoid" activation
current_cache = caches[L-1]
grads["dA"+str(L)], grads["dW"+str(L)], grads["db"+str(L)] = linear_activation_backward(dAL, current_cache, activation="sigmoid")
    for l in range(L-2, -1, -1):  # iterate l = L-2, L-3, ..., 0, i.e. reversed(range(L-1))
        current_cache = caches[l]  # first iteration handles layer L-1 (cache index L-2)
        dA = grads["dA" + str(l + 2)]  # gradient passed down from the layer above
        grads["dA"+str(l+1)], grads["dW"+str(l+1)], grads["db"+str(l+1)] = linear_activation_backward(dA, current_cache, activation="relu")
return grads
#
# AL, Y_assess, caches = L_model_backward_test_case()
# grads = L_model_backward(AL, Y_assess, caches)
# print_grads(grads)
# Update Parameters
def update_parameters(parameters, grads, learning_rate):
"""
Update parameters using gradient descent
Arguments:
parameters -- python dictionary containing your parameters
grads -- python dictionary containing your gradients, output of L_model_backward
Returns:
parameters -- python dictionary containing your updated parameters
parameters["W" + str(l)] = ...
parameters["b" + str(l)] = ...
"""
L = len(parameters)//2
    for l in range(L):  # parameters use 1-based indices W1..WL, b1..bL
W = parameters["W"+str(l+1)]
b = parameters["b"+str(l+1)]
dW = grads["dW"+str(l+1)]
db = grads["db"+str(l+1)]
parameters["W"+str(l+1)] = W - learning_rate * dW
parameters["b"+str(l+1)] = b - learning_rate * db
return parameters
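# A minimal sanity check for update_parameters (made-up values, for illustration only):
# params_ex = {"W1": np.array([[1.0, 2.0]]), "b1": np.array([[0.5]])}
# grads_ex  = {"dW1": np.array([[0.1, 0.2]]), "db1": np.array([[0.05]])}
# update_parameters(params_ex, grads_ex, learning_rate=0.1)
# # -> W1 becomes [[0.99, 1.98]], b1 becomes [[0.495]]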
layers_dims = [12288, 20, 7, 5, 1]  # 4-layer model (the input layer is not counted)
def L_layer_model(X, Y, layer_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    """
    Train the [LINEAR->RELU]*(L-1) -> LINEAR->SIGMOID model with batch gradient descent
    and return the learnt parameters, ready to be passed to predict().
    """
    np.random.seed(1)
    costs = []
    parameters = initialize_parameters_deep(layer_dims)
for i in range(0, num_iterations):
AL, caches = L_model_forward(X, parameters)
cost = compute_cost(AL, Y)
grads = L_model_backward(AL, Y, caches)
parameters = update_parameters(parameters, grads, learning_rate)
if print_cost and i%100 == 0:
print("Cost after iteration %i:%f" %(i, cost))
costs.append(cost)
# plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')  # the cost is recorded every 100 iterations
    plt.title("Learning rate = " + str(learning_rate))
plt.show()
return parameters
def predict(X, y, parameters):
"""
This function is used to predict the results of a L-layer neural network.
Arguments:
    X -- data set of examples you would like to label
    y -- true label vector for X, used only to print the accuracy
    parameters -- parameters of the trained model
Returns:
p -- predictions for the given dataset X
"""
m = X.shape[1]
n = len(parameters) // 2 # number of layers in the neural network
p = np.zeros((1,m))
# Forward propagation
probas, caches = L_model_forward(X, parameters)
# convert probas to 0/1 predictions
for i in range(0, probas.shape[1]):
if probas[0,i] > 0.5:
p[0,i] = 1
else:
p[0,i] = 0
#print results
#print ("predictions: " + str(p))
#print ("true labels: " + str(y))
print("Accuracy: " + str(np.sum((p == y)/m)))
return p
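# The training call below assumes train_x (flattened, scaled images), train_y (labels) and
# num_px (image side length) are already defined. A minimal loading sketch, assuming the
# course's cat/non-cat dataset sits at datasets/train_catvnoncat.h5 (file name and HDF5
# keys are assumptions):
train_dataset = h5py.File("datasets/train_catvnoncat.h5", "r")
train_x_orig = np.array(train_dataset["train_set_x"][:])              # (m, num_px, num_px, 3)
train_y = np.array(train_dataset["train_set_y"][:]).reshape(1, -1)    # (1, m)
train_dataset.close()
num_px = train_x_orig.shape[1]
train_x = train_x_orig.reshape(train_x_orig.shape[0], -1).T / 255.    # flatten and scale to [0, 1]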
parameters = L_layer_model(train_x, train_y, layers_dims, num_iterations=2500, print_cost=True)
# Test the model with your own image
from scipy import ndimage      # note: ndimage.imread and scipy.misc.imresize only exist in older SciPy releases
import scipy.misc
my_image = "my_image.jpg"      # change this to the name of your image file
my_label_y = [1] # the true class of your image (1 -> cat, 0 -> non-cat)
fname = "images/" + my_image
image = np.array(ndimage.imread(fname, flatten=False))
my_image = scipy.misc.imresize(image, size=(num_px, num_px)).reshape((num_px*num_px*3, 1)) / 255.  # flatten and scale to [0, 1], matching the training data
my_predicted_image = predict(my_image, my_label_y, parameters)