References
- https://blog.csdn.net/u013733326/article/details/79702148
- Github-Building your Deep Neural Network: Step by Step
This post draws on the two references above; both are recommended.
I. Assignment overview
This week's assignment has two parts. The first is much like last week's: build a neural network with a single hidden layer. The second builds a network with more layers. The dataset is the cat-picture set from C1W2, and testCases.py is provided so each step can be verified in Jupyter.
To ease into deep neural networks, I list the code for both versions at each step. The single-hidden-layer code is left uncommented: after last week's assignment it should be familiar, and you can use it to get a quick grip on the deep neural network (DNN).
What we need to do:
- build single-layer and multi-layer neural networks;
- use nonlinear activation functions; this assignment uses relu and sigmoid;
- compute the cross-entropy cost (the loss function);
- implement iterative forward and backward propagation.
II. Preparation
Python version: 3.7.2
1. Create the project (omitted)
2. Imports
import numpy as np
import matplotlib.pyplot as plt
import dnn_utils
import lr_utils
import testCases
np.random.seed(1)
- numpy: the fundamental package for scientific computing with Python;
- matplotlib: a plotting library, used here for visualization;
- testCases: test examples for checking each part of the main program in Jupyter;
- np.random.seed(): seeds the random number generator so every run of the main program draws the same "random" values, which makes debugging easier (see the snippet below).
These are the dependencies of this assignment's main program. The optional part uses a few more, which I will list when we get there.
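A trivial illustration of what the seed buys us (my own snippet, not from the assignment): with the same seed, every run prints the same numbers.

np.random.seed(1)
print(np.random.randn(2))  # [ 1.62434536 -0.61175641] on every run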
3. A look inside lr_utils and testCases
lr_utils
import numpy as np
import h5py

def load_dataset():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # your train set labels
    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # your test set labels
    classes = np.array(test_dataset["list_classes"][:])  # the list of classes
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
testCases
import numpy as np

def linear_forward_test_case():
    np.random.seed(1)
    """
    X = np.array([[-1.02387576, 1.12397796],
                  [-1.62328545, 0.64667545],
                  [-1.74314104, -0.59664964]])
    W = np.array([[ 0.74505627, 1.97611078, -1.24412333]])
    b = np.array([[1]])
    """
    A = np.random.randn(3,2)
    W = np.random.randn(1,3)
    b = np.random.randn(1,1)
    return A, W, b

def linear_activation_forward_test_case():
    """
    X = np.array([[-1.02387576, 1.12397796],
                  [-1.62328545, 0.64667545],
                  [-1.74314104, -0.59664964]])
    W = np.array([[ 0.74505627, 1.97611078, -1.24412333]])
    b = 5
    """
    np.random.seed(2)
    A_prev = np.random.randn(3,2)
    W = np.random.randn(1,3)
    b = np.random.randn(1,1)
    return A_prev, W, b

def L_model_forward_test_case():
    """
    X = np.array([[-1.02387576, 1.12397796],
                  [-1.62328545, 0.64667545],
                  [-1.74314104, -0.59664964]])
    parameters = {'W1': np.array([[ 1.62434536, -0.61175641, -0.52817175],
                                  [-1.07296862, 0.86540763, -2.3015387 ]]),
                  'W2': np.array([[ 1.74481176, -0.7612069 ]]),
                  'b1': np.array([[ 0.],
                                  [ 0.]]),
                  'b2': np.array([[ 0.]])}
    """
    np.random.seed(1)
    X = np.random.randn(4,2)
    W1 = np.random.randn(3,4)
    b1 = np.random.randn(3,1)
    W2 = np.random.randn(1,3)
    b2 = np.random.randn(1,1)
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return X, parameters

def compute_cost_test_case():
    Y = np.asarray([[1, 1, 1]])
    aL = np.array([[.8,.9,0.4]])
    return Y, aL

def linear_backward_test_case():
    """
    z, linear_cache = (np.array([[-0.8019545 , 3.85763489]]),
                       (np.array([[-1.02387576, 1.12397796],
                                  [-1.62328545, 0.64667545],
                                  [-1.74314104, -0.59664964]]),
                        np.array([[ 0.74505627, 1.97611078, -1.24412333]]),
                        np.array([[1]])))
    """
    np.random.seed(1)
    dZ = np.random.randn(1,2)
    A = np.random.randn(3,2)
    W = np.random.randn(1,3)
    b = np.random.randn(1,1)
    linear_cache = (A, W, b)
    return dZ, linear_cache

def linear_activation_backward_test_case():
    """
    aL, linear_activation_cache = (np.array([[ 3.1980455 , 7.85763489]]),
                                   ((np.array([[-1.02387576, 1.12397796],
                                               [-1.62328545, 0.64667545],
                                               [-1.74314104, -0.59664964]]),
                                     np.array([[ 0.74505627, 1.97611078, -1.24412333]]),
                                     5),
                                    np.array([[ 3.1980455 , 7.85763489]])))
    """
    np.random.seed(2)
    dA = np.random.randn(1,2)
    A = np.random.randn(3,2)
    W = np.random.randn(1,3)
    b = np.random.randn(1,1)
    Z = np.random.randn(1,2)
    linear_cache = (A, W, b)
    activation_cache = Z
    linear_activation_cache = (linear_cache, activation_cache)
    return dA, linear_activation_cache

def L_model_backward_test_case():
    """
    X = np.random.rand(3,2)
    Y = np.array([[1, 1]])
    parameters = {'W1': np.array([[ 1.78862847, 0.43650985, 0.09649747]]), 'b1': np.array([[ 0.]])}
    aL, caches = (np.array([[ 0.60298372, 0.87182628]]),
                  [((np.array([[ 0.20445225, 0.87811744],
                               [ 0.02738759, 0.67046751],
                               [ 0.4173048 , 0.55868983]]),
                     np.array([[ 1.78862847, 0.43650985, 0.09649747]]),
                     np.array([[ 0.]])),
                    np.array([[ 0.41791293, 1.91720367]]))])
    """
    np.random.seed(3)
    AL = np.random.randn(1, 2)
    Y = np.array([[1, 0]])
    A1 = np.random.randn(4,2)
    W1 = np.random.randn(3,4)
    b1 = np.random.randn(3,1)
    Z1 = np.random.randn(3,2)
    linear_cache_activation_1 = ((A1, W1, b1), Z1)
    A2 = np.random.randn(3,2)
    W2 = np.random.randn(1,3)
    b2 = np.random.randn(1,1)
    Z2 = np.random.randn(1,2)
    linear_cache_activation_2 = ((A2, W2, b2), Z2)
    caches = (linear_cache_activation_1, linear_cache_activation_2)
    return AL, Y, caches

def update_parameters_test_case():
    """
    parameters = {'W1': np.array([[ 1.78862847, 0.43650985, 0.09649747],
                                  [-1.8634927 , -0.2773882 , -0.35475898],
                                  [-0.08274148, -0.62700068, -0.04381817],
                                  [-0.47721803, -1.31386475, 0.88462238]]),
                  'W2': np.array([[ 0.88131804, 1.70957306, 0.05003364, -0.40467741],
                                  [-0.54535995, -1.54647732, 0.98236743, -1.10106763],
                                  [-1.18504653, -0.2056499 , 1.48614836, 0.23671627]]),
                  'W3': np.array([[-1.02378514, -0.7129932 , 0.62524497],
                                  [-0.16051336, -0.76883635, -0.23003072]]),
                  'b1': np.array([[ 0.],
                                  [ 0.],
                                  [ 0.],
                                  [ 0.]]),
                  'b2': np.array([[ 0.],
                                  [ 0.],
                                  [ 0.]]),
                  'b3': np.array([[ 0.],
                                  [ 0.]])}
    grads = {'dW1': np.array([[ 0.63070583, 0.66482653, 0.18308507],
                              [ 0.        , 0.        , 0.        ],
                              [ 0.        , 0.        , 0.        ],
                              [ 0.        , 0.        , 0.        ]]),
             'dW2': np.array([[ 1.62934255, 0.        , 0.        , 0.        ],
                              [ 0.        , 0.        , 0.        , 0.        ],
                              [ 0.        , 0.        , 0.        , 0.        ]]),
             'dW3': np.array([[-1.40260776, 0.        , 0.        ]]),
             'da1': np.array([[ 0.70760786, 0.65063504],
                              [ 0.17268975, 0.15878569],
                              [ 0.03817582, 0.03510211]]),
             'da2': np.array([[ 0.39561478, 0.36376198],
                              [ 0.7674101 , 0.70562233],
                              [ 0.0224596 , 0.02065127],
                              [-0.18165561, -0.16702967]]),
             'da3': np.array([[ 0.44888991, 0.41274769],
                              [ 0.31261975, 0.28744927],
                              [-0.27414557, -0.25207283]]),
             'db1': 0.75937676204411464,
             'db2': 0.86163759922811056,
             'db3': -0.84161956022334572}
    """
    np.random.seed(2)
    W1 = np.random.randn(3,4)
    b1 = np.random.randn(3,1)
    W2 = np.random.randn(1,3)
    b2 = np.random.randn(1,1)
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    np.random.seed(3)
    dW1 = np.random.randn(3,4)
    db1 = np.random.randn(3,1)
    dW2 = np.random.randn(1,3)
    db2 = np.random.randn(1,1)
    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    return parameters, grads
4. Load and preprocess the data
train_set_x_orig , train_set_y , test_set_x_orig , test_set_y , classes = lr_utils.load_dataset()
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
train_x = train_x_flatten / 255
train_y = train_set_y
test_x = test_x_flatten / 255
test_y = test_set_y
This week's dataset is the same as C1W2's, and it is loaded and preprocessed the same way; look back at that assignment if anything is unclear.
Preparation done. Let's get to it.
III. Let's tango!
1. DNN construction steps
- Initialize the parameters (Wl, bl)
- Iterative forward propagation:
  1) the linear part of one layer's forward pass (linear)
  2) the activation part of one layer's forward pass (activation)
  3) iterate: relu for layers 1 to L-1, sigmoid for layer L
- Compute the cost
- Backward propagation:
  1) the linear part of one layer's backward pass (linear)
  2) the activation part of one layer's backward pass (activation)
  3) iterate: sigmoid for layer L, relu for layers L-1 down to 1
- Update the parameters
- Predict
Note: besides its regular outputs, each forward module must also cache some data (cache) for the backward pass. I recommend writing the cache contents down on scratch paper to avoid mixing them up. A preview of how all the steps connect follows below.
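Here is a preview sketch of how the pieces fit together (the function names match the ones built below; treat it as a roadmap rather than runnable code at this point):

parameters = initialize_parameters_deep(layerdims)
for i in range(iterations_N):
    AL, caches = L_mode_foward(X, parameters)       # forward: (L-1) relu layers, then sigmoid
    cost = compute_cost(Y, AL)                      # cross-entropy cost
    grads = L_mode_backward(AL, Y, caches)          # backward: gradients for every layer
    parameters = update_parameters(parameters, grads, learning_rate)  # gradient descent step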
2. Initialize parameters
2.1 Single hidden layer
def initialize_parameters(n_x, n_h, n_y):
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros(shape=(n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros(shape=(n_y, 1))
    # #sanity checks
    assert (W1.shape == (n_h, n_x))
    assert (b1.shape == (n_h, 1))
    assert (W2.shape == (n_y, n_h))
    assert (b2.shape == (n_y, 1))
    parameters = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2,
    }
    return parameters
Test and output:
# #test
print("==============test initialize_parameters==============")
parameters = initialize_parameters(3, 2, 1)
print("W1=" + str(parameters["W1"]))
print("b1=" + str(parameters["b1"]))
print("W2=" + str(parameters["W2"]))
print("b2=" + str(parameters["b2"]))
print("==============test initialize_parameters==============")
# #output
==============test initialize_parameters==============
W1=[[-0.01244123 -0.00626417 -0.00803766]
 [-0.02419083 -0.00923792 -0.01023876]]
b1=[[0.]
 [0.]]
W2=[[ 0.01123978 -0.00131914]]
b2=[[0.]]
==============test initialize_parameters==============
2.2 DNN
The key to initialization is getting the dimensions of W and b right.
Linear propagation at layer $l$:

$$Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}$$

Dimensions of $W$ at layer $l$: $(n^{[l]}, n^{[l-1]})$

Dimensions of $b$ at layer $l$: $(n^{[l]}, 1)$
def initialize_parameters_deep(layerdims):
    """
    Arguments:
    layerdims - list covering the input layer, the hidden layers, and the output layer;
                each element is the number of units in that layer.
    Returns:
    parameters - dictionary containing the parameters "W1", "b1", ..., "WL", "bL":
        Wl - weight matrix of shape (layerdims[l], layerdims[l-1])
        bl - bias vector of shape (layerdims[l], 1)
    """
    np.random.seed(3)
    parameters = {}
    L = len(layerdims)
    for l in range(1, L):
        parameters["W"+str(l)] = \
            np.random.randn(layerdims[l], layerdims[l - 1])/np.sqrt(layerdims[l - 1])
        parameters["b"+str(l)] = np.zeros(shape=(layerdims[l], 1))
        # #sanity checks
        assert (parameters["W"+str(l)].shape == (layerdims[l],
                                                 layerdims[l - 1]))
        assert (parameters["b"+str(l)].shape == (layerdims[l], 1))
    return parameters
One more note on the layerdims argument: it covers every layer of the NN. For example, when we load the dataset below we set layerdims = [12288, 20, 7, 5, 1]: 12288 is the number of features in X, 1 is the binary-classification output layer, and the 20, 7, 5 in between are the hidden-layer sizes. Also note that the weights here are scaled by 1/sqrt(layerdims[l-1]) instead of the 0.01 used for the shallow net; this keeps the activations from shrinking layer after layer, which matters once the network gets deep.
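As a quick sanity check (my own snippet, not part of the assignment), you can print the shapes this function produces for the full network:

parameters = initialize_parameters_deep([12288, 20, 7, 5, 1])
for l in range(1, 5):
    print("W" + str(l), parameters["W" + str(l)].shape,
          "b" + str(l), parameters["b" + str(l)].shape)
# W1 (20, 12288) b1 (20, 1)
# W2 (7, 20) b2 (7, 1)
# W3 (5, 7) b3 (5, 1)
# W4 (1, 5) b4 (1, 1)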
Test:
# #test
print("==============test initialize_parameters_deep==============")
layerdims = [5, 4, 3]
parameters = initialize_parameters_deep(layerdims)
print("W1=" + str(parameters["W1"]))
print("b1=" + str(parameters["b1"]))
print("W2=" + str(parameters["W2"]))
print("b2=" + str(parameters["b2"]))
print("==============test initialize_parameters_deep==============")
Output:
==============test initialize_parameters_deep==============
W1=[[ 0.79989897  0.19521314  0.04315498 -0.83337927 -0.12405178]
 [-0.15865304 -0.03700312 -0.28040323 -0.01959608 -0.21341839]
 [-0.58757818  0.39561516  0.39413741  0.76454432  0.02237573]
 [-0.18097724 -0.24389238 -0.69160568  0.43932807 -0.49241241]]
b1=[[0.]
 [0.]
 [0.]
 [0.]]
W2=[[-0.59252326 -0.10282495  0.74307418  0.11835813]
 [-0.51189257 -0.3564966   0.31262248 -0.08025668]
 [-0.38441818 -0.11501536  0.37252813  0.98805539]]
b2=[[0.]
 [0.]
 [0.]]
==============test initialize_parameters_deep==============
The values in your matrices may differ from mine, but the dimensions must match.
3. Forward propagation
Forward propagation iterates over the layers, so we first build the single-step computation; each step in turn splits into a linear part and a nonlinear part. The plan:
- the linear module
- the nonlinear (activation) module (already provided in dnn_utils, so this step is skipped)
- combine the two into a single forward step
- iterate the step over the layers
3.1 Linear part
Formula:

$$Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}$$
def linear_foward(A, W, b):
    """
    Arguments:
    A - activations from the previous layer (or the input data), of shape (n[l-1], m)
    W - weight matrix, of shape (n[l], n[l-1])
    b - bias vector, of shape (n[l], 1)
    Returns:
    Z - the input to the activation function
    cache - tuple of "A", "W" and "b", needed for backward propagation
    """
    Z = np.dot(W, A) + b
    assert (Z.shape == (W.shape[0], A.shape[1]))
    cache = (A, W, b)
    return Z, cache
Test:
print("==============test linear_foward==============")
A, W, b = testCases.linear_forward_test_case()
Z, cache = linear_foward(A, W, b)
print("Z" + str(Z))
print("==============test linear_foward==============")
Output:
==============test linear_foward==============
Z[[ 3.26295337 -1.23429987]]
==============test linear_foward==============
3.2 Nonlinear activation part
Two activation functions are used this time:

$$sigmoid:\quad A = \frac{1}{1 + e^{-Z}}$$

$$relu:\quad A = \max(0, Z)$$
The code is provided in dnn_utils:
def sigmoid(Z):
    """
    Implements the sigmoid activation in numpy
    Arguments:
    Z -- numpy array of any shape
    Returns:
    A -- output of sigmoid(z), same shape as Z
    cache -- returns Z as well, useful during backpropagation
    """
    A = 1/(1+np.exp(-Z))
    cache = Z
    return A, cache

def relu(Z):
    """
    Implement the RELU function.
    Arguments:
    Z -- Output of the linear layer, of any shape
    Returns:
    A -- Post-activation parameter, of the same shape as Z
    cache -- returns Z as well; stored for computing the backward pass efficiently
    """
    A = np.maximum(0,Z)
    assert(A.shape == Z.shape)
    cache = Z
    return A, cache
Tests omitted.
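If you want a quick check anyway, here is a small one of my own; the expected values are easy to verify by hand:

A, _ = dnn_utils.sigmoid(np.array([[0.0, 2.0]]))
print(A)  # [[0.5        0.88079708]] -- sigmoid(0) = 0.5
A, _ = dnn_utils.relu(np.array([[-1.5, 2.0]]))
print(A)  # [[0. 2.]] -- negative inputs are clipped to 0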
3.3 One forward step
This combines the linear and nonlinear parts, and caches some data:
def linear_activation_foward(A_prev, W, b, activation):
    """
    Arguments:
    A_prev - activations from the previous layer (or the input layer), of shape (n[l-1], m)
    W - weight matrix, of shape (n[l], n[l-1])
    b - bias vector, of shape (n[l], 1)
    activation - which activation to use: "sigmoid" or "relu"
    Returns:
    A - the output of the activation function
    cache - data cached for backward propagation
    """
    if activation == "sigmoid":
        Z, linear_cache = linear_foward(A_prev, W, b)
        A, activation_cache = dnn_utils.sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_foward(A_prev, W, b)
        A, activation_cache = dnn_utils.relu(Z)
    cache = (linear_cache, activation_cache)
    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    return A, cache
At this point the flow of the computation should not be hard to follow, but all these caches may make your head spin. Don't panic: grab a sheet of scratch paper and write down the cache's structure and the data under each entry:
- cache
  - linear_cache
    - A
    - W
    - b
  - activation_cache
    - Z
We will come back to this at backward propagation.
One more note: this is also the forward propagation of the single-hidden-layer network.
Test:
# #test
print("==============test linear_activation_foward==============")
A_prev, W, b = testCases.linear_activation_forward_test_case()
A, linear_activation_cache = linear_activation_foward(A_prev, W, b,
                                                      activation="sigmoid")
print("sigmoid,A=" + str(A))
A, linear_activation_cache = linear_activation_foward(A_prev, W, b,
                                                      activation="relu")
print("relu,A=" + str(A))
print("==============test linear_activation_foward==============")
Output:
==============test linear_activation_foward==============
sigmoid,A=[[0.96890023 0.11013289]]
relu,A=[[3.43896131 0.        ]]
==============test linear_activation_foward==============
3.4 Iterative forward propagation
If you paid attention in the lectures you know: the NN has L layers; starting from layer 1, we run relu through layers 1 to L-1 and finish with sigmoid on layer L.
def L_mode_foward(X, parameters):
    """
    Arguments:
    X - the data set
    parameters - dictionary of Wl and bl, the output of initialize_parameters_deep()
    Returns:
    AL - the activations of layer L
    caches - list of every layer's cache (cache = (linear_cache, activation_cache))
    """
    caches = []
    A = X
    L = len(parameters) // 2  # #each layer has two parameters, W and b, so divide the count by 2
    # #layers 1..L-1: relu
    for l in range(1, L):  # #range(a, b) stops at b-1, so this covers layers 1..L-1
        A_prev = A
        A, cache = \
            linear_activation_foward(A_prev, parameters['W' + str(l)],
                                     parameters['b' + str(l)], "relu")
        caches.append(cache)
    # #layer L: sigmoid
    AL, cache = linear_activation_foward(A, parameters['W' + str(L)],
                                         parameters['b' + str(L)], "sigmoid")
    caches.append(cache)
    assert (AL.shape == (1, X.shape[1]))
    return AL, caches
Test:
# #test
print("==============test L_mode_foward==============")
X, parameters = testCases.L_model_forward_test_case()
AL, caches = L_mode_foward(X, parameters)
print("AL:" + str(AL))
print("length of caches: " + str(len(caches)))
print("==============test L_mode_foward==============")
Output:
==============test L_mode_foward==============
AL:[[0.17007265 0.2524272 ]]
length of caches: 2
==============test L_mode_foward==============
4. Compute the cost
The cost is computed just as for the single-hidden-layer network, using the cross-entropy loss:
$$J = -\frac{1}{m} \sum_{i=1}^{m} \left( y^{(i)}\log\!\left(a^{[L](i)}\right) + \left(1-y^{(i)}\right)\log\!\left(1-a^{[L](i)}\right) \right)$$
def compute_cost(Y, AL):
    """
    Implements the cross-entropy cost function.
    Arguments:
    Y - label vector, of shape (1, m)
    AL - probability vector of the layer-L predictions, of shape (1, m)
    Returns:
    cost - the cross-entropy cost
    """
    m = Y.shape[1]
    # #note: np.log overflows to -inf if AL contains exact 0s or 1s
    cost = (-1 / m) * np.sum(np.multiply(np.log(AL), Y) +
                             np.multiply(np.log(1 - AL), (1 - Y)))
    cost = np.squeeze(cost)
    assert (cost.shape == ())
    return cost
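If that edge case worries you, a defensive variant (my own addition, not required by the assignment) clips AL away from 0 and 1 before taking logs:

def compute_cost_safe(Y, AL, eps=1e-12):
    # #keep np.log away from exact 0s and 1s
    m = Y.shape[1]
    AL = np.clip(AL, eps, 1 - eps)
    return float((-1 / m) * np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)))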
Test:
print("==============test compute_cost==============")
Y, AL = testCases.compute_cost_test_case()
cost = compute_cost(Y, AL)
print("cost: " + str(cost))
print("==============test compute_cost==============")
Output:
==============test compute_cost==============
cost: 0.41493159961539694
==============test compute_cost==============
5. Backward propagation
Now for the important part.
To make this easier to follow, here are the formulas for a single backward step first:
- Nonlinear activation part:

$$dA^{[L]} = \frac{\partial J}{\partial A^{[L]}} = -\frac{Y}{A^{[L]}} + \frac{1 - Y}{1 - A^{[L]}}$$

$$dZ^{[l]} = dA^{[l]} * g^{[l]\,\prime}\!\left(Z^{[l]}\right)$$

- Linear part:

$$dW^{[l]} = \frac{1}{m}\, dZ^{[l]} A^{[l-1]T}$$

$$db^{[l]} = \frac{1}{m} \sum_{i=1}^{m} dZ^{[l](i)}$$

$$dA^{[l-1]} = W^{[l]T} dZ^{[l]}$$

(the first formula is the derivative of the cost, which starts the chain at the output layer L)

The transpose of $W^{[l]}$ in the last linear-part formula may puzzle some readers; recall how matrix derivatives work:

$$f(x) = a^T x$$

$$\frac{\partial f}{\partial x} = \frac{\partial a^T x}{\partial x} = a$$
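The rule is easy to confirm numerically; here is a small check of my own (central finite differences against the analytic gradient a):

np.random.seed(0)
a = np.random.randn(4)
x = np.random.randn(4)
eps = 1e-7
# perturb one coordinate at a time and compare with a
grad = np.array([(a @ (x + eps * e) - a @ (x - eps * e)) / (2 * eps)
                 for e in np.eye(4)])
print(np.allclose(grad, a))  # True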
The program structure mirrors forward propagation:
- the linear module
- the nonlinear (activation) module (already provided in dnn_utils, so this step is skipped)
- combine the two into a single backward step
- iterate the step over the layers
5.1 Nonlinear activation part
The derivatives of the activation functions used here are also provided in dnn_utils:
def sigmoid_backward(dA, cache):
    """
    Implement the backward propagation for a single SIGMOID unit.
    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' where we store for computing backward propagation efficiently
    Returns:
    dZ -- Gradient of the cost with respect to Z
    """
    Z = cache
    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)
    assert (dZ.shape == Z.shape)
    return dZ

def relu_backward(dA, cache):
    """
    Implement the backward propagation for a single RELU unit.
    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' where we store for computing backward propagation efficiently
    Returns:
    dZ -- Gradient of the cost with respect to Z
    """
    Z = cache
    dZ = np.array(dA, copy=True)  # just converting dz to a correct object.
    # When z <= 0, you should set dz to 0 as well.
    dZ[Z <= 0] = 0
    assert (dZ.shape == Z.shape)
    return dZ
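A one-line spot check of relu_backward (my own, not from the assignment): the gradient only passes through where Z was positive.

dZ = dnn_utils.relu_backward(np.array([[1., 1., 1.]]), np.array([[-2., 0., 3.]]))
print(dZ)  # [[0. 0. 1.]] -- entries with Z <= 0 are zeroed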
Back to the cache from earlier:
- cache
  - linear_cache
    - A
    - W
    - b
  - activation_cache
    - Z (this is the part used here)
5.2 Linear part
def linear_backward(dZ, cache):
    """
    Arguments:
    dZ - gradient of the cost with respect to Zl
    cache - the corresponding layer's forward-pass cache linear_cache (A_prev, W, b)
    Returns:
    dA_prev - gradient with respect to Al-1, of shape (n[l-1], m)
    dW - gradient with respect to Wl, of shape (n[l], n[l-1])
    db - gradient with respect to bl, of shape (n[l], 1)
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = (1 / m) * np.dot(dZ, A_prev.T)
    db = (1 / m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    # #sanity checks
    assert (dA_prev.shape == A_prev.shape)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    return dA_prev, dW, db
To emphasize: the returned dW and db belong to layer l, while dA_prev belongs to layer l-1.
Back to the cache once more:
- cache
  - linear_cache (this is the part used here)
    - A
    - W
    - b
  - activation_cache
    - Z
Now the purpose of every cached item should be clear.
Test:
# #test
print("==============test linear_backward==============")
dZ, linear_cache = testCases.linear_backward_test_case()
dA_prev, dW, db = linear_backward(dZ, linear_cache)
print("dA_prev:" + str(dA_prev))
print("dW:" + str(dW))
print("db:" + str(db))
print("==============test linear_backward==============")
Output:
==============test linear_backward==============
dA_prev:[[ 0.51822968 -0.19517421]
 [-0.40506361  0.15255393]
 [ 2.37496825 -0.89445391]]
dW:[[-0.10076895  1.40685096  1.64992505]]
db:[[0.50629448]]
==============test linear_backward==============
5.3 One backward step
def linear_activation_backward(dA, cache, activation="relu"):
    """
    Arguments:
    dA - gradient of the cost with respect to Al
    cache - the corresponding layer's forward-pass cache (linear_cache, activation_cache)
    activation - which activation was used: "sigmoid" or "relu"
    Returns:
    dA_prev - gradient with respect to Al-1, of shape (n[l-1], m)
    dW - gradient with respect to Wl, of shape (n[l], n[l-1])
    db - gradient with respect to bl, of shape (n[l], 1)
    """
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = dnn_utils.relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = dnn_utils.sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
This is also the backward propagation of the single-hidden-layer network.
Test:
print("==============test linear_activation_backward==============")
dA, linear_activation_cache = testCases.linear_activation_backward_test_case()
dA_prev, dW, db = linear_activation_backward(dA, linear_activation_cache,
                                             activation="sigmoid")
print("sigmoid:")
print("dA_prev:" + str(dA_prev))
print("dW:" + str(dW))
print("db:" + str(db))
dA_prev, dW, db = linear_activation_backward(dA, linear_activation_cache,
                                             activation="relu")
print("relu:")
print("dA_prev:" + str(dA_prev))
print("dW:" + str(dW))
print("db:" + str(db))
print("==============test linear_activation_backward==============")
5.4 Iterating over the layers
As in the forward pass, this iterates over the layers: layer L uses the sigmoid derivative, and layers L-1 down to 1 use the relu derivative.
Two things to watch while writing it:
- fetch the corresponding cache before each step
- the outputs of iteration l: dW and db belong to layer l, while dA belongs to layer l-1
def L_mode_backward(AL, Y, caches):
    """
    Performs backward propagation for the [LINEAR->RELU]*(L-1) -> LINEAR->SIGMOID group,
    i.e. the multi-layer network's backward pass.
    Arguments:
    AL - the layer-L activation output of the forward pass
    Y - the labels
    caches - list of every layer's forward-pass cache (cache = (linear_cache, activation_cache))
    Returns:
    grads - dictionary with every layer's gradients
        grads["dA" + str(l)] = ...
        grads["dW" + str(l)] = ...
        grads["db" + str(l)] = ...
    """
    L = len(caches)
    grads = {}
    dAL = -np.divide(Y, AL) + np.divide(1 - Y, 1 - AL)  # #derivative of the cost w.r.t. AL; get the formula right
    grads["dA" + str(L)] = dAL  # #not strictly needed; kept only for symmetry
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, current_cache, activation="sigmoid")
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = \
            linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation="relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads
Test:
print("==============test L_mode_backward==============")
AL, Y, caches = testCases.L_model_backward_test_case()
grads = L_mode_backward(AL, Y, caches)
print("dA1:" + str(grads["dA1"]))
print("dW1:" + str(grads["dW1"]))
print("db1:" + str(grads["db1"]))
print("==============test L_mode_backward==============")
Output:
==============test L_mode_backward==============
dA1:[[ 0.          0.52257901]
 [ 0.         -0.3269206 ]
 [ 0.         -0.32070404]
 [ 0.         -0.74079187]]
dW1:[[0.41010002 0.07807203 0.13798444 0.10502167]
 [0.         0.         0.         0.        ]
 [0.05283652 0.01005865 0.01777766 0.0135308 ]]
db1:[[-0.22007063]
 [ 0.        ]
 [-0.02835349]]
==============test L_mode_backward==============
6. Update parameters
The update rule ($\alpha$ is the learning rate):

$$\theta^{[l]} = \theta^{[l]} - \alpha \frac{\partial J}{\partial \theta^{[l]}}$$

$$W^{[l]} = W^{[l]} - \alpha \, dW^{[l]}$$

$$b^{[l]} = b^{[l]} - \alpha \, db^{[l]}$$
def update_parameters(parameters, grads, learning_rate):
    """
    Arguments:
    parameters - dictionary of Wl and bl
    grads - dictionary of dAl, dWl and dbl
    learning_rate - the learning rate
    Returns:
    parameters - dictionary of the updated parameters
    """
    L = len(parameters) // 2
    for l in range(L):
        parameters["W" + str(l + 1)] = \
            parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
        parameters["b" + str(l + 1)] = \
            parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]
    return parameters
Test:
# #test
print("==============test update_parameters==============")
parameters, grads = testCases.update_parameters_test_case()
parameters = update_parameters(parameters, grads, learning_rate=0.1)
print("W1:" + str(parameters["W1"]))
print("b1:" + str(parameters["b1"]))
print("W2:" + str(parameters["W2"]))
print("b2:" + str(parameters["b2"]))
print("==============test update_parameters==============")
Output:
==============test update_parameters==============
W1:[[-0.59562069 -0.09991781 -2.14584584  1.82662008]
 [-1.76569676 -0.80627147  0.51115557 -1.18258802]
 [-1.0535704  -0.86128581  0.68284052  2.20374577]]
b1:[[-0.04659241]
 [-1.28888275]
 [ 0.53405496]]
W2:[[-0.55569196  0.0354055   1.32964895]]
b2:[[-0.84610769]]
==============test update_parameters==============
All the pieces of the main program are now done; time to put them together.
7. Putting it together
7.1 Single hidden layer
def two_layer_model(X, Y, layerdims, learning_rate, iterations_N,
                    printcost=False, isPlot=True):
    np.random.seed(1)
    grads = {}
    costs = []
    (n_x, n_h, n_y) = layerdims
    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    for i in range(0, iterations_N):
        # #forward
        A1, cache1 = linear_activation_foward(X, W1, b1, "relu")
        A2, cache2 = linear_activation_foward(A1, W2, b2, "sigmoid")
        cost = compute_cost(Y, A2)
        # #backward
        dA2 = -np.divide(Y, A2) + np.divide(1 - Y, 1 - A2)
        dA1, dW2, db2 = linear_activation_backward(dA2, cache2, "sigmoid")
        dA0, dW1, db1 = linear_activation_backward(dA1, cache1, "relu")
        grads["dW1"] = dW1
        grads["db1"] = db1
        grads["dW2"] = dW2
        grads["db2"] = db2
        # #update parameters
        parameters = update_parameters(parameters, grads, learning_rate)
        W1 = parameters["W1"]
        b1 = parameters["b1"]
        W2 = parameters["W2"]
        b2 = parameters["b2"]
        if i % 100 == 0:
            costs.append(cost)
            if printcost:
                print("iteration " + str(i) + ": " + str(np.squeeze(cost)))
    # #plot
    if isPlot:
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per hundreds)')
        plt.title("learning_rate=" + str(learning_rate))
        plt.show()
    return parameters
After loading the data, run it:
print("==============test two_layer_model==============")
train_set_x_orig , train_set_y , test_set_x_orig , test_set_y , classes = lr_utils.load_dataset()
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
train_x = train_x_flatten / 255
train_y = train_set_y
test_x = test_x_flatten / 255
test_y = test_set_y
# #data loading as above; skip if you already have it
n_x = 12288
n_h = 7
n_y = 1
layerdims = (n_x, n_h, n_y)
parameters = two_layer_model(train_x, train_y, layerdims=(n_x, n_h, n_y),
                             learning_rate=0.0075, iterations_N=2500, printcost=True, isPlot=True)
print("==============test two_layer_model==============")
Output:
==============test two_layer_model==============
iteration 0: 0.693049735659989
iteration 100: 0.6464320953428849
iteration 200: 0.6325140647912677
iteration 300: 0.6015024920354666
iteration 400: 0.5601966311605747
iteration 500: 0.5158304772764729
iteration 600: 0.4754901313943325
iteration 700: 0.43391631512257495
iteration 800: 0.4007977536203887
iteration 900: 0.35807050113237976
iteration 1000: 0.3394281538366413
iteration 1100: 0.30527536361962654
iteration 1200: 0.2749137728213015
iteration 1300: 0.24681768210614846
iteration 1400: 0.19850735037466097
iteration 1500: 0.17448318112556663
iteration 1600: 0.17080762978096892
iteration 1700: 0.11306524562164715
iteration 1800: 0.09629426845937145
iteration 1900: 0.08342617959726861
iteration 2000: 0.07439078704319078
iteration 2100: 0.06630748132267933
iteration 2200: 0.05919329501038171
iteration 2300: 0.05336140348560554
iteration 2400: 0.04855478562877018
==============test two_layer_model==============
7.2 Single-hidden-layer prediction
def predict(X, Y, parameters):
    """
    Arguments:
    X - the data set
    Y - the labels
    parameters - dictionary of trained parameters
    Returns:
    p - the predictions
    """
    m = X.shape[1]
    p = np.zeros((1, m))
    probas, cache = L_mode_foward(X, parameters)
    for i in range(probas.shape[1]):
        if probas[0, i] > 0.5:
            p[0, i] = 1
        else:
            p[0, i] = 0
    print("Accuracy: " + str(np.sum((p == Y)) / m))
    return p
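As an aside, the thresholding loop above can be collapsed into one vectorized line (same result, purely a style choice):

# inside predict, the for-loop over probas can be replaced by:
p = (probas > 0.5).astype(np.float64)  # 1.0 exactly where the probability exceeds 0.5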
Run:
predictions_train = predict(train_x, train_y, parameters)
predictions_test = predict(test_x, test_y, parameters)
Output:
Accuracy: 1.0
Accuracy: 0.72
A slight improvement over logistic regression's 0.70. Now let's try more layers.
7.3 DNN and prediction
def L_layer_model(X, Y, layerdims, learning_rate=0.0075, iterations_N=3000,
                  printcost=False, isPlot=True):
    """
    Arguments:
    X - the data set
    Y - the labels
    layerdims - list covering the input layer, the hidden layers, and the output layer;
                each element is the number of units in that layer.
    learning_rate - the learning rate
    iterations_N - number of iterations
    printcost - whether to print the cost
    isPlot - whether to plot the cost curve
    Returns:
    parameters - dictionary of the updated parameters.
    """
    np.random.seed(1)
    costs = []
    parameters = initialize_parameters_deep(layerdims)
    for i in range(0, iterations_N):
        AL, caches = L_mode_foward(X, parameters)
        cost = compute_cost(Y, AL)
        grads = L_mode_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0:
            costs.append(cost)
            if printcost:
                print("iteration " + str(i) + ": " + str(cost))
    if isPlot:
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per hundreds)')
        plt.title("learning_rate=" + str(learning_rate))
        plt.show()
    return parameters
Load the data and run:
print("==============test L_layer_model==============")
train_set_x_orig , train_set_y , test_set_x_orig , test_set_y , classes = lr_utils.load_dataset()
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
train_x = train_x_flatten / 255
train_y = train_set_y
test_x = test_x_flatten / 255
test_y = test_set_y
# #data loading as above; skip if you already have it
layers_dims = [12288, 20, 7, 5, 1]  # #a 5-layer NN (counting the input layer)
parameters = L_layer_model(train_x, train_y, layers_dims,
                           iterations_N=2500, printcost=True, isPlot=True)
print("==============test L_layer_model==============")
运行结果:
第0次迭代:0.7157315134137129
第100次迭代:0.6747377593469114
第200次迭代:0.6603365433622128
第300次迭代:0.6462887802148751
第400次迭代:0.6298131216927771
第500次迭代:0.606005622926534
第600次迭代:0.5690041263975134
第700次迭代:0.519796535043806
第800次迭代:0.46415716786282296
第900次迭代:0.40842030048298916
第1000次迭代:0.37315499216069054
第1100次迭代:0.3057237457304713
第1200次迭代:0.2681015284774086
第1300次迭代:0.238724748276725
第1400次迭代:0.20632263257914707
第1500次迭代:0.17943886927493452
第1600次迭代:0.15798735818800563
第1700次迭代:0.1424041301227307
第1800次迭代:0.1286516599788137
第1900次迭代:0.11244314998141401
第2000次迭代:0.085056310349417
第2100次迭代:0.057583911985884556
第2200次迭代:0.044567534546855
第2300次迭代:0.03808275166593553
第2400次迭代:0.03441074901837627
Predict:
pre_train = predict(train_x, train_y, parameters)
pre_test = predict(test_x, test_y, parameters)
Output:
Accuracy: 0.9952153110047847
Accuracy: 0.78
Compared with the single hidden layer's 0.72, this is another improvement. Interested readers can try other hyperparameters and see whether they do even better; a quick sweep is sketched below.
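For example, a minimal sweep could look like this (the alternative layer sizes are my own illustrative picks, not from the assignment):

# #compare a few architectures; smaller nets train faster but may underfit
for dims in ([12288, 20, 7, 5, 1], [12288, 10, 5, 1], [12288, 7, 1]):
    print("dims:", dims)
    params = L_layer_model(train_x, train_y, dims, learning_rate=0.0075,
                           iterations_N=2500, printcost=False, isPlot=False)
    predict(train_x, train_y, params)  # prints training accuracy
    predict(test_x, test_y, params)    # prints test accuracy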
IV. Optional part
1. Analyzing mispredictions
Find out which predictions were wrong. (Note: the AL_train and AL_test used below come from the predict in the complete code at the end of this post, which also returns the thresholded AL.)
def name_differences(X, Y, AL):
    m = Y.shape[1]
    Difference_ID = []
    diff_N = 0
    # diff = AL + Y
    # diff_ID = np.asarray(np.where(diff == 1))
    for i in range(m):
        if AL[0, i] != Y[0, i]:
            Difference_ID.append(i)
            diff_N = diff_N + 1
    for j in range(diff_N):
        index = Difference_ID[j]
        plt.imshow(X[:, index].reshape(64, 64, 3))
        # #train_x here is preprocessed (transposed) data, hence X[:, index]; with the raw data, index the rows instead
        # plt.imshow(train_set_x_orig[Difference_ID[j], :])
        plt.show()
    return Difference_ID, diff_N
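The commented-out lines hint at a vectorized alternative; a direct version of the same idea (my own aside) is:

Difference_ID = list(np.where(AL[0, :] != Y[0, :])[0])  # indices where prediction != label
diff_N = len(Difference_ID)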
Difference_ID_train, diff_N_train = name_differences(train_x, train_y, AL_train)
Difference_ID_test, diff_N_test = name_differences(test_x, test_y, AL_test)
Output:
misclassified indices: [92]  # #training set
count: 1  # #training set
misclassified indices: [5, 6, 13, 19, 28, 29, 34, 44, 45, 46, 48]  # #test set
count: 11  # #test set
I won't paste the images here; see the references.
Possible reasons (some of these I don't fully understand either...):
- the cat's body is in an unusual position
- the cat appears against a background of a similar color
- an unusual cat color or breed
- the camera angle
- the brightness of the picture
- scale variation (the cat is very large or very small in the image)
2. Try your own image
Note: the image-resizing part of the code below may depend on your Python/SciPy version.
my_cat.jpg:
Code:
# #read your own photo
# #my_cat.jpg must be placed in the working directory
from PIL import Image
my_image = "my_cat.jpg"
my_label = np.array([1])
my_label = my_label.reshape(1, 1)
num_px = 64
fname = my_image
# #scipy.misc.imresize is gone on Python 3.7.2 / newer SciPy; use the following
image = np.array(plt.imread(fname, format=None))
my_image = np.array(Image.fromarray(image).resize((num_px, num_px))).\
    reshape(num_px*num_px*3, 1)
# #scipy.misc.imresize is gone on Python 3.7.2 / newer SciPy; use the commands above
# #on older Python/SciPy versions you can try:
# image = np.array(ndimage.imread(fname, flatten=False))
# my_image = scipy.misc.imresize(image, size=(num_px,num_px)).reshape((num_px*num_px*3,1))
plt.imshow(image)
# print(my_image.shape)
my_predicted_image, my_AL = predict(my_image, my_label, parameters)
print("y = " + str(np.squeeze(my_label)))
print("prediction: " + str(my_AL[0, 0]))
Output:
(12288, 1)
y = 1
prediction: 1.0
Complete code
One last tip: the programs will keep getting longer and more modular from here on, so rather than debugging directly in a .py file, write and debug in a Jupyter notebook and paste the finished code back into the .py.
import numpy as np
import matplotlib.pyplot as plt
import dnn_utils
import lr_utils
import scipy.misc
import testCases
from scipy import ndimage
from PIL import Image
np.random.seed(1)

# #initialization
def initialize_paras(layerdims):  # #layerdims covers ALL layers of the NN; make sure this is clear
    np.random.seed(3)
    parameters = {}
    L = len(layerdims)  # #note: this L counts every layer, including the input layer
    for l in range(1, L):
        parameters["W" + str(l)] = \
            np.random.randn(layerdims[l], layerdims[l - 1]) / np.sqrt(layerdims[l - 1])
        parameters["b" + str(l)] = np.zeros((layerdims[l], 1))
    return parameters

# #forward: linear part
def linear_F(A, W, b):
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache

# #forward: activation part
def activate_F(A_prev, W, b, activation):
    if activation == "relu":
        Z, linear_cache = linear_F(A_prev, W, b)
        A, activation_cache = dnn_utils.relu(Z)
    elif activation == "sigmoid":
        Z, linear_cache = linear_F(A_prev, W, b)
        A, activation_cache = dnn_utils.sigmoid(Z)
    cache = (linear_cache, activation_cache)
    return A, cache

# #forward: L_mode
def L_mode_F(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2
    for l in range(1, L):
        A_prev = A
        A, cache = activate_F(A_prev, parameters["W" + str(l)], parameters["b" + str(l)], "relu")
        caches.append(cache)
    AL, cache = activate_F(A, parameters["W" + str(L)], parameters["b" + str(L)], "sigmoid")
    caches.append(cache)
    return AL, caches

# #compute the cost
def compute_cost(Y, AL):
    m = Y.shape[1]
    cost = (-1 / m) * np.sum(np.multiply(np.log(AL), Y) + np.multiply(np.log(1 - AL), 1 - Y))
    return cost

# #backward: linear part
def linear_B(dZ, cache):
    m = dZ.shape[1]
    (A, W, b) = cache
    dW = (1 / m) * np.dot(dZ, A.T)
    db = (1 / m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db

# #backward: activation part
def activate_B(dA, cache, activation):
    (linear_cache, activate_cache) = cache
    if activation == "relu":
        dZ = dnn_utils.relu_backward(dA, activate_cache)
        dA_prev, dW, db = linear_B(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = dnn_utils.sigmoid_backward(dA, activate_cache)
        dA_prev, dW, db = linear_B(dZ, linear_cache)
    return dA_prev, dW, db

# #backward: L_mode
def L_mode_B(AL, Y, caches):
    L = len(caches)
    grads = {}
    dAL = -np.divide(Y, AL) + np.divide(1 - Y, 1 - AL)  # #derivative of the cost w.r.t. AL; get the formula right
    grads["dA" + str(L)] = dAL  # #not strictly needed; kept only for symmetry
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = \
        activate_B(dAL, current_cache, activation="sigmoid")
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = \
            activate_B(grads["dA" + str(l + 1)], current_cache, activation="relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads

# #update parameters
def update_paras(parameters, grads, learning_rate):
    L = len(parameters) // 2
    for l in range(L):
        parameters["W" + str(l + 1)] = \
            parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
        parameters["b" + str(l + 1)] = \
            parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]
    return parameters

# #L_mode_NN
def L_mode_NN(X, Y, layerdims, learning_rate, iterations_N, printcost=False, isPlot=True):
    np.random.seed(1)
    costs = []
    parameters = initialize_paras(layerdims)
    for i in range(iterations_N):
        AL, caches = L_mode_F(X, parameters)
        cost = compute_cost(Y, AL)
        grads = L_mode_B(AL, Y, caches)
        parameters = update_paras(parameters, grads, learning_rate)
        if i % 100 == 0:
            costs.append(cost)
            if printcost:
                print("iteration " + str(i) + ": " + str(cost))
    if isPlot:
        plt.plot(costs)
        plt.ylabel('costs')
        plt.xlabel('iterations (per hundreds)')
        plt.title('learning_rate: ' + str(learning_rate))
        plt.show()
    return parameters

# #load data
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = lr_utils.load_dataset()
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
train_x = train_x_flatten / 255
train_y = train_set_y
test_x = test_x_flatten / 255
test_y = test_set_y

# #run
layerdims = [12288, 20, 7, 5, 1]
parameters = L_mode_NN(train_x, train_y, layerdims, 0.0075, 2500, printcost=True, isPlot=True)

# #prediction
def predict(X, Y, parameters):  # #note predict's inputs: parameters must come from L_mode_NN
    AL, caches = L_mode_F(X, parameters)
    m = Y.shape[1]
    for i in range(m):
        if AL[0, i] > 0.5:
            AL[0, i] = 1
        else:
            AL[0, i] = 0
    allzeros = np.sum(np.multiply(AL, Y))         # #correctly predicted cats (label 1)
    allones = np.sum(np.multiply(1 - AL, 1 - Y))  # #correctly predicted non-cats (label 0)
    prediction = (allzeros + allones) / m
    return prediction, AL

prediction_train, AL_train = predict(train_x, train_y, parameters)
prediction_test, AL_test = predict(test_x, test_y, parameters)
print(prediction_train)
print(prediction_test)

# #inspect which ones are wrong
def name_differences(X, Y, AL):
    m = Y.shape[1]
    Difference_ID = []
    diff_N = 0
    # diff = AL + Y
    # diff_ID = np.asarray(np.where(diff == 1))
    for i in range(m):
        if AL[0, i] != Y[0, i]:
            Difference_ID.append(i)
            diff_N = diff_N + 1
    for j in range(diff_N):
        plt.imshow(X[:, Difference_ID[j]].reshape(64, 64, 3))
        # #train_x here is preprocessed (transposed) data, hence X[:, index]; with the raw data, index the rows instead
        # plt.imshow(train_set_x_orig[Difference_ID[j], :])
        plt.show()
    return Difference_ID, diff_N

Difference_ID_train, diff_N_train = name_differences(train_x, train_y, AL_train)
Difference_ID_test, diff_N_test = name_differences(test_x, test_y, AL_test)

# #read your own photo
my_image = "my_cat.jpg"
my_label = np.array([1])
my_label = my_label.reshape(1, 1)
num_px = 64
fname = my_image
image = np.array(plt.imread(fname, format=None))
my_image = np.array(Image.fromarray(image).resize((num_px, num_px))).\
    reshape(num_px * num_px * 3, 1)
plt.imshow(image)
# print(my_image.shape)
my_predicted_image, my_AL = predict(my_image, my_label, parameters)
print("y = " + str(np.squeeze(my_label)))
print("prediction: " + str(my_AL[0, 0]))
End.