Training and test sets: see the GitHub link inside Desktop.rar on GitHub.
1. Formulas:
Forward propagation: $z_1 = w_1 \bullet X + b_1 \Rightarrow a_1 = f(z_1) \Rightarrow z_2 = w_2 \bullet a_1 + b_2 \Rightarrow a_2 = \delta(z_2)$
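To make the chain concrete, here is a minimal NumPy sketch of one forward pass, assuming $f$ is ReLU and $\delta$ is the sigmoid (the same choices used by the full code in section 3):

import numpy as np

def forward(X, w1, b1, w2, b2):
    z1 = np.dot(w1, X) + b1        # z1 = w1·X + b1
    a1 = np.maximum(0, z1)         # a1 = f(z1), with f = ReLU
    z2 = np.dot(w2, a1) + b2       # z2 = w2·a1 + b2
    a2 = 1 / (1 + np.exp(-z2))     # a2 = δ(z2), with δ = sigmoid
    return z1, a1, z2, a2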
$\Rightarrow J = -\left[\, y \log(a_2) + (1 - y)\log(1 - a_2) \,\right]$
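This is the loss for a single sample; over $m$ samples, the code in section 3 averages it:

$J = -\dfrac{1}{m}\sum_{i=1}^{m}\left[\, y^{(i)} \log a_2^{(i)} + (1 - y^{(i)}) \log(1 - a_2^{(i)}) \,\right]$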
Backpropagation: $dw_2 = dz_2 \bullet a_1,\; db_2 = dz_2 \Leftarrow dz_2 = da_2 * \delta'(z_2) \Leftarrow da_2 = -\dfrac{y}{a_2} + \dfrac{1-y}{1-a_2}$
$dw_1 = dz_1 \bullet X,\; db_1 = dz_1 \Leftarrow dz_1 = da_1 * f'(z_1) \Leftarrow da_1 = w_2 \bullet dz_2$
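In the vectorized code of section 3, these per-sample gradients are averaged over the $m$ samples, and transposes make the matrix shapes line up:

$dw_2 = \dfrac{1}{m}\, dz_2 \bullet a_1^{T},\quad db_2 = \dfrac{1}{m}\sum dz_2,\quad dw_1 = \dfrac{1}{m}\, dz_1 \bullet X^{T},\quad db_1 = \dfrac{1}{m}\sum dz_1$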
Parameter update: $w_2 = w_2 - \text{learning\_rate} * dw_2,\; b_2 = b_2 - \text{learning\_rate} * db_2$
Parameter update: $w_1 = w_1 - \text{learning\_rate} * dw_1,\; b_1 = b_1 - \text{learning\_rate} * db_1$
A two-layer neural network is essentially logistic regression applied twice in a row. The derivation of its backpropagation formulas was originally written out by hand (the math symbols are hard to type, so please bear with the handwritten version):
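As a supplement, the key step of that derivation can also be written out directly: substituting the sigmoid derivative $\delta'(z_2) = a_2(1 - a_2)$ into $dz_2 = da_2 * \delta'(z_2)$ gives

$dz_2 = \left(-\dfrac{y}{a_2} + \dfrac{1-y}{1-a_2}\right) a_2 (1 - a_2) = -y(1 - a_2) + (1 - y)\,a_2 = a_2 - y$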
2. Checking the matrix dimensions:
n: number of samples
m: number of features per sample
t: number of hidden-layer nodes
X: (m, n)
Y: (1, n)
w1: (t, m)
b1: (t, 1)  # broadcast in Python
w2: (1, t)
b2: (1, 1)  # broadcast in Python
z1: (t, n)
z2: (1, n)
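A minimal sketch for verifying these dimensions in NumPy (the sizes below are toy values chosen only for illustration):

import numpy as np

n, m, t = 5, 12, 4                        # toy sizes: 5 samples, 12 features, 4 hidden nodes
X = np.zeros((m, n)); Y = np.zeros((1, n))
w1 = np.zeros((t, m)); b1 = np.zeros((t, 1))
w2 = np.zeros((1, t)); b2 = np.zeros((1, 1))
z1 = np.dot(w1, X) + b1                   # b1 is broadcast over the n columns
z2 = np.dot(w2, np.maximum(0, z1)) + b2   # b2 is broadcast as well
assert z1.shape == (t, n) and z2.shape == (1, n)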
3. Code
Output:
Full code:
import numpy as np
import matplotlib.pyplot as plt
import h5py


def load_dataset():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # training-set image data (209 images of 64x64)
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # training-set labels ([0 | 1], 0 = not a cat, 1 = cat)

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test-set image data (50 images of 64x64)
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test-set labels ([0 | 1], 0 = not a cat, 1 = cat)

    classes = np.array(test_dataset["list_classes"][:])  # two strings stored as bytes: [b'non-cat' b'cat']

    train_set_y = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    print("Training set image dimensions: " + str(train_set_x_orig.shape))
    print("Training set label dimensions: " + str(train_set_y.shape))
    print("Test set image dimensions: " + str(test_set_x_orig.shape))
    print("Test set label dimensions: " + str(test_set_y.shape))
    print()

    train_set_x = train_set_x_orig.reshape((train_set_x_orig.shape[0], -1)).T / 255  # flatten and scale into (0, 1)
    test_set_x = test_set_x_orig.reshape((test_set_x_orig.shape[0], -1)).T / 255  # flatten and scale into (0, 1)
    return train_set_x, train_set_y, test_set_x, test_set_y, classes


def relu(z):  # ReLU activation
    return np.maximum(0, z)


def relu_1(z):  # derivative of ReLU (taken as 0 at z = 0 to avoid dividing by zero)
    return (z > 0).astype(float)


def tanh(z):  # tanh activation
    e1 = np.exp(z)
    e2 = np.exp(-z)
    return (e1 - e2) / (e1 + e2)


def tanh_1(a):  # derivative of tanh, expressed in terms of the activation a
    return 1 - a ** 2


def sigmoid(z):  # sigmoid activation
    return 1 / (1 + np.exp(-z))


def sigmoid_1(a):  # derivative of sigmoid, expressed in terms of the activation a
    return a * (1 - a)


def rand(n, m, dim):
    w1 = np.random.randn(dim, m) * 0.001  # small random values drawn from a normal distribution
    w2 = np.random.randn(1, dim) * 0.001
    b1 = np.zeros((dim, 1), dtype='float')  # biases initialized to 0
    b2 = np.zeros((1, 1), dtype='float')  # broadcast in Python
    w = {
        "w1": w1,
        "w2": w2
    }
    b = {
        "b1": b1,
        "b2": b2
    }
    return w, b


def backward(X, Y, w1, w2, b1, b2, learn):
    m = X.shape[1]  # number of samples
    # forward propagation
    z1 = np.dot(w1, X) + b1
    a1 = relu(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)
    # cross-entropy loss averaged over the m samples
    L = -1 / m * np.sum(Y * np.log(a2) + (1 - Y) * np.log(1 - a2))
    L = np.squeeze(L)  # drop redundant dimensions
    # backward propagation
    da2 = - (np.divide(Y, a2) - np.divide(1 - Y, 1 - a2))
    dz2 = da2 * sigmoid_1(a2)
    dw2 = 1 / m * np.dot(dz2, a1.T)
    db2 = 1 / m * np.sum(dz2, axis=1, keepdims=True)
    da1 = np.dot(w2.T, dz2)
    dz1 = da1 * relu_1(z1)
    dw1 = 1 / m * np.dot(dz1, X.T)
    db1 = 1 / m * np.sum(dz1, axis=1, keepdims=True)
    # gradient-descent update
    w1 = w1 - learn * dw1
    w2 = w2 - learn * dw2
    b1 = b1 - learn * db1
    b2 = b2 - learn * db2
    w = {
        "w1": w1,
        "w2": w2
    }
    b = {
        "b1": b1,
        "b2": b2
    }
    return w, b, L


def test(w1, w2, b1, b2, X):  # predict y from the optimized parameters w, b
    m = X.shape[1]  # number of samples
    z1 = np.dot(w1, X) + b1
    a1 = relu(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)
    y = np.zeros(shape=(1, m), dtype=float)
    for i in range(a2.shape[1]):
        y[0, i] = 1 if a2[0, i] > 0.5 else 0
    return y


def trainback(w1, w2, b1, b2, X, Y):  # accuracy on the training set
    y = test(w1, w2, b1, b2, X)
    lop = 100 * (1 - np.mean(np.abs(y - Y)))
    print("Training set accuracy: {0}%".format(lop))
    return 0


def testback(w1, w2, b1, b2, X, Y):  # accuracy on the test set
    y = test(w1, w2, b1, b2, X)
    lop = 100 * (1 - np.mean(np.abs(y - Y)))
    print("Test set accuracy: {0}%".format(lop))
    return 0


if __name__ == "__main__":
    np.random.seed(1)
    learning_rate = 0.0075
    train_set_x, train_set_y, test_set_x, test_set_y, classes = load_dataset()
    n, m, dim = train_set_x.shape[1], train_set_x.shape[0], 4  # n: samples, m: features, dim: hidden nodes
    w, b = rand(n, m, dim)
    w1, w2 = w["w1"], w["w2"]
    b1, b2 = b["b1"], b["b2"]
    L = []
    for i in range(3000):
        w, b, l = backward(train_set_x, train_set_y, w1, w2, b1, b2, learning_rate)
        w1, w2 = w["w1"], w["w2"]
        b1, b2 = b["b1"], b["b2"]
        if i % 500 == 0:
            L.append(l)
            print("Loss:", l)
    trainback(w1, w2, b1, b2, train_set_x, train_set_y)
    testback(w1, w2, b1, b2, test_set_x, test_set_y)
    # plot the loss curve
    # plt.scatter(X[0, :], X[1, :], c=np.squeeze(Y), s=40, cmap=plt.cm.Spectral)  # scatter plot
    plt.plot(L)
    plt.ylabel('Loss')
    plt.xlabel('Number of training rounds')
    plt.title("learning_rate = " + str(learning_rate))
    plt.show()
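As a usage note, the trained parameters can also classify a single image. A minimal sketch, assuming it is appended after the training loop above (the index 0 is arbitrary):

single = test_set_x[:, 0:1]                      # keep the (features, 1) column shape
pred = test(w1, w2, b1, b2, single)              # forward pass + 0.5 threshold
print("Prediction:", classes[int(pred[0, 0])].decode("utf-8"))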