我觉得其中的难点是如何计算 dw 和 db 的值(即前向传播和反向传播):
比较容易出 bug 的地方是各个矩阵变量的形状。编程过程中一定要保持头脑清醒,最好把每个矩阵的形状一一写出来,然后按照公式实现即可。
def propagate(w, b, x, y):
    """Run one forward and backward pass of single-layer logistic regression.

    The model has only an input layer and a single sigmoid output unit.

    :param w: weights, shape (1, n_features), e.g. (1, 12288)
    :param b: bias, a scalar (broadcast over all samples)
    :param x: inputs, shape (n_features, m), one sample per column,
        e.g. (12288, 209)
    :param y: labels, shape (1, m), e.g. (1, 209)
    :return: dict with 'dw' (same shape as w) and 'db' (scalar)
    """
    # Use the batch size of the data actually passed in instead of the
    # module-level n_train, so the averaged gradients stay correct for any
    # batch (test set, mini-batch, ...).
    m = x.shape[1]

    # Forward pass: activations, shape (1, m).
    A = Sigmoid(np.dot(w, x) + b)

    # Backward pass: dz = A - y is the per-sample error at the output unit.
    dz = A - y
    dw = np.dot(dz, x.T) / m
    db = np.sum(dz) / m

    # Sanity check: the gradient must line up with the weights it updates.
    assert dw.shape == w.shape

    return {
        'dw': dw,
        'db': db,
    }
附上完整程序:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from lr_utils import load_dataset
# Load the raw dataset.
# Shapes recorded by the author for this dataset:
#   train_x (209, 64, 64, 3), train_y (1, 209)
#   test_x  (50, 64, 64, 3),  test_y  (1, 50)
train_x, train_y, test_x, test_y, classes = load_dataset()

# Number of training samples and image side length in pixels (axes 0 and 1
# of train_x), plus the number of test samples.
# Recorded values: n_train = 209, n_test = 50, num_pix = 64.
n_train, num_pix = train_x.shape[:2]
n_test = test_x.shape[0]

# Flatten every image into one column (features x samples) and scale the
# pixel values by 1/255.
# Recorded shapes after flattening: train_x (12288, 209), test_x (12288, 50).
train_x = train_x.reshape(n_train, -1).T / 255
test_x = test_x.reshape(n_test, -1).T / 255
# 激活函数
def Sigmoid(z):
    """Activation function: return the logistic sigmoid 1 / (1 + e^(-z)).

    Works element-wise on NumPy arrays as well as on plain scalars.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def initial(size):
    """Create zero-initialised parameters for the model.

    :param size: number of input features
    :return: tuple (w, b) where w is a zero array of shape (1, size),
        i.e. (number of outputs, number of inputs), and b is the scalar 0
    """
    weights = np.zeros(shape=(1, size))
    bias = 0
    return weights, bias
def propagate(w, b, x, y):
    """Run one forward and backward pass of single-layer logistic regression.

    The model has only an input layer and a single sigmoid output unit.

    :param w: weights, shape (1, n_features), e.g. (1, 12288)
    :param b: bias, a scalar (broadcast over all samples)
    :param x: inputs, shape (n_features, m), one sample per column,
        e.g. (12288, 209)
    :param y: labels, shape (1, m), e.g. (1, 209)
    :return: dict with 'dw' (same shape as w) and 'db' (scalar)
    """
    # Use the batch size of the data actually passed in instead of the
    # module-level n_train, so the averaged gradients stay correct for any
    # batch (test set, mini-batch, ...).
    m = x.shape[1]

    # Forward pass: activations, shape (1, m).
    A = Sigmoid(np.dot(w, x) + b)

    # Backward pass: dz = A - y is the per-sample error at the output unit.
    dz = A - y
    dw = np.dot(dz, x.T) / m
    db = np.sum(dz) / m

    # Sanity check: the gradient must line up with the weights it updates.
    assert dw.shape == w.shape

    return {
        'dw': dw,
        'db': db,
    }
def optimize(w, b, x, y, n_iter, learningRate):
    """Fit w and b with plain gradient descent.

    :param w: initial weights, passed through to propagate
    :param b: initial bias, passed through to propagate
    :param x: training inputs, passed through to propagate
    :param y: training labels, passed through to propagate
    :param n_iter: number of gradient-descent iterations
    :param learningRate: step size applied to each gradient
    :return: dict with the final parameters under 'w' and 'b'
    """
    for step in range(n_iter):
        grads = propagate(w, b, x, y)

        # Build new objects instead of updating in place, so the caller's
        # arrays are left untouched.
        w = w - grads['dw'] * learningRate
        b = b - grads['db'] * learningRate

        # Progress indicator: print the iteration index every 500 steps.
        if step % 500 == 0:
            print(step)

    # Hand back the learned parameters.
    return {'w': w, 'b': b}
def predict(w, b, x):
    """Return the model's sigmoid output for the input x.

    :param w: weights, e.g. shape (1, 12288)
    :param b: bias
    :param x: input column(s), e.g. shape (12288, 1) for a single image
    :return: Sigmoid(w . x + b); shape (1, 1) for a single input column
    """
    linear = np.dot(w, x) + b
    return Sigmoid(linear)
# Train: one weight per pixel channel (num_pix * num_pix * 3 features).
w, b = initial(num_pix * num_pix * 3)
n_iter, learningRate = 5000, 0.005
Dict = optimize(w, b, train_x, train_y, n_iter, learningRate)
w = Dict['w']
b = Dict['b']

# Sample images to try the trained model on.
root1 = './Image/t6.png'  # cat
root2 = './Image/t10.png'  # dog
root3 = './Image/t9.png'  # deer

# Open the file in a context manager so the handle is closed; convert()
# returns an independent in-memory image, so Input stays valid afterwards.
with Image.open(root2) as img:
    # Resize to the training resolution so the flattened vector matches w.
    Input = img.convert("RGB").resize((num_pix, num_pix))

# Flatten to a single column, the layout predict() expects.
InputArray = np.array(Input).reshape(-1, 1)

# Apply exactly the same scaling as the training data (x / 255). The previous
# (255 - x) / 255 inverted the image, so the model was evaluated on inputs
# unlike anything it was trained on.
InputArray = InputArray / 255

Output = predict(w, b, InputArray)
print(Output)
输出的结果是输入图片是猫的概率。结果不太准,可能是因为训练数据太少(只有 209 张训练图片)。