参考资料:大佬的博客
import numpy as np
import matplotlib.pyplot as plt
import h5py
from lr_utils import load_dataset
def sigmoid(z):
    """
    Numerically stable logistic function.

    The textbook form ``1 / (1 + exp(-z))`` overflows inside ``exp`` when
    ``z`` is a large negative number.  Here only ``exp(-|z|)`` is evaluated,
    which always lies in ``[0, 1]``, and the algebraically equivalent branch
    is selected according to the sign of ``z``.

    :param z: scalar or numpy array of any shape
    :return: sigmoid(z); a numpy scalar for scalar input, otherwise an array
             with the same shape as ``z``
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # never overflows: the exponent is <= 0
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a numpy scalar
    # and leaves n-d arrays untouched, so scalar callers still get a scalar.
    return out[()]
def initialize(dim):
    """
    Create the starting parameters of the model: all-zero weights and a zero bias.

    :param dim: number of weights (rows of the weight column vector)
    :return: tuple ``(w, b)`` where ``w`` is a zero array of shape (dim, 1)
             and ``b`` is the integer 0
    """
    bias = 0
    weights = np.zeros(shape=(dim, 1))
    # Sanity checks on what is handed back to the caller.
    assert isinstance(bias, (int, float))
    assert weights.shape == (dim, 1)
    return weights, bias
def propagate(X, Y, w, b):
    """
    Run one forward/backward pass of logistic regression.

    :param X: sample features, shape (px*py*3, nums), where px, py and nums are
              the image width, height and the number of images
    :param Y: sample labels, 1 for the positive class and 0 for the negative
              class, shape (1, nums)
    :param w: model weights, shape (px*py*3, 1)
    :param b: model bias (scalar)
    :return: tuple ``(cost, grids)``
        --cost : mean negative log-likelihood over the samples (0-d value)
        --grids: dict with
                 "dw": gradient of the cost w.r.t. w, same shape as w
                 "db": gradient of the cost w.r.t. b (float)
    """
    m = X.shape[1]
    inv_m = 1.0 / m
    Z = np.dot(w.T, X) + b
    A = sigmoid(Z)
    dz = A - Y
    # Cross-entropy written in terms of the logits:
    #   -[y*log(s(z)) + (1-y)*log(1-s(z))] == log(1 + e^z) - y*z
    # np.logaddexp(0, z) evaluates log(1 + e^z) without overflow, so the cost
    # stays finite even when the sigmoid saturates to exactly 0 or 1 (the
    # log(A) form yields nan there because of 0 * log(0)).
    cost = inv_m * np.sum(np.logaddexp(0.0, Z) - Y * Z)
    dw = inv_m * np.dot(X, dz.T)
    db = inv_m * np.sum(dz)
    assert dw.shape == w.shape
    assert isinstance(db, float)
    cost = np.squeeze(cost)
    assert cost.shape == ()
    grids = {"dw": dw,
             "db": db}
    return cost, grids
def optimize(X, Y, w, b, iterations, lr, print_cost=True):
    """
    Optimise w and b with plain gradient descent.

    :param X: sample features, shape (px*py*3, nums), where px, py and nums are
              the image width, height and the number of images
    :param Y: sample labels, 1 for the positive class and 0 for the negative
              class, shape (1, nums)
    :param w: model weights, shape (px*py*3, 1)
    :param b: model bias (scalar)
    :param iterations: number of gradient-descent steps
    :param lr: learning rate
    :param print_cost: whether to print the cost every 100 iterations
    :return: tuple ``(params, grids, costs)``
        --params: dict holding the final weights "w" and bias "b"
        --grids : dict holding the gradients "dw" and "db" of the last step;
                  both are None when ``iterations`` is 0
        --costs : list of the costs recorded every 100 iterations, used to
                  plot the loss curve
    """
    costs = []
    # Pre-bind the gradients so that a zero-iteration run returns cleanly
    # instead of failing on unbound names when the result dict is built.
    dw, db = None, None
    for i in range(iterations):
        cost, grids = propagate(X, Y, w, b)
        dw, db = grids["dw"], grids["db"]
        w, b = w - lr * dw, b - lr * db
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print(str(i) + "th cost is: ", cost)
    params = {"w": w,
              "b": b}
    grids = {"dw": dw,
             "db": db}
    return (params, grids, costs)
def predict(w, b, X):
    """
    Predict a 0/1 label for every sample with the logistic-regression model.

    :param w: model weights, shape (px*py*3, 1)
    :param b: model bias (scalar)
    :param X: sample features, shape (px*py*3, nums), where px, py and nums are
              the image width, height and the number of images
    :return: float array of shape (1, nums) holding the predicted label of
             every column of X (1.0 when the probability exceeds 0.5, else 0.0)
    """
    num_samples = X.shape[1]
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)
    # Vectorised threshold instead of a per-sample Python loop.  The test is
    # kept as `A <= 0.5 -> 0` so ties (and NaN) resolve exactly as before.
    Y_predict = np.where(A <= 0.5, 0.0, 1.0)
    assert Y_predict.shape == (1, num_samples)
    return Y_predict
def model(X_train, Y_train, X_test, Y_test, iterations=5000, lr=0.005, print_cost=True):
    """
    Build, train and evaluate the logistic-regression model.

    :param X_train: training features, numpy array of shape (px*py*3, m_train)
    :param Y_train: training labels, numpy array of shape (1, m_train)
    :param X_test: test features, numpy array of shape (px*py*3, m_test)
    :param Y_test: test labels, numpy array of shape (1, m_test)
    :param iterations: number of gradient-descent iterations
    :param lr: learning rate
    :param print_cost: whether to print the loss every 100 iterations
    :return:
        --d: dict with the recorded costs, the train/test predictions, the
             train/test accuracies (fractions in [0, 1]), the learned w and b,
             the learning rate and the iteration count
    """
    w, b = initialize(X_train.shape[0])
    params, grids, costs = optimize(X_train, Y_train, w, b, iterations, lr, print_cost)
    w, b = params['w'], params['b']
    Y_train_prediction = predict(w, b, X_train)
    Y_test_prediction = predict(w, b, X_test)
    # Labels and predictions are both 0/1, so the mean absolute difference is
    # the error rate as a fraction; accuracy is simply its complement.
    Acc_train = 1.0 - np.mean(np.abs(Y_train - Y_train_prediction))
    Acc_test = 1.0 - np.mean(np.abs(Y_test - Y_test_prediction))
    print("Acc_trian: ", Acc_train)
    print("Acc_test: ", Acc_test)
    d = {
        "costs": costs,
        "Y_train_prediction": Y_train_prediction,
        "Y_test_prediction": Y_test_prediction,
        "Acc_train": Acc_train,
        "Acc_test": Acc_test,
        "w": w,
        "b": b,
        "lr": lr,
        "iterations": iterations,
    }
    return d
def plt_loss_figure(d):
    """
    Plot the loss curve of a single training run and show the figure.

    :param d: dict that provides the recorded costs under "costs" and the
              learning rate under "lr"
    """
    lr_text = "lr=" + str(d["lr"])
    loss_curve = np.squeeze(d["costs"])
    plt.plot(loss_curve, label=lr_text)
    plt.ylabel("cost")
    plt.xlabel("iteration(per hundreds)")
    plt.title(lr_text)
    plt.show()
if __name__ == "__main__":
    # Load the images and labels of the training and test sets.
    train_x, train_y, test_x, test_y, classes = load_dataset()

    # Flatten every image into one column and scale the values by 1/255.
    train_x = train_x.reshape(train_x.shape[0], -1).T / 255
    test_x = test_x.reshape(test_x.shape[0], -1).T / 255

    # Train one model per learning rate and keep every result dict.
    lrs = [0.01, 0.05, 0.005, 0.0005]
    ds = [
        model(train_x, train_y, test_x, test_y, iterations=2500, lr=rate, print_cost=True)
        for rate in lrs
    ]

    # Draw all loss curves on a shared figure for comparison.
    for run in ds:
        plt.plot(np.squeeze(run["costs"]), label="lr=" + str(run["lr"]))
    plt.ylabel("cost")
    plt.xlabel("iteration(per hundreds)")
    legend = plt.legend(loc="upper right", shadow=True)
    legend.get_frame().set_facecolor("0.90")
    plt.show()
问题1:代码编写完成后训练cost一直为nan
在该模型中,权重的初始化并不是造成这个问题的原因。应当注意对于输入的数据X,是否进行归一化或标准化:如果像素值没有缩放到[0, 1],几次更新之后sigmoid的输出会饱和为0或1,此时np.log(0)为-inf,而0乘以-inf得到nan,于是cost变为nan。
问题2:权重的初始化
可直接将权重和偏置全部初始化为0,也可用随机数进行初始化如:
w=np.random.random((dim, 1))
b = np.random.random()
也可使用kaiming初始化(需要PyTorch:nn即torch.nn,且w必须是torch.Tensor,不能是numpy数组;下面两种方式任选其一),即:
w=torch.zeros((dim, 1))
nn.init.kaiming_uniform_(w, mode='fan_in', nonlinearity='relu') # 均匀分布的初始化
nn.init.kaiming_normal_(w, mode='fan_in', nonlinearity='relu') # 正态分布的初始化