本文参考GitHub上作者【Kulbear】的文章,链接如下:
https://github.com/Kulbear/deep-learning-coursera/blob/master/Neural%20Networks%20and%20Deep%20Learning/Logistic%20Regression%20with%20a%20Neural%20Network%20mindset.ipynb
相关资料(数据集以及 py 文件)也可以从何宽大佬的博客中下载。
本文适合 Python 零基础、但有一些其他语言编程经验,并且理解吴恩达深度学习视频中数学逻辑的小伙伴。
本人是一位土木工程专业的研究生,但传统土木越来越不好做了,就想着做学科交叉,通过深度学习对建筑结构施工中产生的大量数据进行处理,或者从中挖掘出更有用的信息,估计现在看这篇文章的小伙伴里也有很多是跨专业、做学科交叉的(笑)。原来想用 MATLAB,但很多同学都说 Python 好用,因为社区更活跃,而且有很多好用的包,所以我就用 Python 了。大家可能已经用其他大佬的代码跑过,或者自己手打过一遍;手打的同学会不会发现:怎么我自己写出来的代码有一堆 bug?所以这篇文章的目的,就是从另一个角度对【Kulbear】大佬的代码进行解读,方便大家更好地理解 Python。
1.数据标准化
数据标准化这部分非常简单易懂,跳了
import numpy as np
import matplotlib as plt
import h5py
from PIL import Image
from scipy import ndimage
from lr_utils import load_dataset
# Load the raw image arrays, their labels and the class names.
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

# Flatten every example into one column so each data set becomes a 2-D matrix
# (the author's example: 209*64*64*3 becomes 12288*209).
train_set_x_s = np.reshape(train_set_x_orig, (train_set_x_orig.shape[0], -1)).T
test_set_x_s = np.reshape(test_set_x_orig, (test_set_x_orig.shape[0], -1)).T

# Scale the values by 255 (presumably 8-bit pixel intensities -- confirm
# against load_dataset); this completes the data preparation.
train_set_x, test_set_x = train_set_x_s / 255, test_set_x_s / 255
2.函数结构重排列
在【Kulbear】大佬的文章里面,数据标准化之后,是各个函数组件的代码,比如激活函数、初始化函数、优化函数等,最后整合进入model函数中,分总结构逻辑鲜明,但最后的代码中的函数参数
d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_i = 3000, learning_rate = 0.005, print_cost = True)
与数据标准化导入的数据
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
相隔的代码行数比较多,导致编写过程中经常把参数弄混。
因此给大家分享一个总分的函数结构
2.1 model函数
先设计集成后的model函数
def model(X_train, Y_train, X_test, Y_test, num_i, learning_rate, print_cost=False):
    """Train a logistic-regression classifier and report its accuracy.

    Runs the helper functions in order: ``w_and_b`` creates the initial
    parameters, ``optimize`` fits them by gradient descent, and ``predict``
    labels both data sets with the fitted parameters.

    Args:
        X_train: Training features, one example per column.
        Y_train: Training labels, compared element-wise with the predictions.
        X_test: Test features, laid out like ``X_train``.
        Y_test: Test labels.
        num_i: Number of gradient-descent iterations.
        learning_rate: Step size of each gradient-descent update.
        print_cost: Forwarded to ``optimize``; when true the cost is printed
            while training.

    Returns:
        dict with the keys ``"costs"``, ``"Y_prediction_test"``,
        ``"Y_prediction_train"``, ``"w"``, ``"b"``, ``"learning_rate"`` and
        ``"num_iterations"``.
    """
    # Step 1: the number of features (rows of X_train) fixes the size of w.
    w, b = w_and_b(X_train.shape[0])

    # Step 2: forward/backward propagation repeated num_i times.
    params, grads, costs = optimize(
        w, b, X_train, Y_train, num_i, learning_rate, print_cost
    )

    # Step 3: read the fitted parameters back out of the dictionary.
    w = params["w"]
    b = params["b"]

    # Step 4: predicted labels for both data sets.
    Y_pre_test = predict(w, b, X_test)
    Y_pre_train = predict(w, b, X_train)

    # Accuracy is 100 % minus the mean absolute label error.
    # First line: training accuracy; second line: test accuracy.
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_pre_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_pre_test - Y_test)) * 100))

    d = {
        "costs": costs,
        "Y_prediction_test": Y_pre_test,
        "Y_prediction_train": Y_pre_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_i,
    }
    return d
2.2 w_and_b函数,wb初始化函数
按照2.1中model函数的设计顺序来
def w_and_b(dim):
    """Create zero-initialised logistic-regression parameters.

    Args:
        dim: Number of input features (``model`` passes ``X_train.shape[0]``).

    Returns:
        Tuple ``(w, b)``: ``w`` is a ``(dim, 1)`` array of zeros and ``b`` is
        the integer ``0``.
    """
    # w is (dim, 1) so that w.T lines up with a feature matrix that has
    # ``dim`` rows (the author's example: 12288 rows by 209 columns).
    w = np.zeros(shape=(dim, 1))
    b = 0
    return w, b
2.3 optimize函数,优化函数
按照2.1中model函数的设计顺序来
def optimize(w, b, X, Y, num_i, learning_rate, print_cost=False):
    """Fit ``w`` and ``b`` with ``num_i`` steps of gradient descent.

    Each iteration calls ``propagate`` for the current cost and gradients and
    then moves the parameters against the gradient.

    Args:
        w: Initial weights (``model`` passes the result of ``w_and_b``).
        b: Initial bias.
        X: Training features.
        Y: Training labels.
        num_i: Number of iterations to run.
        learning_rate: Step size of each update.
        print_cost: When true, print the cost every 100 iterations.

    Returns:
        Tuple ``(params, grads, costs)``. ``params`` holds the final ``"w"``
        and ``"b"``; ``grads`` holds the last ``"dw"`` and ``"db"`` (both
        ``None`` when ``num_i`` is 0); ``costs`` lists the cost recorded every
        100 iterations, measured before that iteration's update.
    """
    costs = []
    # Keep the gradients defined even when the loop body never runs.
    dw, db = None, None

    for i in range(num_i):
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        # Gradient-descent update.
        w = w - learning_rate * dw
        b = b - learning_rate * db

        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))

    params = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    return params, grads, costs
2.3.1 propagate函数,传播函数
按照2.3优化函数的顺序来
def propagate(w, b, X, Y):
    """Run one forward and one backward pass of logistic regression.

    Args:
        w: Current weights; ``w.T`` is multiplied with ``X``.
        b: Current bias, added to every activation input.
        X: Features, one example per column (``X.shape[1]`` examples).
        Y: Labels, combined element-wise with the activations.

    Returns:
        Tuple ``(grads, cost)``: ``grads`` is a dict with the gradients
        ``"dw"`` (same shape as ``w``) and ``"db"``; ``cost`` is the
        cross-entropy cost averaged over the examples.
    """
    m = X.shape[1]  # number of examples

    # Forward pass: activations and the averaged cross-entropy cost.
    # sigmoid needs the up-to-date w and b, so it is called here on every pass.
    A = sigmoid(np.dot(w.T, X) + b)
    cost = (-1 / m) * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))

    # Backward pass: gradients of the cost with respect to w and b.
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)

    # Sanity checks on the gradient shape and dtype.
    assert dw.shape == w.shape
    assert db.dtype == float

    cost = np.squeeze(cost)
    grads = {"dw": dw, "db": db}
    return grads, cost
2.3.2 sigmoid函数,激活函数
跳了
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``.

    Args:
        z: A scalar or NumPy array; arrays are processed element-wise.

    Returns:
        The sigmoid of ``z``, with the same shape as ``z``.
    """
    s = 1 / (1 + np.exp(-z))
    return s
2.4 predict函数,预测函数
按照2.1中model函数的设计顺序来
def predict(w, b, X):
    """Predict 0/1 labels for the examples in ``X``.

    Args:
        w: Fitted weights; reshaped to ``(X.shape[0], 1)`` before use.
        b: Fitted bias.
        X: Features to label, one example per column. Any data set with the
            same number of rows works (``model`` passes both the training
            and the test set).

    Returns:
        A ``(1, m)`` float array, ``m = X.shape[1]``, holding 1 where the
        activation is greater than 0.5 and 0 elsewhere.
    """
    m = X.shape[1]  # number of examples in X
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)

    # Threshold all activations at once instead of looping over the columns.
    Y_pre = (A > 0.5).astype(float)

    assert Y_pre.shape == (1, m)
    return Y_pre
总代码
import numpy as np
import matplotlib as plt
import h5py
from PIL import Image
from scipy import ndimage
from lr_utils import load_dataset
# Load the raw image arrays, their labels and the class names.
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

# Flatten every example into one column so each data set becomes a 2-D matrix.
train_set_x_s = np.reshape(train_set_x_orig, (train_set_x_orig.shape[0], -1)).T
test_set_x_s = np.reshape(test_set_x_orig, (test_set_x_orig.shape[0], -1)).T

# Scale the values by 255 (presumably 8-bit pixel intensities -- confirm
# against load_dataset); this completes the data preparation.
train_set_x, test_set_x = train_set_x_s / 255, test_set_x_s / 255
def model(X_train, Y_train, X_test, Y_test, num_i, learning_rate, print_cost=False):
    """Train a logistic-regression classifier and report its accuracy.

    Runs the helper functions in order: ``w_and_b`` creates the initial
    parameters, ``optimize`` fits them by gradient descent, and ``predict``
    labels both data sets with the fitted parameters.

    Args:
        X_train: Training features, one example per column.
        Y_train: Training labels, compared element-wise with the predictions.
        X_test: Test features, laid out like ``X_train``.
        Y_test: Test labels.
        num_i: Number of gradient-descent iterations.
        learning_rate: Step size of each gradient-descent update.
        print_cost: Forwarded to ``optimize``; when true the cost is printed
            while training.

    Returns:
        dict with the keys ``"costs"``, ``"Y_prediction_test"``,
        ``"Y_prediction_train"``, ``"w"``, ``"b"``, ``"learning_rate"`` and
        ``"num_iterations"``.
    """
    # Step 1: the number of features (rows of X_train) fixes the size of w.
    w, b = w_and_b(X_train.shape[0])

    # Step 2: forward/backward propagation repeated num_i times.
    params, grads, costs = optimize(
        w, b, X_train, Y_train, num_i, learning_rate, print_cost
    )

    # Step 3: read the fitted parameters back out of the dictionary.
    w = params["w"]
    b = params["b"]

    # Step 4: predicted labels for both data sets.
    Y_pre_test = predict(w, b, X_test)
    Y_pre_train = predict(w, b, X_train)

    # Accuracy is 100 % minus the mean absolute label error.
    # First line: training accuracy; second line: test accuracy.
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_pre_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_pre_test - Y_test)) * 100))

    d = {
        "costs": costs,
        "Y_prediction_test": Y_pre_test,
        "Y_prediction_train": Y_pre_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_i,
    }
    return d
def w_and_b(dim):
    """Create zero-initialised logistic-regression parameters.

    Args:
        dim: Number of input features (``model`` passes ``X_train.shape[0]``).

    Returns:
        Tuple ``(w, b)``: ``w`` is a ``(dim, 1)`` array of zeros and ``b`` is
        the integer ``0``.
    """
    # w is (dim, 1) so that w.T lines up with a feature matrix of ``dim`` rows.
    w = np.zeros(shape=(dim, 1))
    b = 0
    return w, b
def optimize(w, b, X, Y, num_i, learning_rate, print_cost=False):
    """Fit ``w`` and ``b`` with ``num_i`` steps of gradient descent.

    Each iteration calls ``propagate`` for the current cost and gradients and
    then moves the parameters against the gradient.

    Args:
        w: Initial weights (``model`` passes the result of ``w_and_b``).
        b: Initial bias.
        X: Training features.
        Y: Training labels.
        num_i: Number of iterations to run.
        learning_rate: Step size of each update.
        print_cost: When true, print the cost every 100 iterations.

    Returns:
        Tuple ``(params, grads, costs)``. ``params`` holds the final ``"w"``
        and ``"b"``; ``grads`` holds the last ``"dw"`` and ``"db"`` (both
        ``None`` when ``num_i`` is 0); ``costs`` lists the cost recorded every
        100 iterations, measured before that iteration's update.
    """
    costs = []
    # Keep the gradients defined even when the loop body never runs.
    dw, db = None, None

    for i in range(num_i):
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        # Gradient-descent update.
        w = w - learning_rate * dw
        b = b - learning_rate * db

        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))

    params = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    return params, grads, costs
def propagate(w, b, X, Y):
    """Run one forward and one backward pass of logistic regression.

    Args:
        w: Current weights; ``w.T`` is multiplied with ``X``.
        b: Current bias, added to every activation input.
        X: Features, one example per column (``X.shape[1]`` examples).
        Y: Labels, combined element-wise with the activations.

    Returns:
        Tuple ``(grads, cost)``: ``grads`` is a dict with the gradients
        ``"dw"`` (same shape as ``w``) and ``"db"``; ``cost`` is the
        cross-entropy cost averaged over the examples.
    """
    m = X.shape[1]  # number of examples

    # Forward pass: activations and the averaged cross-entropy cost.
    A = sigmoid(np.dot(w.T, X) + b)
    cost = (-1 / m) * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))

    # Backward pass: gradients of the cost with respect to w and b.
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)

    # Sanity checks on the gradient shape and dtype.
    assert dw.shape == w.shape
    assert db.dtype == float

    cost = np.squeeze(cost)
    grads = {"dw": dw, "db": db}
    return grads, cost
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``.

    Args:
        z: A scalar or NumPy array; arrays are processed element-wise.

    Returns:
        The sigmoid of ``z``, with the same shape as ``z``.
    """
    s = 1 / (1 + np.exp(-z))
    return s
def predict(w, b, X):
    """Predict 0/1 labels for the examples in ``X``.

    Args:
        w: Fitted weights; reshaped to ``(X.shape[0], 1)`` before use.
        b: Fitted bias.
        X: Features to label, one example per column. Any data set with the
            same number of rows works (``model`` passes both the training
            and the test set).

    Returns:
        A ``(1, m)`` float array, ``m = X.shape[1]``, holding 1 where the
        activation is greater than 0.5 and 0 elsewhere.
    """
    m = X.shape[1]  # number of examples in X
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)

    # Threshold all activations at once instead of looping over the columns.
    Y_pre = (A > 0.5).astype(float)

    assert Y_pre.shape == (1, m)
    return Y_pre
# Train on the prepared data: 3000 iterations, learning rate 0.005,
# with cost printing switched on.
d = model(
    train_set_x,
    train_set_y,
    test_set_x,
    test_set_y,
    num_i=3000,
    learning_rate=0.005,
    print_cost=True,
)