建立神经网络的主要步骤是:

1. 定义模型结构(例如输入特征的数量)
2. 初始化模型的参数
3. 循环:
   3.1 计算当前损失(正向传播)
   3.2 计算当前梯度(反向传播)
   3.3 更新参数(梯度下降)
#1.图片降维+归一化
# Load the dataset via the project helper load_dataset(); presumably returns the
# raw image arrays, their labels, and the class names -- TODO confirm shapes.
train_set_x_orig , train_set_y , test_set_x_orig , test_set_y , classes = load_dataset()
# Flatten each image and normalize pixel values.
# Idiom: X_flatten = X.reshape(X.shape[0], -1).T gives shape (features, m); .T is the transpose.
# Flatten and transpose the training set.
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0],-1).T
# Flatten and transpose the test set.
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
# Scale pixel values from [0, 255] down to [0, 1].
train_set_x = train_set_x_flatten / 255
test_set_x = test_set_x_flatten / 255
#2.定义sigmoid函数
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + e^(-z)), applied elementwise."""
    return 1 / (1 + np.exp(-z))
#3.初始化w,b
def initialize_with_zeros(dim):
    """Create a zero weight vector w of shape (dim, 1) and a scalar bias b = 0."""
    w = np.zeros((dim, 1))
    b = 0
    # Sanity checks on the shapes/types we hand back
    # (note: asserts are stripped under `python -O`).
    assert w.shape == (dim, 1)
    assert isinstance(b, (float, int))
    return (w, b)
#4.计算成本函数与梯度 L, dw, db
def propagate(w, b, X, Y):
    """One forward/backward pass of logistic regression.

    Parameters:
        w -- weights, shape (dim, 1)
        b -- scalar bias
        X -- data, shape (dim, m)
        Y -- 0/1 labels, shape (1, m)

    Returns:
        grads -- {"dw": gradient of cost w.r.t. w, "db": gradient w.r.t. b}
        cost  -- scalar cross-entropy cost over the m examples
    """
    m = X.shape[1]  # number of examples (columns of X)
    # Forward propagation: activations and cross-entropy cost.
    A = sigmoid(np.dot(w.T, X) + b)
    cost = (-1 / m) * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))
    # Backward propagation: gradients of the cost.
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)
    # Shape/type sanity checks; np.sum yields a float64 scalar, so db.dtype works.
    assert dw.shape == w.shape
    assert db.dtype == float
    cost = np.squeeze(cost)  # collapse the (1,1)-ish result to a bare scalar
    assert cost.shape == ()
    return ({"dw": dw, "db": db}, cost)
#5更新参数w,b
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """Learn w and b by running gradient descent.

    Parameters:
        w, b           -- initial weights (dim, 1) and scalar bias
        X, Y           -- data (dim, m) and 0/1 labels (1, m)
        num_iterations -- number of gradient-descent steps
        learning_rate  -- step size for the parameter updates
        print_cost     -- if True, print the cost every 100 iterations

    Returns:
        params -- {"w": w, "b": b}, the learned parameters
        grads  -- {"dw": dw, "db": db}, gradients from the last step
        costs  -- list of costs recorded every 100 iterations (for plotting)
    """
    # BUG FIX: the original reused the single name `cost` for both the running
    # list and the scalar returned by propagate(), so `cost.append(cost)` raised
    # AttributeError on the very first iteration (i == 0). Keep them separate.
    costs = []
    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]
        # Gradient-descent update.
        w = w - learning_rate * dw
        b = b - learning_rate * db
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("迭代的次数: %i , 误差值: %f" % (i, cost))
    params = {
        "w": w,
        "b": b
    }
    grads = {
        "dw": dw,
        "db": db
    }
    return (params, grads, costs)
#6.预测 1/0
def predit(w, b, X):
    """Predict a 0/1 label for every column (example) of X.

    (The name `predit` is a typo for `predict`, kept because model() calls it.)

    Parameters:
        w -- learned weights; reshaped to (dim, 1) to match X
        b -- scalar bias
        X -- data, shape (dim, m)

    Returns:
        Y_prediction -- array of shape (1, m) containing 0.0 / 1.0 labels
    """
    m = X.shape[1]  # number of examples
    w = w.reshape(X.shape[0], 1)  # make sure w lines up with X's feature dimension
    # Probability that each example belongs to class 1.
    A = sigmoid(np.dot(w.T, X) + b)
    # Threshold at 0.5 in one vectorized step (float64, same as the zeros-then-fill loop).
    Y_prediction = (A > 0.5).astype(np.float64)
    assert Y_prediction.shape == (1, m)
    return Y_prediction
#7model
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.005, print_cost=False):
    """Train the logistic-regression classifier and report train/test accuracy.

    Parameters:
        X_train, Y_train -- training data (dim, m_train) and labels (1, m_train)
        X_test, Y_test   -- test data (dim, m_test) and labels (1, m_test)
        num_iterations   -- gradient-descent steps passed to optimize()
        learning_rate    -- step size; BUG FIX: default lowered from 0.5 to
                            0.005 -- 0.5 diverged and produced NaN costs
                            (see the author's note at the end of the file)
        print_cost       -- if True, optimize() prints the cost every 100 steps

    Returns:
        d -- dict with the recorded costs, predictions, learned parameters,
             and the hyperparameters used
    """
    # Initialize parameters with zeros, sized to the input feature dimension.
    w, b = initialize_with_zeros(X_train.shape[0])
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
    # Retrieve the learned parameters w and b from the "parameters" dict.
    w, b = parameters["w"], parameters["b"]
    # Predict on the test/training sets.
    Y_prediction_test = predit(w, b, X_test)
    Y_prediction_train = predit(w, b, X_train)
    # Accuracy = 100 - mean absolute prediction error (labels are 0/1).
    print("训练集准确性",format(100-np.mean(np.abs(Y_prediction_train-Y_train))*100))
    print("测试集集准确性", format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))
    d = {
        "cost": costs,
        "Y_prediction_test": Y_prediction_test,
        # Correctly-spelled key added; the original typo key is kept below so
        # existing callers that read "Y_PREDICTion_train" keep working.
        "Y_prediction_train": Y_prediction_train,
        "Y_PREDICTion_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations
    }
    return d
运行时出现误差值为 nan:发现学习率写成了 0.5,改成 0.005 即可解决问题。