A multi-layer neural network written entirely by hand. I strongly recommend writing one yourself. Looking back now after finishing Part 2 of the course, every pitfall I hit was covered there, and hitting them deepened my understanding.
Another day of being completely stuck. I thought it would be simple to throw together a multi-layer network much like the earlier one, but I stepped on countless pitfalls and worked from morning until now before it finally came together.
Summary of the big pitfalls:
1. In a multi-layer network, do not initialize the weights by multiplying by 0.01 as before. Words cannot express the pain: because of this, the loss simply would not go down after I built the network, and I had to check and debug it layer by layer. The code below scales by sqrt(2/n_prev) (He initialization) instead; see the short sketch after this list.
2. Adjust the number of iterations and the learning rate according to how the loss behaves; this is a long, slow process.
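To make pitfall 1 concrete, here is a minimal sketch (my own addition, with a made-up input of 12288 features and 5 examples) that pushes the same batch through a few ReLU layers under both schemes: with the 0.01 scaling the activations in the deeper layers tend to shrink toward zero, which is why the loss barely moves, while the He-style sqrt(2/n_prev) scaling used in the code below keeps them at a reasonable size.

import numpy as np

np.random.seed(3)
a0 = np.random.randn(12288, 5)            # hypothetical batch: 12288 features, 5 examples
layer_sizes = [12288, 20, 7, 3]

# Propagate the same batch through ReLU layers under both initialization schemes
a_small, a_he = a0, a0
for n_prev, n_cur in zip(layer_sizes[:-1], layer_sizes[1:]):
    w_small = np.random.randn(n_cur, n_prev) * 0.01               # the old 0.01 scaling
    w_he = np.random.randn(n_cur, n_prev) * np.sqrt(2 / n_prev)   # He initialization
    a_small = np.maximum(0, np.dot(w_small, a_small))
    a_he = np.maximum(0, np.dot(w_he, a_he))
    print("layer with", n_cur, "nodes -> activation std (0.01):", a_small.std(),
          " activation std (He):", a_he.std())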
I'm tired, so straight to the code.
First, imports and initialization:
import numpy as np
import matplotlib.pyplot as plt
import lr_utils as lr
# Build a multi-layer neural network
# Initialization
def init(x):
    """
    Build a deep network with the following structure.
    4 layers in total: 3 hidden layers and 1 output layer
    input -> (hidden1: 20 nodes, ReLU) -> (hidden2: 7 nodes, ReLU) ->
    (hidden3: 3 nodes, ReLU) -> (output: sigmoid)
    """
    first_num = 20
    second_num = 7
    third_num = 3
    np.random.seed(3)
    # He initialization: scale by sqrt(2 / n_prev) instead of 0.01 (see pitfall 1)
    w1 = np.random.randn(first_num, x.shape[0]) * np.sqrt(2 / x.shape[0])
    b1 = np.zeros((first_num, 1))
    w2 = np.random.randn(second_num, first_num) * np.sqrt(2 / first_num)
    b2 = np.zeros((second_num, 1))
    w3 = np.random.randn(third_num, second_num) * np.sqrt(2 / second_num)
    b3 = np.zeros((third_num, 1))
    w4 = np.random.randn(1, third_num) * np.sqrt(2 / third_num)
    b4 = np.zeros((1, 1))
    ini_param = {
        "w1": w1,
        "b1": b1,
        "w2": w2,
        "b2": b2,
        "w3": w3,
        "b3": b3,
        "w4": w4,
        "b4": b4
    }
    return ini_param
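As a quick smoke test (my own addition, with a made-up dummy input of 12288 features and 5 examples), printing the parameter shapes right after initialization catches transposed weight matrices early:

dummy_x = np.zeros((12288, 5))   # placeholder input: 12288 features, 5 examples
for name, value in init(dummy_x).items():
    print(name, value.shape)
# Expected: w1 (20, 12288), b1 (20, 1), w2 (7, 20), ..., w4 (1, 3), b4 (1, 1)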
Building the forward and backward passes
# Forward pass
def cal_z(w, a, b):  # a shape: (prev-layer nodes, examples); w shape: (this-layer nodes, prev-layer nodes)
    return np.dot(w, a) + b

def cal_sigma(z):  # z shape: (this-layer nodes, examples)
    return 1 / (1 + np.exp(-z))

def cal_tan_h(z):  # z shape: (this-layer nodes, examples)
    return np.tanh(z)

def cal_relu(z):  # z shape: (this-layer nodes, examples)
    return np.maximum(0, z)

def forward_f(x, p):
    z1 = cal_z(p["w1"], x, p["b1"])
    a1 = cal_relu(z1)
    z2 = cal_z(p["w2"], a1, p["b2"])
    a2 = cal_relu(z2)
    z3 = cal_z(p["w3"], a2, p["b3"])
    a3 = cal_relu(z3)
    z4 = cal_z(p["w4"], a3, p["b4"])
    a4 = cal_sigma(z4)
    forward_param = {
        "z1": z1,
        "a1": a1,
        "z2": z2,
        "a2": a2,
        "z3": z3,
        "a3": a3,
        "z4": z4,
        "a4": a4
    }
    return forward_param
# Cost function (cross-entropy)
def cost_f(a, y):  # a shape: (1, examples); y shape: (1, examples)
    m = y.shape[1]
    return -np.sum(y * np.log(a) + (1 - y) * np.log(1 - a)) / m
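# Quick sanity check I added (not in the original post): a constant prediction
# of 0.5 should cost ln(2) ~= 0.693 per example, regardless of the labels.
a_half = np.full((1, 4), 0.5)
y_any = np.array([[1, 0, 1, 1]])
print(cost_f(a_half, y_any))   # ~0.6931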
# Backward pass
def cal_dz_last(a, y):  # a shape: (1, examples); y shape: (1, examples)
    return a - y

def cal_dw_db(dz, a, m):  # dz shape: (layer-i nodes, examples); a shape: (layer i-1 nodes, examples)
    return np.dot(dz, a.T) / m, np.sum(dz, axis=1, keepdims=True) / m

def cal_da(dz, w):
    return np.dot(w.T, dz)

def cal_dtanh(da, a):  # da, a shape: (layer-i nodes, examples)
    return da * (1 - (a * a))

def cal_drelu(da, a):  # da, a shape: (layer-i nodes, examples)
    # a is the ReLU output and is never negative, so the gradient has to be
    # masked where a == 0 (i.e. where z <= 0), not where a < 0
    return da * np.where(a > 0, 1, 0)

def back_f(p, f_p, x, y):
    dz4 = cal_dz_last(f_p["a4"], y)
    dw4, db4 = cal_dw_db(dz4, f_p["a3"], y.shape[1])
    da3 = cal_da(dz4, p["w4"])
    dz3 = cal_drelu(da3, f_p["a3"])
    dw3, db3 = cal_dw_db(dz3, f_p["a2"], y.shape[1])
    da2 = cal_da(dz3, p["w3"])
    dz2 = cal_drelu(da2, f_p["a2"])
    dw2, db2 = cal_dw_db(dz2, f_p["a1"], y.shape[1])
    da1 = cal_da(dz2, p["w2"])
    dz1 = cal_drelu(da1, f_p["a1"])
    dw1, db1 = cal_dw_db(dz1, x, y.shape[1])
    back_param = {
        "dw4": dw4,
        "db4": db4,
        "dw3": dw3,
        "db3": db3,
        "dw2": dw2,
        "db2": db2,
        "dw1": dw1,
        "db1": db1
    }
    return back_param
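When the loss refuses to go down, a gradient check is a faster way to find the broken layer than eyeballing each one. This is my own addition, not part of the original post: it reuses init/forward_f/back_f/cost_f from above on a tiny made-up batch and compares the analytic dw1 against a centered finite-difference estimate.

def numeric_dw1(p, x, y, i, j, eps=1e-6):
    # Numerical estimate of d(cost)/d(w1[i, j]) via centered differences
    p_plus = {k: v.copy() for k, v in p.items()}
    p_minus = {k: v.copy() for k, v in p.items()}
    p_plus["w1"][i, j] += eps
    p_minus["w1"][i, j] -= eps
    c_plus = cost_f(forward_f(x, p_plus)["a4"], y)
    c_minus = cost_f(forward_f(x, p_minus)["a4"], y)
    return (c_plus - c_minus) / (2 * eps)

np.random.seed(1)
x_tiny = np.random.randn(12288, 3)     # 3 made-up examples
y_tiny = np.array([[1, 0, 1]])
p_tiny = init(x_tiny)
grads = back_f(p_tiny, forward_f(x_tiny, p_tiny), x_tiny, y_tiny)
for i, j in [(0, 0), (1, 5), (3, 100)]:
    print(grads["dw1"][i, j], "vs", numeric_dw1(p_tiny, x_tiny, y_tiny, i, j))  # should roughly match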
Updating parameters and iterating to build the model
# Update parameters
def update_p(p, b_p, learning_rate):
    upd_p = {
        "w1": p["w1"] - learning_rate * b_p["dw1"],
        "b1": p["b1"] - learning_rate * b_p["db1"],
        "w2": p["w2"] - learning_rate * b_p["dw2"],
        "b2": p["b2"] - learning_rate * b_p["db2"],
        "w3": p["w3"] - learning_rate * b_p["dw3"],
        "b3": p["b3"] - learning_rate * b_p["db3"],
        "w4": p["w4"] - learning_rate * b_p["dw4"],
        "b4": p["b4"] - learning_rate * b_p["db4"]
    }
    return upd_p
# Build the model
def model(x, y, learning_rate, loop_num):
    p = init(x)
    cost = []
    for i in range(loop_num):
        f_p = forward_f(x, p)
        b_p = back_f(p, f_p, x, y)
        p = update_p(p, b_p, learning_rate)
        if i % 100 == 0:
            # Record the cost every 100 iterations
            cost.append(cost_f(f_p["a4"], y))
            # print("------------------------")
            # print(b_p["dz2"])
    return p, cost
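For pitfall 2, one way to shorten the trial-and-error is to run a few short training runs at candidate learning rates and compare the cost curves before committing to a long run. This helper and the candidate rates are my own illustrative additions, not from the original post.

def compare_rates(x, y, rates, loop_num=1000):
    # Short runs at each candidate rate; cost is recorded every 100 iterations
    for lr_candidate in rates:
        _, cost = model(x, y, lr_candidate, loop_num)
        plt.plot(cost, label="lr=" + str(lr_candidate))
    plt.xlabel("iterations (x100)")
    plt.ylabel("cost")
    plt.legend()
    plt.show()

# Example (after the data below has been loaded and preprocessed):
# compare_rates(train_x_raw, train_y, [0.0005, 0.001, 0.005])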
Loading the data, tuning, and validation
# Load the dataset (the same one used in the week 2 assignment)
train_x_raw, train_y, test_x_raw, test_y, cs = lr.load_dataset()
'''Data notes:
Input (a0): shape (N_x, number of examples)
Y: labels, shape (1, number of examples)
learning_rate: learning rate
loop_num: number of iterations
'''
# Flatten, transpose, and normalize
def red_dim(arr):
    return arr.reshape(arr.shape[0], -1).T / 255

train_x_raw = red_dim(train_x_raw)
test_x_raw = red_dim(test_x_raw)
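# Worth checking before training (my own addition): every function above assumes
# inputs of shape (features, examples) and labels of shape (1, examples).
print("train_x:", train_x_raw.shape, "train_y:", train_y.shape)
print("test_x:", test_x_raw.shape, "test_y:", test_y.shape)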
w_final, cost = model(train_x_raw, train_y, 0.001, 3000)
test_f_p = forward_f(test_x_raw, w_final)
train_f_p = forward_f(train_x_raw, w_final)
print("Training set accuracy:", 100 * (1 - np.sum(np.abs(np.round(train_f_p["a4"]) - train_y)) / train_y.shape[1]), '%')
print("Test set accuracy:", 100 * (1 - np.sum(np.abs(np.round(test_f_p["a4"]) - test_y)) / test_y.shape[1]), '%')
plt.plot(cost)
plt.show()
Training set accuracy: 97.60765550239235 %
Test set accuracy: 76.0 %