Code:
from planar_utils import plot_decision_boundary
import numpy as np
import matplotlib.pyplot as plt
from dnn_utils_v2 import relu_backward
from create_data import load_data
class dnn:
    def __init__(self, layer_dims, init_mod='random') -> None:
        '''layer_dims: number of neurons in each layer
        init_mod: initialization scheme, one of zeros, random, he'''
        self.WL = {}
        self.bL = {}
        self.L = len(layer_dims) - 1
        # Initialize the parameters
        if init_mod == 'random':
            for i in range(1, self.L + 1):
                self.WL['W' + str(i)] = np.random.randn(layer_dims[i], layer_dims[i - 1])
                self.bL['b' + str(i)] = np.zeros((layer_dims[i], 1))
        elif init_mod == 'zeros':
            for i in range(1, self.L + 1):
                self.WL['W' + str(i)] = np.zeros((layer_dims[i], layer_dims[i - 1]))
                self.bL['b' + str(i)] = np.zeros((layer_dims[i], 1))
        else:  # 'he'
            for i in range(1, self.L + 1):
                self.WL['W' + str(i)] = np.random.randn(layer_dims[i], layer_dims[i - 1]) * np.sqrt(2. / layer_dims[i - 1])
                self.bL['b' + str(i)] = np.zeros((layer_dims[i], 1))
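        # He scaling: choosing Var(w) = 2 / n_prev keeps the variance of the
        # activations roughly constant across ReLU layers (the factor 2
        # compensates for ReLU zeroing half of its inputs).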
        # Caches for the forward and backward passes
        self.AL = {}
        self.ZL = {}
        self.dZ = {}
        self.dW = {}
        self.db = {}
        self.dA = {}

    def input_data(self, X, Y, learning_rate):
        self.m = X.shape[1]
        self.AL["A0"] = X
        self.Y = Y
        self.learning_rate = learning_rate

    def set_data(self, X):
        self.m = X.shape[1]
        self.AL["A0"] = X
    # Forward-propagation module: Z and A are cached at every layer so the
    # backward pass can reuse them
    def linear_activation_forward(self, i, activation):
        '''Forward propagation through a single layer'''
        # self.ZL['Zi'] and self.AL['Ai'] store layer i's pre-activation and
        # activation; W and b already live on the object
        self.ZL['Z' + str(i)] = np.dot(self.WL['W' + str(i)], self.AL['A' + str(i - 1)]) + self.bL['b' + str(i)]
        if activation == "sigmoid":
            self.AL['A' + str(i)] = 1 / (1 + np.exp(-self.ZL['Z' + str(i)]))
        elif activation == "relu":
            self.AL['A' + str(i)] = np.maximum(0, self.ZL['Z' + str(i)])
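        # Shapes: W_i is (n_i, n_{i-1}) and A_{i-1} is (n_{i-1}, m), so Z_i
        # and A_i are (n_i, m); b_i of shape (n_i, 1) broadcasts across the
        # m example columns.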
    def L_model_forward(self):
        # The first L-1 layers use ReLU; the last layer uses sigmoid
        for i in range(1, self.L):
            self.linear_activation_forward(i, "relu")
        self.linear_activation_forward(self.L, "sigmoid")
        # Check the final output has the shape a binary classifier needs
        assert self.AL['A' + str(self.L)].shape == (1, self.m)
    # Cost computation
    def compute_cost(self):
        return np.squeeze(-1 / self.m * np.sum(self.Y * np.log(self.AL['A' + str(self.L)] + 1e-5) + (1 - self.Y) * np.log(1 - self.AL['A' + str(self.L)] + 1e-5)))
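    # Note: this is the binary cross-entropy
    # J = -(1/m) * sum(Y*log(A_L) + (1-Y)*log(1-A_L));
    # the 1e-5 offset guards against log(0) when the sigmoid saturates.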
    # Backward-propagation module
    def linear_backward(self, i):
        '''Compute dW[i], db[i], and dA[i-1] from dZ[i]'''
        self.dW['dW' + str(i)] = 1 / self.m * np.dot(self.dZ['dZ' + str(i)], self.AL["A" + str(i - 1)].T)
        self.db['db' + str(i)] = 1 / self.m * np.sum(self.dZ['dZ' + str(i)], axis=1, keepdims=True)
        self.dA['dA' + str(i - 1)] = np.dot(self.WL['W' + str(i)].T, self.dZ['dZ' + str(i)])
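        # Standard backprop identities:
        #   dW_i = (1/m) * dZ_i @ A_{i-1}.T
        #   db_i = (1/m) * sum of dZ_i over the example columns
        #   dA_{i-1} = W_i.T @ dZ_i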
    def L_model_backward(self):
        # Start from the last layer
        # dA_L, the gradient of the cost with respect to the output A_L
        self.dA['dA' + str(self.L)] = -(np.divide(self.Y, self.AL['A' + str(self.L)] + 1e-10) - np.divide(1 - self.Y, 1 - self.AL['A' + str(self.L)] + 1e-10))
        # dZ_L through the sigmoid derivative s * (1 - s)
        s = 1 / (1 + np.exp(-self.ZL['Z' + str(self.L)]))
        self.dZ['dZ' + str(self.L)] = self.dA['dA' + str(self.L)] * s * (1 - s)
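        # Note: dA_L * s * (1 - s) simplifies analytically to A_L - Y,
        # the usual sigmoid-plus-cross-entropy shortcut.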
        # dW_L, db_L, and dA_{L-1} for the output layer
        self.linear_backward(self.L)
        # Walk back through the ReLU layers L-1, ..., 1
        for i in reversed(range(1, self.L)):
            # relu_backward (from dnn_utils_v2) passes dA through where
            # Z > 0 and zeroes it elsewhere, i.e. the ReLU derivative
            self.dZ['dZ' + str(i)] = relu_backward(self.dA['dA' + str(i)], self.ZL['Z' + str(i)])
            # dW, db, and dA[i-1] for the current layer
            self.linear_backward(i)
    # Gradient-descent parameter update
    def update_wb(self):
        for i in range(1, self.L + 1):
            self.WL['W' + str(i)] = self.WL['W' + str(i)] - self.learning_rate * self.dW['dW' + str(i)]
            self.bL['b' + str(i)] = self.bL['b' + str(i)] - self.learning_rate * self.db['db' + str(i)]
    def train(self, iterations):
        costs = []
        for i in range(iterations):
            # Forward propagation
            self.L_model_forward()
            # Cost
            cost = self.compute_cost()
            # Backward propagation
            self.L_model_backward()
            # Parameter update
            self.update_wb()
            if i % 100 == 0:
                costs.append(cost)
                print("cost after iteration " + str(i) + ": " + str(cost))
        return costs
    def predict(self, X):
        self.set_data(X)
        self.L_model_forward()
        return self.AL['A' + str(self.L)] >= 0.5
# Load the data
x, y = load_data()
train_num = int(0.8 * x.shape[1])
# Split into training and test sets
# (test_x/test_y are split off here, but the demo below evaluates on the
# full set)
train_x = x[:, :train_num]
test_x = x[:, train_num:]
train_y = y[:, :train_num]
test_y = y[:, train_num:]
# A 3-layer network (two hidden layers plus the output layer);
# layer_dims also includes the 2-dimensional input
my_dnn = dnn([2, 10, 5, 1], 'random')
# Feed in the training data and learning rate
my_dnn.input_data(train_x, train_y, 0.01)
# Train
costs = my_dnn.train(50000)
# Predict on the full dataset
y_predict = my_dnn.predict(x)
print(y_predict.shape)
# Number of misclassified examples
print(np.sum(np.abs(y_predict.astype(int) - y)))
plt.subplot(1,2,1)
plot_decision_boundary(lambda x: my_dnn.predict(x.T), x, y)
plt.subplot(1,2,2)
plt.plot(costs)
plt.show()
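To compare the three schemes directly, the same architecture can be retrained with each value of init_mod. A minimal sketch reusing the dnn class and the train_x/train_y split above (the shorter run is just for the comparison):

for mod in ('zeros', 'random', 'he'):
    net = dnn([2, 10, 5, 1], mod)
    net.input_data(train_x, train_y, 0.01)
    costs = net.train(2000)  # shorter run, enough to see the trend
    print(mod + " final cost: " + str(costs[-1]))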
Random initialization:
[Figure: decision boundary and cost curve with random initialization]
Zeros initialization:
[Figure: decision boundary and cost curve with zeros initialization]
He initialization:
[Figure: decision boundary and cost curve with He initialization]
Summary:
Zero initialization: fails to break symmetry. With W = 0, every neuron in a layer computes the same value and receives the same (here, zero) weight gradient, so the weights never move and the network learns nothing.
Random initialization: breaks symmetry, but weights that are too large are also harmful: large |Z| pushes the sigmoid into its flat regions, so gradients shrink and the cost drops slowly.
He initialization: works well. Scaling the weights by sqrt(2 / n_prev) keeps the activation variance stable across ReLU layers, so training converges quickly.
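The zero-initialization failure is easy to verify with the class above: every dW computed in linear_backward is a product in which one factor stays zero, so the weights never move. A minimal check, assuming the train_x/train_y split from the script:

net = dnn([2, 10, 5, 1], 'zeros')
net.input_data(train_x, train_y, 0.01)
net.train(500)
print(np.allclose(net.WL['W1'], 0))  # True: W1 never left zero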