code:
from dnn_utils_v2 import *
import numpy as np
import h5py
import matplotlib.pyplot as plt
from dnn_utils_v2 import relu_backward
class dnn:
    def __init__(self, layer_dims, optimizer="gd") -> None:
        '''layer_dims: number of units in each layer.
        Weights use scaled random initialization (divided by sqrt of the previous layer size).'''
        self.WL = {}
        self.bL = {}
        self.L = len(layer_dims) - 1
        # initialize the parameters
        np.random.seed(3)
        for i in range(1, self.L + 1):
            self.WL['W' + str(i)] = np.random.randn(layer_dims[i], layer_dims[i - 1]) / np.sqrt(layer_dims[i - 1])
            self.bL['b' + str(i)] = np.zeros((layer_dims[i], 1))
        # caches used by the forward and backward passes
        self.XL = {}
        self.AL = {}
        self.ZL = {}
        self.dZ = {}
        self.dW = {}
        self.db = {}
        self.dA = {}
        self.minibatchs = []
    def input_data(self, X, Y, learning_rate, optimizer, minibatchsize=64, b1=0.9, b2=0.8):
        self.learning_rate = learning_rate
        # mini-batch size
        self.minbatch_size = minibatchsize
        self.optimizer = optimizer
        self.AL["A0"] = X
        self.Y = Y
        self.m = X.shape[1]
        if optimizer == '':
            # plain full-batch gradient descent, no optimization
            self.m = X.shape[1]
        elif optimizer == 'gd':
            # mini-batch gradient descent: build the mini-batches
            self.ini_gd()
        elif optimizer == 'moment':
            self.ini_mom(b1)
        elif optimizer == 'adam':
            self.ini_ada(b1, b2)
    def ini_gd(self):
        # shuffling of the training examples is currently disabled:
        # state=np.random.get_state()
        # np.random.shuffle(self.AL["A0"])
        # np.random.set_state(state)
        # np.random.shuffle(self.Y)
        self.minibatch_num = np.floor(self.Y.shape[1] / self.minbatch_size)
        print("split into " + str(int(self.minibatch_num)) + " mini-batches")
        # split the data into mini-batches
        for k in range(int(self.minibatch_num)):
            temp_minibatch_x = self.AL["A0"][:, k * self.minbatch_size:(k + 1) * self.minbatch_size]
            temp_minibatch_y = self.Y[:, k * self.minbatch_size:(k + 1) * self.minbatch_size]
            temp_minibatch = (temp_minibatch_x, temp_minibatch_y)
            print("mini-batch " + str(k) + ": x.shape=" + str(temp_minibatch_x.shape) + " y.shape=" + str(temp_minibatch_y.shape))
            self.minibatchs.append(temp_minibatch)
        # last, smaller mini-batch when m is not a multiple of the mini-batch size
        if self.Y.shape[1] % self.minbatch_size != 0:
            temp_minibatch_x = self.AL["A0"][:, int(self.minibatch_num) * self.minbatch_size:]
            temp_minibatch_y = self.Y[:, int(self.minibatch_num) * self.minbatch_size:]
            temp_minibatch = (temp_minibatch_x, temp_minibatch_y)
            print("mini-batch " + str(int(self.minibatch_num)) + ": x.shape=" + str(temp_minibatch_x.shape) + " y.shape=" + str(temp_minibatch_y.shape))
            self.minibatchs.append(temp_minibatch)
    def ini_mom(self, b1):
        # hyperparameter for momentum
        self.b1 = b1
        self.ini_gd()
        self.ini_v()
    def ini_ada(self, b1, b2):
        # hyperparameters for Adam
        self.b1 = b1
        self.b2 = b2
        # time step used for bias correction
        self.t = 0
        self.ini_gd()
        self.ini_v()
        self.ini_s()
    def set_data(self, X):
        # set the input when predicting
        self.m = X.shape[1]
        self.AL["A0"] = X
    def ini_v(self):
        # initialize the momentum terms
        self.vdW = {}
        self.vdb = {}
        for i in range(1, self.L + 1):
            self.vdW['dW' + str(i)] = np.zeros(self.WL["W" + str(i)].shape)
            self.vdb['db' + str(i)] = np.zeros(self.bL['b' + str(i)].shape)
    def ini_s(self):
        # initialize the RMS terms and the bias-corrected dictionaries for Adam
        self.SdW = {}
        self.Sdb = {}
        self.vWc = {}
        self.vbc = {}
        self.sWc = {}
        self.sbc = {}
        for i in range(1, self.L + 1):
            self.SdW['dW' + str(i)] = np.zeros(self.WL["W" + str(i)].shape)
            self.Sdb['db' + str(i)] = np.zeros(self.bL['b' + str(i)].shape)
    # forward-propagation module; Z and A are cached during the forward pass
    def linear_activation_forward(self, i, activation):
        '''forward propagation for a single layer'''
        # self.ZL['Zi'] and self.AL['Ai'] hold the values of layer i; W and b live on the object
        self.ZL['Z' + str(i)] = np.dot(self.WL['W' + str(i)], self.AL['A' + str(i - 1)]) + self.bL['b' + str(i)]
        if activation == "sigmoid":
            self.AL['A' + str(i)] = 1 / (1 + np.exp(-self.ZL['Z' + str(i)]))
        elif activation == "relu":
            self.AL['A' + str(i)] = np.maximum(0, self.ZL['Z' + str(i)])
    def L_model_forward(self):
        # the first L-1 layers use ReLU, the last layer uses sigmoid
        for i in range(1, self.L):
            self.linear_activation_forward(i, "relu")
        self.linear_activation_forward(self.L, "sigmoid")
        # check that the output has the shape required for binary classification
        # print("AL.shape="+str(self.AL['A'+str(self.L)].shape)+" Y.shape="+str(self.Y.shape[1]))
        assert (self.AL['A' + str(self.L)].shape == (1, self.m))
    # cross-entropy cost, with a small epsilon inside the logs for numerical stability
    def computer_cost(self):
        return np.squeeze(-1 / self.m * np.sum(self.Y * np.log(self.AL['A' + str(self.L)] + 1e-5) + (1 - self.Y) * np.log(1 - self.AL['A' + str(self.L)] + 1e-5)))
    # backward-propagation module
    def linear_backward(self, i):
        '''compute dW[i], db[i] and dA[i-1] from dZ[i]'''
        self.dW['dW' + str(i)] = 1 / self.Y.shape[1] * np.dot(self.dZ['dZ' + str(i)], self.AL["A" + str(i - 1)].T)
        self.db['db' + str(i)] = 1 / self.Y.shape[1] * np.sum(self.dZ['dZ' + str(i)], axis=1, keepdims=True)
        self.dA['dA' + str(i - 1)] = np.dot(self.WL['W' + str(i)].T, self.dZ['dZ' + str(i)])
    def L_model_backforward(self):
        # output layer first
        # dA of the output layer (derivative of the cross-entropy cost)
        self.dA['dA' + str(self.L)] = -(np.divide(self.Y, self.AL['A' + str(self.L)] + 1e-10) - np.divide(1 - self.Y, 1 - self.AL['A' + str(self.L)] + 1e-10))
        # dZ of the output layer (sigmoid derivative)
        s = 1 / (1 + np.exp(-self.ZL['Z' + str(self.L)]))
        self.dZ['dZ' + str(self.L)] = self.dA['dA' + str(self.L)] * s * (1 - s)
        # dW, db and dA[L-1] of the output layer
        self.linear_backward(self.L)
        # hidden layers L-1 ... 1 use the ReLU derivative
        for i in reversed(range(1, self.L)):
            self.dZ['dZ' + str(i)] = relu_backward(self.dA['dA' + str(i)], self.ZL['Z' + str(i)])
            # dW, db and dA[i-1] of layer i
            self.linear_backward(i)
    # parameter updates
    # plain (mini-batch) gradient-descent update
    def update_wb(self):
        for i in range(1, self.L + 1):
            self.WL['W' + str(i)] = self.WL['W' + str(i)] - self.learning_rate * self.dW['dW' + str(i)]
            self.bL['b' + str(i)] = self.bL['b' + str(i)] - self.learning_rate * self.db['db' + str(i)]
    # parameter update with momentum
    def update_wb_withmomentum(self):
        for i in range(1, self.L + 1):
            self.WL['W' + str(i)] = self.WL['W' + str(i)] - self.learning_rate * self.vdW['dW' + str(i)]
            self.bL['b' + str(i)] = self.bL['b' + str(i)] - self.learning_rate * self.vdb['db' + str(i)]
    # parameter update with Adam
    def update_wb_withadam(self):
        for i in range(1, self.L + 1):
            self.WL['W' + str(i)] = self.WL['W' + str(i)] - self.learning_rate * (self.vWc['dW' + str(i)] / (np.sqrt(self.sWc['dW' + str(i)]) + 1e-7))
            self.bL['b' + str(i)] = self.bL['b' + str(i)] - self.learning_rate * (self.vbc['db' + str(i)] / (np.sqrt(self.sbc['db' + str(i)]) + 1e-7))
    # update the momentum terms vdW, vdb
    def update_vdwb(self):
        for i in range(1, self.L + 1):
            self.vdW['dW' + str(i)] = self.b1 * self.vdW['dW' + str(i)] + (1 - self.b1) * self.dW['dW' + str(i)]
            self.vdb['db' + str(i)] = self.b1 * self.vdb['db' + str(i)] + (1 - self.b1) * self.db['db' + str(i)]
    # update v, s and their bias-corrected versions for Adam
    def update_svdwb(self):
        # count the number of updates; bias correction uses beta**t, not beta**2
        self.t += 1
        for i in range(1, self.L + 1):
            self.vdW['dW' + str(i)] = self.b1 * self.vdW['dW' + str(i)] + (1 - self.b1) * self.dW['dW' + str(i)]
            self.vdb['db' + str(i)] = self.b1 * self.vdb['db' + str(i)] + (1 - self.b1) * self.db['db' + str(i)]
            self.vWc['dW' + str(i)] = self.vdW['dW' + str(i)] / (1 - self.b1 ** self.t)
            self.vbc['db' + str(i)] = self.vdb['db' + str(i)] / (1 - self.b1 ** self.t)
            self.SdW['dW' + str(i)] = self.b2 * self.SdW['dW' + str(i)] + (1 - self.b2) * (self.dW['dW' + str(i)] ** 2)
            self.Sdb['db' + str(i)] = self.b2 * self.Sdb['db' + str(i)] + (1 - self.b2) * (self.db['db' + str(i)] ** 2)
            self.sWc['dW' + str(i)] = self.SdW['dW' + str(i)] / (1 - self.b2 ** self.t)
            self.sbc['db' + str(i)] = self.Sdb['db' + str(i)] / (1 - self.b2 ** self.t)
    def train(self, iterations):
        costs = []
        for i in range(iterations):
            if self.optimizer == '':
                # forward propagation
                self.L_model_forward()
                # compute the cost
                cost = self.computer_cost()
                # backward propagation
                self.L_model_backforward()
                # update the parameters
                self.update_wb()
                if i % 100 == 0:
                    costs.append(cost)
                    print("iteration " + str(i) + " cost: " + str(cost))
            elif self.optimizer == 'gd':
                for minibatch in self.minibatchs:
                    (minibatch_x, minibatch_y) = minibatch
                    self.AL['A0'] = minibatch_x
                    self.Y = minibatch_y
                    # update m for the forward pass of this mini-batch
                    self.m = minibatch_x.shape[1]
                    self.L_model_forward()
                    cost = self.computer_cost()
                    self.L_model_backforward()
                    self.update_wb()
                    # log the cost of each mini-batch every 100 epochs
                    if i % 100 == 0:
                        costs.append(cost)
                        # print("iteration " + str(i) + " minibatch cost: " + str(cost))
            elif self.optimizer == 'moment':
                for minibatch in self.minibatchs:
                    (minibatch_x, minibatch_y) = minibatch
                    self.AL['A0'] = minibatch_x
                    self.Y = minibatch_y
                    self.m = minibatch_x.shape[1]
                    self.L_model_forward()
                    cost = self.computer_cost()
                    self.L_model_backforward()
                    self.update_vdwb()
                    self.update_wb_withmomentum()
                    if i % 100 == 0:
                        costs.append(cost)
                        print("iteration " + str(i) + " minibatch cost: " + str(cost))
            elif self.optimizer == 'adam':
                for minibatch in self.minibatchs:
                    (minibatch_x, minibatch_y) = minibatch
                    self.AL['A0'] = minibatch_x
                    self.Y = minibatch_y
                    self.m = minibatch_x.shape[1]
                    self.L_model_forward()
                    cost = self.computer_cost()
                    self.L_model_backforward()
                    self.update_svdwb()
                    self.update_wb_withadam()
                    if i % 100 == 0:
                        costs.append(cost)
                        print("iteration " + str(i) + " minibatch cost: " + str(cost))
        return costs
    def predict(self, X):
        self.set_data(X)
        self.L_model_forward()
        return self.AL['A' + str(self.L)] >= 0.5
# load the dataset
train_x, train_y, test_x, test_y = load_2D_dataset()
# a neural network with layer sizes [2, 3, 2, 1]
my_dnn = dnn([2, 3, 2, 1])
# set the data: learning rate 0.01, Adam, mini-batch size 32, beta1 = 0.9
my_dnn.input_data(train_x, train_y, 0.01, "adam", 32, 0.9)
# train
cost = my_dnn.train(3000)
# predict
y_predict_train = my_dnn.predict(train_x)
y_predict_train_int = [int(x) for x in y_predict_train[0]]
y_predict_test = my_dnn.predict(test_x)
y_predict_test_int = [int(x) for x in y_predict_test[0]]
plt.subplot(1, 2, 1)
plot_decision_boundary(lambda x: my_dnn.predict(x.T), train_x, train_y)
plt.subplot(1, 2, 2)
plt.plot(cost)
print("training accuracy: " + str((1 - np.sum(np.abs(y_predict_train_int - train_y)) / train_y.shape[1]) * 100) + "%")
print("test accuracy: " + str((1 - np.sum(np.abs(y_predict_test_int - test_y)) / test_y.shape[1]) * 100) + "%")
plt.show()
The dataset is the same one used in the previous assignment.
1. Results with no optimization:
2. Results with mini-batch gradient descent:
Each mini-batch has a size of 32.
The loss fluctuates a lot; it might be a data issue, or possibly a bug in the code. If any expert passing by has time, please take a look.
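One likely reason for the jagged curve (rather than a bug) is that the logged cost comes from individual mini-batches, and the cost of a single small mini-batch naturally varies from batch to batch. Below is a minimal sketch of an alternative, reusing the class's own L_model_forward, computer_cost and minibatchs; the helper name epoch_average_cost is hypothetical and not part of the original class. Averaging the cost over all mini-batches of an epoch before appending it to costs should give a much smoother curve.
code:
# hypothetical helper: log the example-weighted average cost of one full epoch
def epoch_average_cost(model):
    total_cost, total_examples = 0.0, 0
    for minibatch_x, minibatch_y in model.minibatchs:
        model.AL['A0'] = minibatch_x
        model.Y = minibatch_y
        model.m = minibatch_x.shape[1]
        model.L_model_forward()
        # weight each mini-batch cost by its number of examples
        total_cost += model.computer_cost() * model.m
        total_examples += model.m
    return total_cost / total_examples
# possible usage inside train(), e.g. every 100 iterations:
# if i % 100 == 0:
#     costs.append(epoch_average_cost(self))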
3. Results with momentum:
The results look the same as above, which is quite strange...
4. Results with Adam:
The loss still keeps jumping around. Sigh.
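Another thing worth checking is the shuffling that is currently commented out in ini_gd: np.random.shuffle shuffles along the first axis, i.e. the feature rows of X, while the training examples sit in the columns. Below is a minimal sketch of a column-wise shuffle that keeps X and Y aligned before splitting into mini-batches; this is only a suggestion, and the helper name shuffle_columns is not part of the original code.
code:
import numpy as np

# hypothetical helper: shuffle the examples (columns) of X and Y with the same permutation
def shuffle_columns(X, Y, seed=None):
    rng = np.random.default_rng(seed)
    permutation = rng.permutation(X.shape[1])
    return X[:, permutation], Y[:, permutation]
# in ini_gd this could replace the commented-out np.random.shuffle calls, e.g.:
# self.AL["A0"], self.Y = shuffle_columns(self.AL["A0"], self.Y)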