Andrew Ng Programming Assignment L2W2

This post presents a Python deep-learning neural-network class, covering weight initialization, forward propagation, backpropagation, and training with different optimizers (gradient descent, Momentum, Adam). It also discusses the problems encountered when training with the different optimizers, such as large fluctuations in the loss, along with an accuracy analysis.
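For reference, the parameter updates that the three optimizers are meant to implement are the standard ones (Adam as in Kingma & Ba), written here for a weight matrix $W$ with learning rate $\alpha$ and update-step counter $t$; the same updates apply to $b$:

$$
\begin{aligned}
\text{GD:}\quad & W := W - \alpha\, dW \\
\text{Momentum:}\quad & v_{dW} := \beta_1 v_{dW} + (1-\beta_1)\, dW, \qquad W := W - \alpha\, v_{dW} \\
\text{Adam:}\quad & v_{dW} := \beta_1 v_{dW} + (1-\beta_1)\, dW, \qquad s_{dW} := \beta_2 s_{dW} + (1-\beta_2)\, dW^{2}, \\
& \hat{v} = \frac{v_{dW}}{1-\beta_1^{t}}, \qquad \hat{s} = \frac{s_{dW}}{1-\beta_2^{t}}, \qquad W := W - \alpha\, \frac{\hat{v}}{\sqrt{\hat{s}}+\varepsilon}
\end{aligned}
$$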

code:

from dnn_utils_v2 import *
import numpy as np
import h5py
import matplotlib.pyplot as plt
from dnn_utils_v2 import relu_backward
class dnn:
    def __init__(self,layer_dims) -> None:
        '''layer_dims: number of units in each layer (including the input layer).
           Weights are initialized as randn(n_l, n_{l-1}) / sqrt(n_{l-1}); biases as zeros.'''
        self.WL={}
        self.bL={}
        self.L=len(layer_dims)-1
        # initialize parameters
        np.random.seed(3)
        for i in range(1,self.L+1):
            self.WL['W'+str(i)]=np.random.randn(layer_dims[i], layer_dims[i-1])  / np.sqrt(layer_dims[i-1])
            self.bL['b'+str(i)]=np.zeros((layer_dims[i],1))
        # caches for the forward pass (Z, A) and the backward pass (dZ, dW, db, dA)
        self.XL={}
        self.AL={}
        self.ZL={}
        self.dZ={}
        self.dW={}
        self.db={}
        self.dA={}
        
        self.minibatchs=[]
    def input_data(self,X,Y,learning_rate,optimizer,minibatchsize=64,b1=0.9,b2=0.8):
        self.learning_rate=learning_rate
        # mini-batch size
        self.minbatch_size=minibatchsize
        self.optimizer=optimizer
        self.AL["A0"]=X
        self.Y=Y
        self.m=X.shape[1]
        if optimizer=='':
            # plain (batch) gradient descent, no optimization
            self.m=X.shape[1]
            
        elif optimizer=='gd':
            # mini-batch gradient descent:
            # partition the data into mini-batches
            self.ini_gd()
        elif optimizer=='moment':
            self.ini_mom(b1)
        elif optimizer=='adam':
            self.ini_ada(b1,b2)
    
    def ini_gd(self):
        # state=np.random.get_state()
        # np.random.shuffle(self.AL["A0"])
        # np.random.set_state(state)
        # np.random.shuffle(self.Y)
        self.minibatch_num=np.floor(self.Y.shape[1]/self.minbatch_size)
        print("split into "+str(int(self.minibatch_num))+" mini-batches")
        # partition into mini-batches
        for k in range(int(self.minibatch_num)):
            temp_minibatch_x=self.AL["A0"][:,k*self.minbatch_size:(k+1)*self.minbatch_size]
            temp_minibatch_y=self.Y[:,k*self.minbatch_size:(k+1)*self.minbatch_size]
            temp_minibatch=(temp_minibatch_x,temp_minibatch_y)
            print("mini-batch "+str(k)+": x.shape="+str(temp_minibatch_x.shape)+" y.shape="+str(temp_minibatch_y.shape))
            self.minibatchs.append(temp_minibatch)
        # last, smaller mini-batch if the batch size does not divide m evenly
        if self.Y.shape[1]%self.minbatch_size!=0:
            temp_minibatch_x=self.AL["A0"][:,int(self.minibatch_num)*self.minbatch_size:]
            temp_minibatch_y=self.Y[:,int(self.minibatch_num)*self.minbatch_size:]
            temp_minibatch=(temp_minibatch_x,temp_minibatch_y)
            print("mini-batch "+str(int(self.minibatch_num))+": x.shape="+str(temp_minibatch_x.shape)+" y.shape="+str(temp_minibatch_y.shape))
            self.minibatchs.append(temp_minibatch)
    def ini_mom(self,b1):
        # hyperparameter
        self.b1=b1
        self.ini_gd()
        self.ini_v()
    def ini_ada(self,b1,b2):
        # hyperparameters
        self.b1=b1
        self.b2=b2
        # step counter used for Adam's bias correction
        self.t=0
        self.ini_gd()
        self.ini_v()
        self.ini_s()
    def set_data(self,X):
        # set the input when predicting
        self.m=X.shape[1]
        self.AL["A0"]=X
    def ini_v(self):
        # initialize the momentum terms (vdW, vdb)
        self.vdW={}
        self.vdb={}
        for i in range(1,self.L+1):
            self.vdW['dW'+str(i)]=np.zeros(self.WL["W"+str(i)].shape)
            self.vdb['db'+str(i)]=np.zeros(self.bL['b'+str(i)].shape)
    def ini_s(self):
        # initialize Adam's second-moment terms (SdW, Sdb) and the bias-corrected caches
        self.SdW={}
        self.Sdb={}
        self.vWc={}
        self.vbc={}
        self.sWc={}
        self.sbc={}
        for i in range(1,self.L+1):
            self.SdW['dW'+str(i)]=np.zeros(self.WL["W"+str(i)].shape)
            self.Sdb['db'+str(i)]=np.zeros(self.bL['b'+str(i)].shape)
    # Forward-propagation module: Z and A are cached for every layer during the forward pass
    def linear_activation_forward(self,i,activation):
        
        '''forward propagation for a single layer'''
        # self.ZL['Zi'] and self.AL['Ai'] hold layer i's pre-activation and activation
        # the computed Z, A, W, b are all stored on the object
        self.ZL['Z'+str(i)]=np.dot(self.WL['W'+str(i)],self.AL['A'+str(i-1)])+self.bL['b'+str(i)]
        if activation=="sigmoid":
            self.AL['A'+str(i)]=1/(1+np.exp(-self.ZL['Z'+str(i)]))
        elif activation=="relu":
            self.AL['A'+str(i)]=np.maximum(0,self.ZL['Z'+str(i)])
        
    def L_model_forward(self):
        # the first L-1 layers use ReLU; the last layer uses sigmoid
        for i in range(1,self.L):
            self.linear_activation_forward(i,"relu")
        self.linear_activation_forward(self.L,"sigmoid")
        # check that the final output has the shape required for binary classification
        # print("AL.shape="+str(self.AL['A'+str(self.L)].shape)+" Y.shape="+str(self.Y.shape[1]))
        assert(self.AL['A'+str(self.L)].shape==(1,self.m))

    # Cost function: cross-entropy
    # J = -(1/m) * sum( Y*log(AL) + (1-Y)*log(1-AL) ); 1e-5 is added inside the logs for numerical stability
    def computer_cost(self):
        return np.squeeze(-1/self.m*np.sum(self.Y*np.log(self.AL['A'+str(self.L)]+1e-5)+(1-self.Y)*np.log(1-self.AL['A'+str(self.L)]+1e-5)))

    # Backward-propagation module
    def linear_backward(self,i):
        '''compute dW[i], db[i] and dA[i-1] from dZ[i]'''
        self.dW['dW'+str(i)]=1/self.Y.shape[1]*np.dot(self.dZ['dZ'+str(i)],self.AL["A"+str(i-1)].T)
        self.db['db'+str(i)]=1/self.Y.shape[1]*np.sum(self.dZ['dZ'+str(i)],axis=1,keepdims=True)
        self.dA['dA'+str(i-1)]=np.dot(self.WL['W'+str(i)].T,self.dZ['dZ'+str(i)])
    def L_model_backforward(self):
        # start from the last layer
        # dA for the output layer (derivative of the cross-entropy cost)
        self.dA['dA'+str(self.L)]=-(np.divide(self.Y,self.AL['A'+str(self.L)]+1e-10)-np.divide(1-self.Y,1-self.AL['A'+str(self.L)]+1e-10))
        # dZ for the output layer (sigmoid derivative)
        s=1/(1+np.exp(-self.ZL['Z'+str(self.L)]))
        self.dZ['dZ'+str(self.L)]=self.dA['dA'+str(self.L)]*s*(1-s)
        # dW, db and dA[L-1] for the output layer
        self.linear_backward(self.L)
        # hidden layers L-1 ... 1 use the ReLU derivative
        for i in reversed(range(1,self.L)):
            self.dZ['dZ'+str(i)]=relu_backward(self.dA['dA'+str(i)],self.ZL['Z'+str(i)])
            # dW, db and dA[i-1] for layer i
            self.linear_backward(i)

    # Parameter updates
    # plain / mini-batch gradient descent update
    def update_wb(self):
        for i in range(1,self.L+1):
            self.WL['W'+str(i)]=self.WL['W'+str(i)]-self.learning_rate*self.dW['dW'+str(i)]
            self.bL['b'+str(i)]=self.bL['b'+str(i)]-self.learning_rate*self.db['db'+str(i)]
    
    # parameter update with momentum
    def update_wb_withmomentum(self):
        for i in range(1,self.L+1):
            self.WL['W'+str(i)]=self.WL['W'+str(i)]-self.learning_rate*self.vdW['dW'+str(i)]
            self.bL['b'+str(i)]=self.bL['b'+str(i)]-self.learning_rate*self.vdb['db'+str(i)]
    # parameter update with Adam (bias-corrected first and second moments)
    def update_wb_withadam(self):
        for i in range(1,self.L+1):
            self.WL['W'+str(i)]=self.WL['W'+str(i)]-self.learning_rate*(self.vWc['dW'+str(i)]/(np.sqrt(self.sWc['dW'+str(i)])+1e-7))
            self.bL['b'+str(i)]=self.bL['b'+str(i)]-self.learning_rate*(self.vbc['db'+str(i)]/(np.sqrt(self.sbc['db'+str(i)])+1e-7))

    # update the momentum terms vdW, vdb
    def update_vdwb(self):
        for i in range(1,self.L+1):
            self.vdW['dW'+str(i)]=self.b1*self.vdW['dW'+str(i)]+(1-self.b1)*self.dW['dW'+str(i)]
            self.vdb['db'+str(i)]=self.b1*self.vdb['db'+str(i)]+(1-self.b1)*self.db['db'+str(i)]

    # update vdW/vdb and SdW/Sdb, and compute their bias-corrected versions (Adam)
    def update_svdwb(self):
        # t counts parameter updates; the bias correction divides by (1 - beta^t)
        self.t+=1
        for i in range(1,self.L+1):
            self.vdW['dW'+str(i)]=self.b1*self.vdW['dW'+str(i)]+(1-self.b1)*self.dW['dW'+str(i)]
            self.vdb['db'+str(i)]=self.b1*self.vdb['db'+str(i)]+(1-self.b1)*self.db['db'+str(i)]
            self.vWc['dW'+str(i)]=self.vdW['dW'+str(i)]/(1-self.b1**self.t)
            self.vbc['db'+str(i)]=self.vdb['db'+str(i)]/(1-self.b1**self.t)
            self.SdW['dW'+str(i)]=self.b2*self.SdW['dW'+str(i)]+(1-self.b2)*(self.dW['dW'+str(i)]**2)
            self.Sdb['db'+str(i)]=self.b2*self.Sdb['db'+str(i)]+(1-self.b2)*(self.db['db'+str(i)]**2)
            self.sWc['dW'+str(i)]=self.SdW['dW'+str(i)]/(1-self.b2**self.t)
            self.sbc['db'+str(i)]=self.Sdb['db'+str(i)]/(1-self.b2**self.t)
    def train(self,iterations):
        costs=[]
        for i in range(iterations):
            if self.optimizer=='':
                # forward propagation
                self.L_model_forward()
                # compute the cost
                cost=self.computer_cost()
                # backward propagation
                self.L_model_backforward()
                # update the parameters
                self.update_wb()
                if i%100==0:
                    costs.append(cost)
                    print("iteration "+str(i)+" cost: "+str(cost))
            elif self.optimizer=='gd':
                for minibatch in self.minibatchs:
                    (minibatch_x,minibatch_y)=minibatch
                    self.AL['A0']=minibatch_x
                    self.Y=minibatch_y
                    # update m to the current mini-batch size for the forward pass
                    self.m=minibatch_x.shape[1]
                    self.L_model_forward()
                    cost=self.computer_cost()
                    self.L_model_backforward()
                    self.update_wb()
                    if i%100==0:
                        costs.append(cost)
                    #     print("iteration "+str(i)+" minibatch_cost: "+str(cost))
            elif self.optimizer=='moment':
                for minibatch in self.minibatchs:
                    (minibatch_x,minibatch_y)=minibatch
                    self.AL['A0']=minibatch_x
                    self.Y=minibatch_y
                    self.m=minibatch_x.shape[1]
                    self.L_model_forward()
                    cost=self.computer_cost()
                    self.L_model_backforward()
                    self.update_vdwb()
                    self.update_wb_withmomentum()
                    if i%100==0:
                        costs.append(cost)
                        print("第"+str(i)+"次迭代的minibatch_cost:"+str(cost))   
            elif self.optimizer=='adam':
                for minibatch in self.minibatchs:
                    (minibatch_x,minibatch_y)=minibatch
                    self.AL['A0']=minibatch_x
                    self.Y=minibatch_y
                    self.m=minibatch_x.shape[1]
                    self.L_model_forward()
                    cost=self.computer_cost()
                    self.L_model_backforward()
                    self.update_svdwb()
                    self.update_wb_withadam()
                    if i%100==0:
                        costs.append(cost)
                        print("第"+str(i)+"次迭代的minibatch_cost:"+str(cost))
        return costs
    def predict(self,X):
        self.set_data(X)
        self.L_model_forward()
        return self.AL['A'+str(self.L)]>=0.5


# load the dataset
train_x, train_y, test_x, test_y = load_2D_dataset()


# neural network with layer sizes [2, 3, 2, 1] (2-unit input, two hidden layers, 1-unit output)
my_dnn=dnn([2, 3,  2, 1])

# set the data and training hyperparameters: learning rate 0.01, Adam, mini-batch size 32, b1=0.9
my_dnn.input_data(train_x,train_y,0.01,"adam",32,0.9)

# train
cost=my_dnn.train(3000)


# predict

y_predict_train=my_dnn.predict(train_x)
y_predict_train_int = [int(x) for x in y_predict_train[0]]
y_predict_test=my_dnn.predict(test_x)
y_predict_test_int = [int(x) for x in y_predict_test[0]]

plt.subplot(1,2,1)
plot_decision_boundary(lambda x: my_dnn.predict(x.T), train_x, train_y)
plt.subplot(1,2,2)
plt.plot(cost)
print("训练集准确度:"+str((1-np.sum(np.abs(y_predict_train_int-train_y))/train_y.shape[1])*100)+"%")
print("测试集准确度"+str((1-np.sum(np.abs(y_predict_test_int-test_y))/test_y.shape[1])*100)+"%")
plt.show()

The dataset is the same one used in the previous assignment.

1. Results with no optimization (plain batch gradient descent):
(figure omitted)

2. Results with mini-batch gradient descent (mini-batch size 32):
(figure omitted)
The loss fluctuates far too much. It may be a data issue, or it may be a problem in the code; if anyone passing by has time, please take a look. One possible remedy is sketched after this section.

3. Results with momentum:
(figure omitted)
The result looks the same as above, which is strange...

4. Results with Adam:
(figure omitted)
The loss still jumps around. Sigh.
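One likely contributor to the oscillating cost curves above is that the mini-batches are built once, in the original data order, and are never reshuffled between epochs; in addition, the recorded values are per-mini-batch costs (each mini-batch covers a different, fixed slice of the data), so some fluctuation between adjacent points is expected even when training goes well. Below is a minimal sketch, assuming the same (n_x, m) column layout for X and Y as in the class above, of building freshly shuffled mini-batches each epoch (in the spirit of the course's random_mini_batches helper); the function name shuffle_minibatches is hypothetical and not part of the dnn class:

import numpy as np

def shuffle_minibatches(X, Y, batch_size, seed=0):
    """Shuffle the columns of X and Y together, then split them into mini-batches.
    X has shape (n_x, m), Y has shape (1, m)."""
    np.random.seed(seed)
    m = X.shape[1]
    perm = np.random.permutation(m)              # random column order
    X_shuffled, Y_shuffled = X[:, perm], Y[:, perm]
    minibatches = []
    for k in range(0, m, batch_size):            # the last slice may be smaller than batch_size
        minibatches.append((X_shuffled[:, k:k+batch_size], Y_shuffled[:, k:k+batch_size]))
    return minibatches

Rebuilding self.minibatchs with a different seed at the top of every iteration of train, and appending the epoch's average mini-batch cost instead of each individual mini-batch cost, should give a noticeably smoother cost curve.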
