吴恩达神经网络学习-L1W4作业1

构建一个任意层数的深度神经网络

  • 使用ReLU等非线性单位来改善模型
  • 建立更深的神经网络(具有1个以上的隐藏层)
  • 实现一个易于使用的神经网络类
    import numpy as np
    import matplotlib.pyplot as plt
    import h5py
    from testCases_v2 import * #提供了一些测试用例来评估函数的正确性
    from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward #为此笔记本提供了一些必要的函数
    
    
    # Matplotlib defaults used by figures created after this point.
    plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'
    
    np.random.seed(1) # seed NumPy's global RNG so that random calls are reproducible
    
    #初始化 写两个辅助函数用来初始化模型的参数。 第一个函数将用于初始化两层模型的参数。 第二个将把初始化过程推广到L层模型上
    #2层
    # 模型的结构为:LINEAR -> RELU -> LINEAR -> SIGMOID。
    # 随机初始化权重矩阵。 确保准确的维度,使用np.random.randn(shape)* 0.01。
    # 将偏差初始化为0。 使用np.zeros(shape)
    
    def initialize_parameters(n_x, n_y, n_h):
        """Create the weights and biases of a network with one hidden layer.

        Note the positional order of the arguments: input size, OUTPUT size,
        then hidden size.

        Arguments:
        n_x -- size of the input layer
        n_y -- size of the output layer
        n_h -- size of the hidden layer

        Returns:
        parameters -- dict with keys "W1" (n_h, n_x), "b1" (n_h, 1),
                      "W2" (n_y, n_h) and "b2" (n_y, 1)

        Side effect: reseeds NumPy's global RNG with 1 on every call, so the
        returned weights are identical from call to call.
        """
        np.random.seed(1)

        expected_shapes = {
            "W1": (n_h, n_x),
            "b1": (n_h, 1),
            "W2": (n_y, n_h),
            "b2": (n_y, 1),
        }

        # Weights are small Gaussian values, biases start at zero.  The dict
        # literal is evaluated top to bottom, so W1 is drawn before W2.
        parameters = {
            "W1": np.random.randn(*expected_shapes["W1"]) * 0.01,
            "b1": np.zeros(expected_shapes["b1"]),
            "W2": np.random.randn(*expected_shapes["W2"]) * 0.01,
            "b2": np.zeros(expected_shapes["b2"]),
        }

        for key, shape in expected_shapes.items():
            assert parameters[key].shape == shape

        return parameters
    
    # Sample call: positional arguments are (n_x, n_y, n_h), i.e. 2 inputs, 1 output, 2 hidden units.
    parameters = initialize_parameters(2,1,2)
    # Uncomment to inspect the generated parameters:
    # print("W1 = " + str(parameters["W1"]))
    # print("b1 = " + str(parameters["b1"]))
    # print("W2 = " + str(parameters["W2"]))
    # print("b2 = " + str(parameters["b2"]))
    
    
    # 我们将在不同的layer_dims变量中存储n_l,即不同层中的神经元数。例如,上周“二维数据分类模型”的layer_dims为[2,4,1]:
    # 即有两个输入,一个隐藏层包含4个隐藏单元,一个输出层包含1个输出单元。(这里相当于是layer0、layer1、layer2),虽然layer_dims的长度为3,但层数L=2
    # 因此,W1的维度为(4,2),b1的维度为(4,1),W2的维度为(1,4),而b2的维度为(1,1)。现在你将把它应用到L层!
    def initialize_parameters_deep(layer_dims):
        """Create weights and biases for a network with any number of layers.

        Arguments:
        layer_dims -- sequence of layer sizes, input layer first; entry 0 is
                      the input size, so the network has len(layer_dims) - 1
                      parameterised layers

        Returns:
        parameters -- dict with "W1", "b1", ..., "WL", "bL", where Wl has shape
                      (layer_dims[l], layer_dims[l-1]) and bl has shape
                      (layer_dims[l], 1)

        Side effect: reseeds NumPy's global RNG with 3 on every call.
        """
        np.random.seed(3)
        parameters = {}

        # Walk over consecutive (previous size, current size) pairs; layer
        # numbering starts at 1 because index 0 is the input layer.
        size_pairs = zip(layer_dims[:-1], layer_dims[1:])
        for layer, (n_prev, n_curr) in enumerate(size_pairs, start=1):
            w_key = "W" + str(layer)
            b_key = "b" + str(layer)

            parameters[w_key] = np.random.randn(n_curr, n_prev) * 0.01
            parameters[b_key] = np.zeros((n_curr, 1))

            assert parameters[w_key].shape == (n_curr, n_prev)
            assert parameters[b_key].shape == (n_curr, 1)

        return parameters
    
    # Sample call: layer sizes 5 -> 4 -> 3, so W1 is (4, 5) and W2 is (3, 4).
    parameters = initialize_parameters_deep([5,4,3])
    # Uncomment to inspect the generated parameters:
    # print("W1 = " + str(parameters["W1"]))
    # print("b1 = " + str(parameters["b1"]))
    # print("W2 = " + str(parameters["W2"]))
    # print("b2 = " + str(parameters["b2"]))
    
    
    #正向传播模块
    #1.线性正向
    def linear_forward(A, W, b):
        """Compute the linear part of a layer's forward pass, Z = W . A + b.

        Arguments:
        A -- input to the layer (activations of the previous layer, or the data)
        W -- weight matrix of the layer
        b -- bias of the layer, added to the product by NumPy broadcasting

        Returns:
        Z -- pre-activation values, shape (W.shape[0], A.shape[1])
        cache -- the tuple (A, W, b), kept for the backward pass
        """
        pre_activation = np.dot(W, A) + b

        expected_shape = (W.shape[0], A.shape[1])
        assert pre_activation.shape == expected_shape

        return pre_activation, (A, W, b)
    
    # Run linear_forward on a fixture; linear_forward_test_case is not defined in
    # this listing (presumably supplied by the testCases_v2 star import).
    A,W,b=linear_forward_test_case()
    Z,linear_cache=linear_forward(A,W,b)
    # print("Z = " + str(Z))
    
    #2.正向线性激活
    #我们将实现一个函数用以执行LINEAR正向步骤和ACTIVATION正向步骤。
    #把两个函数(线性和激活)组合为一个函数(LINEAR-> ACTIVATION)
    def linear_activation_forward(A_prev, W, b, activation):
        """Run one full layer forward: LINEAR followed by the chosen ACTIVATION.

        Arguments:
        A_prev -- activations from the previous layer (or the input data)
        W -- weight matrix of this layer
        b -- bias of this layer
        activation -- which activation to apply, "sigmoid" or "relu"

        Returns:
        A -- output of the activation, shape (W.shape[0], A_prev.shape[1])
        cache -- tuple (linear_cache, activation_cache) kept for the backward pass

        Raises:
        ValueError -- if `activation` is neither "sigmoid" nor "relu"
        """
        # Validate first: previously an unknown name skipped both branches and
        # surfaced as an UnboundLocalError at the shape assertion below.
        if activation == "sigmoid":
            activation_fn = sigmoid
        elif activation == "relu":
            activation_fn = relu
        else:
            raise ValueError(
                "activation must be 'sigmoid' or 'relu', got {!r}".format(activation)
            )

        # The linear step is the same for both activations.
        Z, linear_cache = linear_forward(A_prev, W, b)
        # Both activation helpers return the activated values together with a
        # cache object that their *_backward counterpart consumes later.
        A, activation_cache = activation_fn(Z)

        assert (A.shape == (W.shape[0], A_prev.shape[1]))
        cache = (linear_cache, activation_cache)

        return A, cache
    
    # Run linear_activation_forward on one fixture with each supported activation.
    A_prev, W, b = linear_activation_forward_test_case()
    
    A, linear_activation_cache = linear_activation_forward(A_prev, W, b, activation = "sigmoid")
    # print("With sigmoid: A = " + str(A))
    # print(linear_activation_cache[0])
    # print(linear_activation_cache[1])
    #
    # Same inputs through ReLU; A and linear_activation_cache are overwritten.
    A, linear_activation_cache = linear_activation_forward(A_prev, W, b, activation = "relu")
    # print("With ReLU: A = " + str(A))
    
    
    #3.L层模型
    #需要一个函数:先重复调用L-1次使用RELU的linear_activation_forward,最后再调用一次使用SIGMOID的linear_activation_forward。
    # 使用你先前编写的函数
    # 使用for循环复制[LINEAR-> RELU](L-1)次
    # 不要忘记在“cache”列表中更新缓存。
    def L_model_forward(X, parameters):
        """Forward pass of the whole network: [LINEAR -> RELU] * (L-1), then LINEAR -> SIGMOID.

        Arguments:
        X -- input data; its second dimension is the number of examples
        parameters -- dict holding "W1", "b1", ..., "WL", "bL"

        Returns:
        Y_hat -- output of the final sigmoid layer, shape (1, X.shape[1])
        caches -- list with one cache per layer, in layer order (index 0 is
                  layer 1), as returned by linear_activation_forward
        """
        # Every layer contributes one W and one b entry.
        num_layers = len(parameters) // 2
        caches = []
        activation = X

        # Hidden layers 1 .. L-1 use ReLU.
        for layer in range(1, num_layers):
            weights = parameters["W" + str(layer)]
            bias = parameters["b" + str(layer)]
            activation, layer_cache = linear_activation_forward(
                activation, weights, bias, "relu"
            )
            caches.append(layer_cache)

        # The output layer L uses the sigmoid.
        out_weights = parameters["W" + str(num_layers)]
        out_bias = parameters["b" + str(num_layers)]
        Y_hat, output_cache = linear_activation_forward(
            activation, out_weights, out_bias, "sigmoid"
        )
        caches.append(output_cache)

        assert Y_hat.shape == (1, X.shape[1])

        return Y_hat, caches
    
    # Run the full forward pass on a fixture.  L_model_forward takes the input X
    # and returns the row vector of predictions together with "caches", the
    # per-layer intermediate values that the backward pass consumes.
    X,parameters=L_model_forward_test_case()
    Y_hat,caches=L_model_forward(X,parameters)
    # print("Y_hat = " + str(Y_hat))
    # print("Length of caches list = " + str(len(caches)))
    
    #损失函数
    def compute_cost(Y_hat, Y):
        """Compute the cross-entropy cost of predictions against labels.

        cost = -(1/m) * sum(Y * log(Y_hat) + (1 - Y) * log(1 - Y_hat))

        Arguments:
        Y_hat -- predicted probabilities, same shape as Y
        Y -- true labels; its second dimension m is the number of examples

        Returns:
        cost -- the cost with singleton dimensions squeezed away (a 0-d array
                when Y has a single row)
        """
        m = Y.shape[1]

        # Keep predictions strictly inside (0, 1).  A prediction of exactly 0 or
        # 1 would otherwise yield log(0) = -inf, and 0 * -inf = nan, turning the
        # cost into nan/inf even for a perfect prediction.  The bound is far
        # below the resolution of any realistic sigmoid output.
        eps = 1e-15
        Y_hat = np.clip(Y_hat, eps, 1 - eps)

        log_likelihood = Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)
        cost = -1 / m * np.sum(log_likelihood, axis=1, keepdims=True)
        return np.squeeze(cost)
    # Load a fixture for compute_cost; this rebinds the module-level Y and Y_hat.
    Y, Y_hat= compute_cost_test_case()
    # print("cost = " + str(compute_cost(Y_hat, Y)))
    
    #反向传播模块
    #1.线性反向
    def linear_backward(dZ, cache):
        """Backward pass through the linear part of a single layer.

        Arguments:
        dZ -- gradient of the cost with respect to the linear output of the
              current layer; its second dimension m is the number of examples
        cache -- sequence whose first two items are (A_prev, W) from the forward
                 pass of this layer; any further items (such as b) are not read

        Returns:
        dA_prev -- gradient with respect to the previous layer's activations,
                   computed as W.T . dZ
        dW -- gradient with respect to W, computed as (1/m) * dZ . A_prev.T
        db -- gradient with respect to b, the (1/m)-scaled row sums of dZ with
              the column dimension kept
        """
        num_examples = dZ.shape[1]
        prev_activations, weights = cache[0], cache[1]

        # Average the per-example contributions over the batch.
        scale = 1 / num_examples
        grad_weights = scale * np.dot(dZ, prev_activations.T)
        grad_bias = scale * np.sum(dZ, axis=1, keepdims=True)
        grad_prev = np.dot(weights.T, dZ)

        return grad_prev, grad_weights, grad_bias
    
    # Run linear_backward on a fixture; this rebinds the module-level linear_cache.
    dZ, linear_cache = linear_backward_test_case()
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    # print ("dA_prev = "+ str(dA_prev))
    # print ("dW = " + str(dW))
    # print ("db = " + str(db))
    
    #2.反向线性激活
    #创建一个合并两个辅助函数的函数:linear_backward 和反向步骤的激活 linear_activation_backward
    def linear_activation_backward(dA, cache, activation):
        """Backward pass through one full layer: ACTIVATION, then LINEAR.

        Arguments:
        dA -- post-activation gradient for the current layer l
        cache -- tuple (linear_cache, activation_cache) stored by the forward pass
        activation -- the activation used in this layer, "sigmoid" or "relu"

        Returns:
        dA_prev -- gradient of the cost with respect to the activation of the
                   previous layer l-1
        dW -- gradient of the cost with respect to W of the current layer
        db -- gradient of the cost with respect to b of the current layer

        Raises:
        ValueError -- if `activation` is neither "sigmoid" nor "relu"
        """
        # Validate first: previously an unknown name skipped both branches and
        # surfaced as an UnboundLocalError at the return statement.
        if activation == "sigmoid":
            activation_backward = sigmoid_backward
        elif activation == "relu":
            activation_backward = relu_backward
        else:
            raise ValueError(
                "activation must be 'sigmoid' or 'relu', got {!r}".format(activation)
            )

        linear_cache, activation_cache = cache

        # Undo the activation to get dZ, then the linear step is the same for both.
        dZ = activation_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        return dA_prev, dW, db
    
    # Run linear_activation_backward on one fixture with each supported activation.
    AL, linear_activation_cache = linear_activation_backward_test_case()
    
    dA_prev, dW, db = linear_activation_backward(AL, linear_activation_cache, activation = "sigmoid")
    # print ("sigmoid:")
    # print ("dA_prev = "+ str(dA_prev))
    # print ("dW = " + str(dW))
    # print ("db = " + str(db) + "\n")
    
    # Same inputs through the ReLU branch; dA_prev, dW and db are overwritten.
    dA_prev, dW, db = linear_activation_backward(AL, linear_activation_cache, activation = "relu")
    # print ("relu:")
    # print ("dA_prev = "+ str(dA_prev))
    # print ("dW = " + str(dW))
    # print ("db = " + str(db))
    
    #3.反向L层模型
    def L_model_backward(Y_hat, Y, caches):
        """Backward pass of the whole network: SIGMOID layer first, then the RELU layers.

        Arguments:
        Y_hat -- output of the forward pass (predicted probabilities)
        Y -- true labels; reshaped to Y_hat's shape before use
        caches -- list of per-layer caches from the forward pass, in layer order
                  (caches[l] belongs to layer l+1; the last one is the sigmoid layer)

        Returns:
        grads -- dict with "dA{k}", "dW{k}" and "db{k}" for k = 1..L.
                 "dW{k}" / "db{k}" are the gradients of layer k's parameters.
                 "dA{k}" is the dA_prev value returned by layer k's backward
                 step, i.e. the gradient with respect to that layer's INPUT
                 activations. In particular "dA{L}" is not the derivative of
                 the cost with respect to Y_hat.
        """
        grads = {}
        L = len(caches)
        Y = Y.reshape(Y_hat.shape)  # after this line, Y has the same shape as Y_hat

        # Derivative of the cross-entropy cost with respect to Y_hat.
        dAL = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))

        # Output layer (sigmoid); its cache is the last one in the list.
        current_cache = caches[L - 1]
        (grads["dA" + str(L)],
         grads["dW" + str(L)],
         grads["db" + str(L)]) = linear_activation_backward(dAL, current_cache, "sigmoid")

        # Hidden layers (relu), walking l = L-2 down to 0.  Layer l+1 consumes
        # the gradient stored under "dA{l+2}" by the layer after it.
        for l in reversed(range(L - 1)):
            current_cache = caches[l]
            dA_prev_temp, dW_temp, db_temp = linear_activation_backward(
                grads["dA" + str(l + 2)], current_cache, "relu"
            )
            grads["dA" + str(l + 1)] = dA_prev_temp
            grads["dW" + str(l + 1)] = dW_temp
            grads["db" + str(l + 1)] = db_temp

        return grads
    
    # Run the full backward pass on a fixture; this rebinds the module-level caches and grads.
    AL, Y_assess, caches = L_model_backward_test_case()
    grads = L_model_backward(AL, Y_assess, caches)
    # print ("dW1 = "+ str(grads["dW1"]))
    # print ("db1 = "+ str(grads["db1"]))
    # print ("dA1 = "+ str(grads["dA1"]))
    
    #更新参数
    def update_parameters(parameters, grads, learning_rate):
        """Apply one gradient-descent step to every layer's W and b.

        Arguments:
        parameters -- dict holding "W1", "b1", ..., "WL", "bL"
        grads -- dict holding the matching "dW1", "db1", ..., "dWL", "dbL"
        learning_rate -- step size multiplying each gradient

        Returns:
        parameters -- the SAME dict object that was passed in, with each entry
                      rebound to a new array (value - learning_rate * gradient);
                      the arrays originally stored in it are not modified
        """
        # Every layer contributes one W and one b entry.
        num_layers = len(parameters) // 2

        for layer in range(1, num_layers + 1):
            for prefix in ("W", "b"):
                key = prefix + str(layer)
                step = learning_rate * grads["d" + key]
                parameters[key] = parameters[key] - step

        return parameters
    
    # Run one update step on a fixture with learning rate 0.1 and print the result.
    parameters, grads = update_parameters_test_case()
    parameters = update_parameters(parameters, grads, 0.1)
    
    print ("W1 = "+ str(parameters["W1"]))
    print ("b1 = "+ str(parameters["b1"]))
    print ("W2 = "+ str(parameters["W2"]))
    print ("b2 = "+ str(parameters["b2"]))
    
    
    
    # def two_layer_model(X,Y,layers_dims,learning_rate=0.0075,num_iterations=3000,print_cost=False):
    #     costs=[]
    #     grads={}
    #     n_x, n_h, n_y=layers_dims
    #     parameters=initialize_parameters(n_x, n_y, n_h)
    #     W1=parameters["W1"]
    #     b1=parameters["b1"]
    #     W2=parameters["W2"]
    #     b2=parameters["b2"]
    #     for i in range(num_iterations):
    #         A,cache1=linear_activation_forward(X,W1,b1,"relu")
    #         Y_hat,cache2=linear_activation_forward(A,W2,b2,"sigmoid")
    #         cost=compute_cost(Y_hat,Y)
    #         # Backward propagation. Inputs: "dA2, cache2, cache1". Outputs: "dA1, dW2, db2; also dA0 (not used), dW1, db1".
    #         dA2 = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))
    #         dA1,dW2,db2=linear_activation_backward(dA2,cache2,"sigmoid")
    #         dA0,dW1,db1=linear_activation_backward(dA1,cache1,"relu")
    #
    #         grads['dW1'] = dW1
    #         grads['db1'] = db1
    #         grads['dW2'] = dW2
    #         grads['db2'] = db2
    #
    #         parameters=update_parameters(parameters,grads,learning_rate)
    #
    #         W1 = parameters["W1"]
    #         b1 = parameters["b1"]
    #         W2 = parameters["W2"]
    #         b2 = parameters["b2"]
    #
    #         if print_cost and i%100==0:
    #             print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
    #         if print_cost and i % 100 == 0:
    #             costs.append(cost)
    #
    #     plt.plot(np.squeeze(costs))
    #     plt.ylabel('cost')
    #     plt.xlabel('iterations (per hundreds)')
    #     plt.title("Learning rate =" + str(learning_rate))
    #     plt.show()
    #
    #     return parameters
    #
    

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值