# Backpropagation for a model is hard to implement, and sometimes contains bugs.
# Prove that your backpropagation actually works! To make sure of this, you will apply "gradient checking".
import numpy as np
from testCases_L2W1 import *
from gc_utils import sigmoid, relu, dictionary_to_vector, vector_to_dictionary, gradients_to_vector
# 1. 1-dimensional gradient checking
# Implement "forward propagation" and "backward propagation" for this simple function, i.e. compute J (forward propagation) and its derivative with respect to theta (backward propagation) in two separate functions.
def forward_propagation(x,theta):
"""
Implement the linear forward propagation (compute J) presented in Figure 1 (J(theta) = theta * x)
Arguments:
x -- a real-valued input
theta -- our parameter, a real number as well
Returns:
J -- the value of function J, computed using the formula J(theta) = theta * x
"""
J=theta*x
return J
x, theta = 2, 4
J=forward_propagation(x,theta)
print ("J = " + str(J))
# Now, carry out the backward propagation step of Figure 1 (the derivative computation), i.e. compute the derivative of J with respect to theta. To spare you the calculus: you should get dtheta = x.
def backward_propagation(x,theta):
"""
Computes the derivative of J with respect to theta (see Figure 1).
Arguments:
x -- a real-valued input
theta -- our parameter, a real number as well
Returns:
dtheta -- the gradient of the cost with respect to theta
"""
dtheta=x
return dtheta
x, theta = 2, 4
dtheta = backward_propagation(x, theta)
print ("dtheta = " + str(dtheta))
# Let's implement gradient checking.
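# Gradient checking compares the analytic gradient from backprop against a numerical estimate.
# Formula (1), the two-sided difference:  gradapprox = (J(theta + epsilon) - J(theta - epsilon)) / (2 * epsilon)
# Formula (2), the relative difference:   difference = ||grad - gradapprox||_2 / (||grad||_2 + ||gradapprox||_2)
# If the difference is small (below roughly 1e-7), the backprop gradient is very likely correct.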
def gradient_check(x,theta,epsilon=1e-7):
"""
    Implement gradient checking presented in Figure 1.
Arguments:
x -- a real-valued input
theta -- our parameter, a real number as well
epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
Returns:
difference -- difference (2) between the approximated gradient and the backward propagation gradient
"""
    theta_plus = theta + epsilon                        # theta shifted up by epsilon
    theta_minus = theta - epsilon                       # theta shifted down by epsilon
    J_plus = forward_propagation(x, theta_plus)         # J(theta + epsilon)
    J_minus = forward_propagation(x, theta_minus)       # J(theta - epsilon)
    grad = backward_propagation(x, theta)               # analytic gradient from backprop
    gradapprox = (J_plus - J_minus) / (2 * epsilon)     # two-sided numerical approximation, formula (1)
difference=np.linalg.norm(gradapprox-grad)/(np.linalg.norm(grad)+np.linalg.norm(gradapprox))
if difference<1e-7:
print("The gradient is correct!")
else:
print("The gradient is wrong!")
return difference
x, theta = 2, 4
difference = gradient_check(x, theta)
print("difference = " + str(difference))
# 2. N-dimensional gradient checking
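# In N dimensions, the parameters are first flattened into one column vector theta; then, for each
# component i, J is evaluated with theta[i] shifted by +epsilon and by -epsilon, giving
#   gradapprox[i] = (J(theta + epsilon * e_i) - J(theta - epsilon * e_i)) / (2 * epsilon),
# and the full gradapprox vector is compared to the backprop gradient vector with formula (2).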
def forward_propagation_n(X, Y, parameters):
    """
    Implements the forward propagation (and computes the cost) for the 3-layer model
    LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.
    Arguments:
    X -- training set of shape (input size, number of examples)
    Y -- labels of shape (1, number of examples)
    parameters -- dictionary containing W1, b1, W2, b2, W3, b3
    Returns:
    cost -- the cross-entropy cost
    cache -- tuple of intermediate values needed by backward_propagation_n
    """
    # A generalized L-layer draft of the same computation (note np.dot rather than *,
    # and "W" + str(L) rather than "WL" for the last layer's keys):
    # m = X.shape[1]
    # L = len(parameters) // 2
    # A = X
    # for l in range(1, L):
    #     A_prev = A
    #     W = parameters["W" + str(l)]
    #     b = parameters["b" + str(l)]
    #     Z = np.dot(W, A_prev) + b
    #     A = relu(Z)
    # WL = parameters["W" + str(L)]
    # bL = parameters["b" + str(L)]
    # ZL = np.dot(WL, A) + bL
    # Y_hat = sigmoid(ZL)
    # cost = -1 / m * np.sum(np.multiply(Y, np.log(Y_hat)) + np.multiply(1 - Y, np.log(1 - Y_hat)))
m=X.shape[1]
W1 = parameters["W1"]
b1 = parameters["b1"]
W2 = parameters["W2"]
b2 = parameters["b2"]
W3 = parameters["W3"]
b3 = parameters["b3"]
Z1=np.dot(W1,X)+b1
A1=relu(Z1)
Z2=np.dot(W2,A1)+b2
A2=relu(Z2)
Z3=np.dot(W3,A2)+b3
A3=sigmoid(Z3)
# cost = -1 / m * np.sum(np.multiply(Y, np.log(A3)) + np.multiply(1 - Y, np.log(1 - A3)))
logprobs = np.multiply(-np.log(A3), Y) + np.multiply(-np.log(1 - A3), 1 - Y)
cost = 1. / m * np.sum(logprobs)
cache = (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3)
return cost,cache
def backward_propagation_n(X, Y, cache):
    """
    Implements the backward propagation for the 3-layer model above.
    Arguments:
    X -- input data of shape (input size, number of examples)
    Y -- true labels
    cache -- cache output from forward_propagation_n()
    Returns:
    gradients -- dictionary with the gradients of the cost with respect to each parameter, activation and pre-activation variable
    """
m=X.shape[1]
Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3=cache
dZ3=A3-Y
dW3=1/m*np.dot(dZ3,A2.T)
db3=1/m*np.sum(dZ3,axis=1,keepdims=True)
dA2=np.dot(W3.T,dZ3)
dZ2 = np.multiply(dA2, np.int64(A2 > 0))
dW2=1/m*np.dot(dZ2,A1.T)
db2=1/m*np.sum(dZ2,axis=1,keepdims=True)
dA1=np.dot(W2.T,dZ2)
dZ1=np.multiply(dA1, np.int64(A1 > 0))
dW1=1/m*np.dot(dZ1,X.T)
db1=1/m*np.sum(dZ1,axis=1,keepdims=True)
gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3,
"dA2": dA2, "dZ2": dZ2, "dW2": dW2, "db2": db2,
"dA1": dA1, "dZ1": dZ1, "dW1": dW1, "db1": db1}
return gradients
# A function "dictionary_to_vector()" is provided for you. It converts the "parameters" dictionary into a vector called "values",
# obtained by reshaping all the parameters (W1, b1, W2, b2, W3, b3) into column vectors and concatenating them.
# The inverse function is "vector_to_dictionary", which outputs the "parameters" dictionary back.
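# A minimal sketch of what dictionary_to_vector roughly does (illustrative only; the actual
# implementation lives in gc_utils and may differ in details such as the returned key list):
def dictionary_to_vector_sketch(parameters):
    keys = []
    vectors = []
    for key in ["W1", "b1", "W2", "b2", "W3", "b3"]:
        new_vector = np.reshape(parameters[key], (-1, 1))  # flatten each parameter into a column
        keys = keys + [key] * new_vector.shape[0]
        vectors.append(new_vector)
    theta = np.concatenate(vectors, axis=0)                # stack the columns into one long vector
    return theta, keys
# vector_to_dictionary performs the inverse: it slices this long vector and reshapes each slice back
# to the original parameter shapes; gradients_to_vector applies the same flattening to the gradients
# dictionary (dW1, db1, ..., db3).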
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks whether backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n.
    Arguments:
    parameters -- dictionary containing W1, b1, W2, b2, W3, b3
    gradients -- output of backward_propagation_n, gradients of the cost with respect to the parameters
    X -- input data
    Y -- true labels
    epsilon -- tiny shift to the input to compute the approximated gradient with formula (1)
    Returns:
    difference -- relative difference (2) between the approximated gradient and the backward propagation gradient
    """
theta,_=dictionary_to_vector(parameters)
grad=gradients_to_vector(gradients)
num_parameters=theta.shape[0]
J_plus=np.zeros((num_parameters,1))
J_minus=np.zeros((num_parameters,1))
gradapprox=np.zeros((num_parameters,1))
    # print(num_parameters)  # theta has shape (47, 1)
for i in range(num_parameters):
        # Compute J_plus[i]. Inputs: "theta, epsilon". Output = "J_plus[i]".
        thetaplus = np.copy(theta)  # a plain copy: modifying thetaplus leaves theta unchanged
        # print(thetaplus[i])
        # thetaplus[i][0] = thetaplus[i][0] + epsilon  # official version; the [0] is optional here because the array has a single column, i.e. one number per row
        thetaplus[i] = thetaplus[i] + epsilon
J_plus[i],_=forward_propagation_n(X,Y,vector_to_dictionary(thetaplus))
        thetaminus = np.copy(theta)
        thetaminus[i][0] = thetaminus[i][0] - epsilon  # shift theta[i] down by epsilon
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus))
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
    # print(grad.shape); print(gradapprox.shape)  # debug check: both should be (47, 1)
difference=np.linalg.norm(gradapprox-grad)/(np.linalg.norm(grad)+np.linalg.norm(gradapprox))
if difference > 1e-7:
print(
"\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
else:
print(
"\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")
return difference
X, Y, parameters = gradient_check_n_test_case()
cost, cache = forward_propagation_n(X, Y, parameters)
gradients = backward_propagation_n(X, Y, cache)
difference = gradient_check_n(parameters, gradients, X, Y)