Build a deep neural network with an arbitrary number of layers
- Use non-linear units such as ReLU to improve the model
- Build a deeper neural network (with more than one hidden layer)
- Implement an easy-to-use neural network class
import numpy as np
import matplotlib.pyplot as plt
import h5py
from testCases_v2 import *  # provides test cases used to check that the functions are correct
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward  # provides the activation helpers needed in this notebook

plt.rcParams['figure.figsize'] = (5.0, 4.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

np.random.seed(1)  # keep all calls to random functions consistent
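# dnn_utils_v2 itself is not shown in this notebook. The sketch below is an assumed, minimal
# re-implementation of the four imported helpers, following the usual convention that each forward
# helper returns (A, cache) with cache = Z and each backward helper turns dA into dZ; the real
# file may differ in details.
def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    return A, Z                      # cache Z for the backward pass

def relu(Z):
    A = np.maximum(0, Z)
    return A, Z

def sigmoid_backward(dA, cache):
    Z = cache
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)          # dZ = dA * sigma'(Z)

def relu_backward(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0                   # the gradient passes through only where Z > 0
    return dZ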
# Initialization
# Write two helper functions to initialize the model's parameters.
# The first initializes the parameters of a two-layer model.
# The second generalizes the initialization to an L-layer model.

# 2-layer model
# The model's structure is: LINEAR -> RELU -> LINEAR -> SIGMOID.
# Initialize the weight matrices randomly. Make sure the dimensions are exact; use np.random.randn(shape) * 0.01.
# Initialize the biases to zero, using np.zeros(shape).
def initialize_parameters(n_x, n_y, n_h):
    # n_x -- size of the input layer
    # n_h -- size of the hidden layer
    # n_y -- size of the output layer
    # (note: this helper takes its arguments in the order (n_x, n_y, n_h), which is the order used by the calls below)
    np.random.seed(1)
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    assert (W1.shape == (n_h, n_x))
    assert (b1.shape == (n_h, 1))
    assert (W2.shape == (n_y, n_h))
    assert (b2.shape == (n_y, 1))

    parameters = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2
    }
    return parameters

parameters = initialize_parameters(2, 1, 2)
# print("W1 = " + str(parameters["W1"]))
# print("b1 = " + str(parameters["b1"]))
# print("W2 = " + str(parameters["W2"]))
# print("b2 = " + str(parameters["b2"]))

# We store n_l, the number of units in each layer, in a variable layer_dims. For example, last week's
# "planar data classification" model had layer_dims = [2, 4, 1]: two inputs, one hidden layer with 4 hidden
# units, and an output layer with 1 output unit (think of these as layer 0, layer 1, layer 2). Although
# len(layer_dims) is 3, the number of layers is L = 2.
# So W1 has shape (4, 2), b1 has shape (4, 1), W2 has shape (1, 4), and b2 has shape (1, 1).
# Now apply the same idea to L layers!
def initialize_parameters_deep(layer_dims):
    np.random.seed(3)
    parameters = {}
    for i in range(1, len(layer_dims)):
        # note: the indices are [i] and [i-1]; using [i+1] and [i] would run past the end of layer_dims
        parameters["W" + str(i)] = np.random.randn(layer_dims[i], layer_dims[i - 1]) * 0.01
        parameters["b" + str(i)] = np.zeros((layer_dims[i], 1))

        assert (parameters['W' + str(i)].shape == (layer_dims[i], layer_dims[i - 1]))
        assert (parameters['b' + str(i)].shape == (layer_dims[i], 1))
    return parameters

parameters = initialize_parameters_deep([5, 4, 3])
# print("W1 = " + str(parameters["W1"]))
# print("b1 = " + str(parameters["b1"]))
# print("W2 = " + str(parameters["W2"]))
# print("b2 = " + str(parameters["b2"]))

# Forward propagation module
# 1. Linear forward
def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    assert (Z.shape == (W.shape[0], A.shape[1]))
    cache = (A, W, b)
    return Z, cache

A, W, b = linear_forward_test_case()
Z, linear_cache = linear_forward(A, W, b)
# print("Z = " + str(Z))

# 2. Linear-activation forward
# We implement a function that performs the LINEAR forward step followed by the ACTIVATION forward step,
# i.e. the two functions (linear and activation) are combined into one (LINEAR -> ACTIVATION).
def linear_activation_forward(A_prev, W, b, activation):
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)
    return A, cache

A_prev, W, b = linear_activation_forward_test_case()
A, linear_activation_cache = linear_activation_forward(A_prev, W, b, activation="sigmoid")
# print("With sigmoid: A = " + str(A))
# print(linear_activation_cache[0])
# print(linear_activation_cache[1])
# A, linear_activation_cache = linear_activation_forward(A_prev, W, b, activation="relu")
# print("With ReLU: A = " + str(A))

# 3. L-layer model
# We need a function that replicates the previous one (linear_activation_forward with RELU) L-1 times,
# followed by one linear_activation_forward with SIGMOID.
# Use the functions you wrote previously.
# Use a for loop to replicate [LINEAR -> RELU] (L-1) times.
# Don't forget to keep track of the caches in the "caches" list.
def L_model_forward(X, parameters):
    caches = []
    L = len(parameters) // 2
    A = X
    for l in range(1, L):
        A_prev = A
        W = parameters["W" + str(l)]
        b = parameters["b" + str(l)]
        A, linear_activation_cache = linear_activation_forward(A_prev, W, b, "relu")
        caches.append(linear_activation_cache)

    WL = parameters["W" + str(L)]
    bL = parameters["b" + str(L)]
    Y_hat, cache = linear_activation_forward(A, WL, bL, "sigmoid")
    caches.append(cache)

    assert (Y_hat.shape == (1, X.shape[1]))
    return Y_hat, caches

X, parameters = L_model_forward_test_case()
Y_hat, caches = L_model_forward(X, parameters)
# print("Y_hat = " + str(Y_hat))
# print("Length of caches list = " + str(len(caches)))
# L_model_forward takes the input X and outputs a row vector AL (here called Y_hat) containing the
# predictions. It also records all the intermediate values in "caches", which are needed later to
# compute the gradients of the loss.

# Cost function
def compute_cost(Y_hat, Y):
    m = Y.shape[1]
    cost = -1 / m * np.sum((Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)), axis=1, keepdims=True)
    cost = np.squeeze(cost)
    return cost

Y, Y_hat = compute_cost_test_case()
# print("cost = " + str(compute_cost(Y_hat, Y)))
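# For reference, compute_cost implements the cross-entropy cost
#     J = -(1/m) * sum_i [ y(i) * log(y_hat(i)) + (1 - y(i)) * log(1 - y_hat(i)) ]
# A quick, hand-checkable sanity check (illustrative values, not part of the original notebook):
# predicting 0.5 for every example gives a cost of log(2) ~= 0.693.
# Y_demo = np.array([[1, 0, 1]])
# print(compute_cost(np.array([[0.5, 0.5, 0.5]]), Y_demo))     # ~0.6931 = log(2)
# print(compute_cost(np.array([[0.99, 0.01, 0.99]]), Y_demo))  # ~0.0101, nearly perfect predictions

# The backward propagation module below implements the standard gradients for layer l:
#     dW[l]   = (1/m) * dZ[l] . A[l-1].T
#     db[l]   = (1/m) * (sum of dZ[l] over the m examples)
#     dA[l-1] = W[l].T . dZ[l]
# starting from dAL = -(Y / AL - (1 - Y) / (1 - AL)) at the output layer.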
# Backward propagation module
# 1. Linear backward
def linear_backward(dZ, cache):
    # Arguments:
    # dZ -- Gradient of the cost with respect to the linear output (of current layer l)
    # cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer
    A_prev, W, b = cache
    m = dZ.shape[1]
    dW = 1 / m * np.dot(dZ, A_prev.T)
    db = 1 / m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    assert (dA_prev.shape == A_prev.shape)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    return dA_prev, dW, db

dZ, linear_cache = linear_backward_test_case()
dA_prev, dW, db = linear_backward(dZ, linear_cache)
# print("dA_prev = " + str(dA_prev))
# print("dW = " + str(dW))
# print("db = " + str(db))

# 2. Linear-activation backward
# Create a function that merges the two helper steps: linear_backward and the backward step of the
# activation, giving linear_activation_backward.
def linear_activation_backward(dA, cache, activation):
    # Arguments:
    # dA -- post-activation gradient for current layer l
    # cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    # activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    # Returns:
    # dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    linear_cache, activation_cache = cache
    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

AL, linear_activation_cache = linear_activation_backward_test_case()
dA_prev, dW, db = linear_activation_backward(AL, linear_activation_cache, activation="sigmoid")
# print("sigmoid:")
# print("dA_prev = " + str(dA_prev))
# print("dW = " + str(dW))
# print("db = " + str(db) + "\n")
dA_prev, dW, db = linear_activation_backward(AL, linear_activation_cache, activation="relu")
# print("relu:")
# print("dA_prev = " + str(dA_prev))
# print("dW = " + str(dW))
# print("db = " + str(db))

# 3. L-layer model backward
def L_model_backward(Y_hat, Y, caches):
    grads = {}
    L = len(caches)
    m = Y_hat.shape[1]
    Y = Y.reshape(Y_hat.shape)  # after this line, Y is the same shape as Y_hat
    dAL = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))  # derivative of the cost with respect to Y_hat (AL)

    # Output layer (SIGMOID -> LINEAR). Note the index shift used throughout this function:
    # grads["dA" + str(l + 1)] holds the gradient with respect to A[l], the activation of the previous layer.
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")

    for l in reversed(range(L - 1)):
        # Inputs: grads["dA" + str(l + 2)], caches.
        # Outputs: grads["dA" + str(l + 1)], grads["dW" + str(l + 1)], grads["db" + str(l + 1)].
        current_cache = caches[l]  # l runs from L-2 down to 0
        # on the first pass (l = L-2), "dA" + str(l + 2) is "dA" + str(L), which was stored by the
        # sigmoid step above (it is the gradient with respect to A[L-1], not dAL itself)
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 2)], current_cache, "relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads

AL, Y_assess, caches = L_model_backward_test_case()
grads = L_model_backward(AL, Y_assess, caches)
# print("dW1 = " + str(grads["dW1"]))
# print("db1 = " + str(grads["db1"]))
# print("dA1 = " + str(grads["dA1"]))

# Update parameters
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2
    for l in range(L):
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]
    return parameters

parameters, grads = update_parameters_test_case()
parameters = update_parameters(parameters, grads, 0.1)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

# def two_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
#     costs = []
#     grads = {}
#     n_x, n_h, n_y = layers_dims
#     parameters = initialize_parameters(n_x, n_y, n_h)
#     W1 = parameters["W1"]
#     b1 = parameters["b1"]
#     W2 = parameters["W2"]
#     b2 = parameters["b2"]
#     for i in range(num_iterations):
#         A, cache1 = linear_activation_forward(X, W1, b1, "relu")
#         Y_hat, cache2 = linear_activation_forward(A, W2, b2, "sigmoid")
#         cost = compute_cost(Y_hat, Y)
#
#         # Backward propagation. Inputs: "dA2, cache2, cache1". Outputs: "dA1, dW2, db2; also dA0 (not used), dW1, db1".
#         dA2 = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))
#         dA1, dW2, db2 = linear_activation_backward(dA2, cache2, "sigmoid")
#         dA0, dW1, db1 = linear_activation_backward(dA1, cache1, "relu")
#
#         grads['dW1'] = dW1
#         grads['db1'] = db1
#         grads['dW2'] = dW2
#         grads['db2'] = db2
#
#         parameters = update_parameters(parameters, grads, learning_rate)
#
#         W1 = parameters["W1"]
#         b1 = parameters["b1"]
#         W2 = parameters["W2"]
#         b2 = parameters["b2"]
#
#         if print_cost and i % 100 == 0:
#             print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
#         if print_cost and i % 100 == 0:
#             costs.append(cost)
#
#     plt.plot(np.squeeze(costs))
#     plt.ylabel('cost')
#     plt.xlabel('iterations (per hundreds)')
#     plt.title("Learning rate =" + str(learning_rate))
#     plt.show()
#
#     return parameters
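# The two-layer model above is kept commented out; the same training loop generalizes directly to
# L layers using the helpers defined in this notebook. The sketch below is illustrative (the name
# L_layer_model and the default hyperparameters are assumptions, not part of the original notebook).
def L_layer_model(X, Y, layer_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    np.random.seed(1)
    costs = []
    parameters = initialize_parameters_deep(layer_dims)  # layer_dims = [n_x, n_h1, ..., n_y]
    for i in range(num_iterations):
        Y_hat, caches = L_model_forward(X, parameters)    # [LINEAR -> RELU] * (L-1) -> LINEAR -> SIGMOID
        cost = compute_cost(Y_hat, Y)
        grads = L_model_backward(Y_hat, Y, caches)        # gradients for every layer
        parameters = update_parameters(parameters, grads, learning_rate)
        if print_cost and i % 100 == 0:
            print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
            costs.append(cost)
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()
    return parameters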