Planar data classification with one hidden layer (1)

testCases_v2.py
import numpy as np


def layer_sizes_test_case():
    np.random.seed(1)
    X_assess = np.random.randn(5, 3)
    Y_assess = np.random.randn(2, 3)
    return X_assess, Y_assess


def initialize_parameters_test_case():
    n_x, n_h, n_y = 2, 4, 1
    return n_x, n_h, n_y


def forward_propagation_test_case():
    np.random.seed(1)
    X_assess = np.random.randn(2, 3)
    b1 = np.random.randn(4, 1)
    b2 = np.array([[-1.3]])

    parameters = {'W1': np.array([[-0.00416758, -0.00056267],
                                  [-0.02136196, 0.01640271],
                                  [-0.01793436, -0.00841747],
                                  [0.00502881, -0.01245288]]),
                  'W2': np.array([[-0.01057952, -0.00909008, 0.00551454, 0.02292208]]),
                  'b1': b1,
                  'b2': b2}

    return X_assess, parameters


def compute_cost_test_case():
    np.random.seed(1)
    Y_assess = (np.random.randn(1, 3) > 0)
    parameters = {'W1': np.array([[-0.00416758, -0.00056267],
                                  [-0.02136196, 0.01640271],
                                  [-0.01793436, -0.00841747],
                                  [0.00502881, -0.01245288]]),
                  'W2': np.array([[-0.01057952, -0.00909008, 0.00551454, 0.02292208]]),
                  'b1': np.array([[0.],
                                  [0.],
                                  [0.],
                                  [0.]]),
                  'b2': np.array([[0.]])}

    a2 = (np.array([[0.5002307, 0.49985831, 0.50023963]]))

    return a2, Y_assess, parameters


def backward_propagation_test_case():
    np.random.seed(1)
    X_assess = np.random.randn(2, 3)
    Y_assess = (np.random.randn(1, 3) > 0)
    parameters = {'W1': np.array([[-0.00416758, -0.00056267],
                                  [-0.02136196, 0.01640271],
                                  [-0.01793436, -0.00841747],
                                  [0.00502881, -0.01245288]]),
                  'W2': np.array([[-0.01057952, -0.00909008, 0.00551454, 0.02292208]]),
                  'b1': np.array([[0.],
                                  [0.],
                                  [0.],
                                  [0.]]),
                  'b2': np.array([[0.]])}

    cache = {'A1': np.array([[-0.00616578, 0.0020626, 0.00349619],
                             [-0.05225116, 0.02725659, -0.02646251],
                             [-0.02009721, 0.0036869, 0.02883756],
                             [0.02152675, -0.01385234, 0.02599885]]),
             'A2': np.array([[0.5002307, 0.49985831, 0.50023963]]),
             'Z1': np.array([[-0.00616586, 0.0020626, 0.0034962],
                             [-0.05229879, 0.02726335, -0.02646869],
                             [-0.02009991, 0.00368692, 0.02884556],
                             [0.02153007, -0.01385322, 0.02600471]]),
             'Z2': np.array([[0.00092281, -0.00056678, 0.00095853]])}
    return parameters, cache, X_assess, Y_assess


def update_parameters_test_case():
    parameters = {'W1': np.array([[-0.00615039, 0.0169021],
                                  [-0.02311792, 0.03137121],
                                  [-0.0169217, -0.01752545],
                                  [0.00935436, -0.05018221]]),
                  'W2': np.array([[-0.0104319, -0.04019007, 0.01607211, 0.04440255]]),
                  'b1': np.array([[-8.97523455e-07],
                                  [8.15562092e-06],
                                  [6.04810633e-07],
                                  [-2.54560700e-06]]),
                  'b2': np.array([[9.14954378e-05]])}

    grads = {'dW1': np.array([[0.00023322, -0.00205423],
                              [0.00082222, -0.00700776],
                              [-0.00031831, 0.0028636],
                              [-0.00092857, 0.00809933]]),
             'dW2': np.array([[-1.75740039e-05, 3.70231337e-03, -1.25683095e-03,
                               -2.55715317e-03]]),
             'db1': np.array([[1.05570087e-07],
                              [-3.81814487e-06],
                              [-1.90155145e-07],
                              [5.46467802e-07]]),
             'db2': np.array([[-1.08923140e-05]])}
    return parameters, grads


def nn_model_test_case():
    np.random.seed(1)
    X_assess = np.random.randn(2, 3)
    Y_assess = (np.random.randn(1, 3) > 0)
    return X_assess, Y_assess


def predict_test_case():
    np.random.seed(1)
    X_assess = np.random.randn(2, 3)
    parameters = {'W1': np.array([[-0.00615039, 0.0169021],
                                  [-0.02311792, 0.03137121],
                                  [-0.0169217, -0.01752545],
                                  [0.00935436, -0.05018221]]),
                  'W2': np.array([[-0.0104319, -0.04019007, 0.01607211, 0.04440255]]),
                  'b1': np.array([[-8.97523455e-07],
                                  [8.15562092e-06],
                                  [6.04810633e-07],
                                  [-2.54560700e-06]]),
                  'b2': np.array([[9.14954378e-05]])}
    return parameters, X_assess

planar_utils.py


import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model


def plot_decision_boundary(model, X, y):
    # Set min and max values and give it some padding
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(X[0, :], X[1, :], c=y, cmap=plt.cm.Spectral)


def sigmoid(x):
    """
    Compute the sigmoid of x
    Arguments:
    x -- A scalar or numpy array of any size.
    Return:
    s -- sigmoid(x)
    """
    s = 1 / (1 + np.exp(-x))
    return s


def load_planar_dataset():
    np.random.seed(1)
    m = 400  # number of examples
    N = int(m / 2)  # number of points per class
    D = 2  # dimensionality
    X = np.zeros((m, D))  # data matrix where each row is a single example
    Y = np.zeros((m, 1), dtype='uint8')  # labels vector (0 for red, 1 for blue)
    a = 4  # maximum radius of the flower

    for j in range(2):
        ix = range(N * j, N * (j + 1))
        t = np.linspace(j * 3.12, (j + 1) * 3.12, N) + np.random.randn(N) * 0.2  # theta
        r = a * np.sin(4 * t) + np.random.randn(N) * 0.2  # radius
        X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
        Y[ix] = j

    X = X.T
    Y = Y.T
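    # After the transposes, X has shape (2, m) and Y has shape (1, m): one column per example,
    # which is the layout the rest of the code expects.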

    return X, Y


def load_extra_datasets():
    N = 200
    noisy_circles = sklearn.datasets.make_circles(n_samples=N, factor=.5, noise=.3)
    noisy_moons = sklearn.datasets.make_moons(n_samples=N, noise=.2)
    blobs = sklearn.datasets.make_blobs(n_samples=N, random_state=5, n_features=2, centers=6)
    gaussian_quantiles = sklearn.datasets.make_gaussian_quantiles(mean=None, cov=0.5, n_samples=N, n_features=2,
                                                                  n_classes=2, shuffle=True, random_state=None)
    no_structure = np.random.rand(N, 2), np.random.rand(N, 2)

    return noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure

classification.py

import numpy as np
import matplotlib.pyplot as plt
from testCases_v2 import *
import sklearn
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary,sigmoid,load_planar_dataset,load_extra_datasets
np.random.seed(1)
X,Y=load_planar_dataset()
plt.scatter(X[0, :], X[1, :], c=Y.reshape(X[0, :].shape), s=40, cmap=plt.cm.Spectral)
plt.show()
shape_X=X.shape
shape_Y=Y.shape
m=X.shape[1]

# print("The shape of X is:"+str(shape_X))
# print("The shape of Y is:"+str(shape_Y))
# print("I hava m=%d training example!" %(m))

# clf=sklearn.linear_model.LogisticRegressionCV()
# clf.fit(X.T,Y.T.flatten())
# print(Y)
# print(Y.T)
# print(Y.T.flatten())  # flattened into a 1-D array
#
# plot_decision_boundary(lambda x: clf.predict(x), X, Y.flatten())
# plt.title("Logistic Regression")
# plt.show()
#
# LR_predictions = clf.predict(X.T)
# print("Accuracy of logistic regression: %d" % float(
#     (np.dot(Y, LR_predictions) + np.dot(1 - Y, 1 - LR_predictions)) / float(
#         Y.size) * 100) + "%" + "(percentage of correctly labelled datapoints)")

#Interpretation: The dataset is not linearly separable, so logistic regression doesn't perform well.
# Hopefully a neural network will do better. Let's try this now!

def layer_sizes(X,Y):
    """
    Arguments:
    X -- input dataset of shape (input size, number of examples)
    Y -- labels of shape (output size, number of examples)
    Returns:
    n_x -- the size of the input layer
    n_h -- the size of the hidden layer
    n_y -- the size of the output layer
    """
    n_x=X.shape[0]  # size of the input layer
    n_h=4           # size of the hidden layer (fixed to 4 units here)
    n_y=Y.shape[0]  # size of the output layer
    return (n_x, n_h, n_y)
# X_assess,Y_assess=layer_sizes_test_case()
# (n_x,n_h,n_y)=layer_sizes(X_assess,Y_assess)
# print("The size of the input layer is:n_x="+str(n_x))
# print("The size of the hidden layer is:n_h="+str(n_h))
# print("The size of the output layer is:n_y="+str(n_y))

def initialize_parameters(n_x,n_h,n_y):
    """
        Argument:
        n_x -- size of the input layer
        n_h -- size of the hidden layer
        n_y -- size of the output layer
        Returns:
        params -- python dictionary containing your parameters:
                        W1 -- weight matrix of shape (n_h, n_x)
                        b1 -- bias vector of shape (n_h, 1)
                        W2 -- weight matrix of shape (n_y, n_h)
                        b2 -- bias vector of shape (n_y, 1)
        """
    np.random.seed(2)  # we set up a seed so that your output matches ours although the initialization is random
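    # Scaling the random weights by 0.01 keeps the tanh units away from their flat (saturated)
    # regions at the start of training; the biases can start at zero because the random weights
    # already break the symmetry between hidden units.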
    W1=np.random.randn(n_h,n_x)*0.01
    b1=np.zeros((n_h,1))
    W2=np.random.randn(n_y,n_h)*0.01
    b2=np.zeros((n_y,1))
    assert (W1.shape==(n_h,n_x))
    assert (b1.shape==(n_h,1))
    assert (W2.shape==(n_y,n_h))
    assert (b2.shape==(n_y,1))
    parameters={"W1":W1,
                "b1":b1,
                "W2":W2,
                "b2":b2}
    return parameters


# n_x,n_h,n_y=initialize_parameters_test_case()
# parameters=initialize_parameters(n_x,n_h,n_y)
# print("W1="+str(parameters["W1"]))
# print("b1="+str(parameters["b1"]))
# print("W2="+str(parameters["W2"]))
# print("b2="+str(parameters["b2"]))

"""
4.3 - The Loop
Question: Implement forward_propagation().
Instructions:
Look above at the mathematical representation of your classifier.
You can use the function sigmoid(). 
It is built-in (imported) in the notebook.
You can use the function np.tanh(). 
It is part of the numpy library.
The steps you have to implement are:
Retrieve each parameter from the dictionary "parameters" (which is the output of initialize_parameters()) by using parameters[".."].
Implement Forward Propagation.
Compute Z[1],A[1],Z[2]and A[2] (the vector of all your predictions on all the examples in the training set).
Values needed in the backpropagation are stored in "cache". 
The cache will be given as an input to the backpropagation function.
"""
# GRADED FUNCTION:forward_propagation
def forward_propagation(X,parameters):
    """
    Argument:
    X -- input data of size (n_x, m)
    parameters -- python dictionary containing your parameters (output of initialization function)
    Returns:
    A2 -- The sigmoid output of the second activation
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    """
    W1=parameters["W1"]
    b1=parameters["b1"]
    W2=parameters["W2"]
    b2=parameters["b2"]
    # Implement Forward Propagation to calculate A2 (probabilities)
    Z1=np.dot(W1,X)+b1
    A1=np.tanh(Z1)
    Z2=np.dot(W2,A1)+b2
    A2=sigmoid(Z2)
    assert (A2.shape==(1,X.shape[1]))
    cache={"Z1":Z1,
           "A1":A1,
           "Z2":Z2,
           "A2":A2}
    return A2,cache


# X_assess,parameters=forward_propagation_test_case()
# A2,cache=forward_propagation(X_assess,parameters)
# print(np.mean(cache["Z1"]),np.mean(cache["A1"]),np.mean(cache["Z2"]),np.mean(cache["A2"]))

def compute_cost(A2,Y,parameters):
    """
       Computes the cross-entropy cost given in equation (13)
       Arguments:
       A2 -- The sigmoid output of the second activation, of shape (1, number of examples)
       Y -- "true" labels vector of shape (1, number of examples)
       parameters -- python dictionary containing your parameters W1, b1, W2 and b2
       Returns:
       cost -- cross-entropy cost given equation (13)
       """
    m=Y.shape[1]  # number of examples
    # compute the cross-entropy cost
    logprobs=np.multiply(np.log(A2),Y)+np.multiply(np.log(1-A2),1-Y)
    cost=-1/m*np.sum(logprobs)
    cost=np.squeeze(cost)# makes sure cost is the dimension we expect
                         # E.g turns [[17]] into 17
    assert (isinstance(cost,float))
    return cost

# A2,Y_assess,parameters=compute_cost_test_case()
# print("cost = " + str(compute_cost(A2,Y_assess,parameters)))

"""
Question: Implement the function backward_propagation().
Instructions: Backpropagation is usually the hardest (most mathematical) part in deep learning. 
To help you, here again is the slide from the lecture on backpropagation. 
You'll want to use the six equations on the right of this slide, since you are building a vectorized implementation.
"""
def backward_propagation(parameters,cache,X,Y):
    """
    Implement the backward propagation using the instructions above.
    Arguments:
    parameters -- python dictionary containing our parameters (W1, b1, W2, b2)
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2".
    X -- input data of shape (2, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)
    Returns:
    grads -- python dictionary containing your gradients with respect to different parameters
    """
    m=X.shape[1]
    W1=parameters["W1"]
    W2=parameters["W2"]
    A1=cache["A1"]
    A2=cache["A2"]

    dZ2=A2-Y
    dW2=1/m*np.dot(dZ2,A1.T)
    db2=1/m*np.sum(dZ2,axis=1,keepdims=True)
    dZ1=np.dot(W2.T,dZ2)*(1-np.power(A1,2))
    dW1=1/m*np.dot(dZ1,X.T)
    db1=1/m*np.sum(dZ1,axis=1,keepdims=True)
    grads={
        "dW1":dW1,
        "db1":db1,
        "dW2":dW2,
        "db2":db2
    }
    return grads

# parameters,cache,X_assess,Y_assess=backward_propagation_test_case()
# grads=backward_propagation(parameters,cache,X_assess,Y_assess)
# print("dW1="+str(grads["dW1"]))
# print("db1="+str(grads["db1"]))
# print("dW2="+str(grads["dW2"]))
# print("db2="+str(grads["db2"]))
def update_parameters(parameters,grads,learning_rate=1.2):
    """
    Updates parameters using the gradient descent update rule given above
    Arguments:
    parameters -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients
    learning_rate -- step size used in the gradient descent update
    Returns:
    parameters -- python dictionary containing your updated parameters
    """
    W1=parameters["W1"]
    b1=parameters["b1"]
    W2=parameters["W2"]
    b2=parameters["b2"]

    dW1=grads["dW1"]
    db1=grads["db1"]
    dW2=grads["dW2"]
    db2=grads["db2"]
    W1=W1-learning_rate*dW1
    b1=b1-learning_rate*db1
    W2=W2-learning_rate*dW2
    b2=b2-learning_rate*db2
    parameters={"W1":W1,
                "b1":b1,
                "W2":W2,
                "b2":b2}
    return parameters

# parameters,grads=update_parameters_test_case()
# parameters=update_parameters(parameters,grads)
# print("W1="+str(parameters["W1"]))
# print("b1="+str(parameters["b1"]))
# print("W2="+str(parameters["W2"]))
# print("b2="+str(parameters["b2"]))


def nn_model(X,Y,n_h,num_iterations=1000,print_cost=False):
    """
    Arguments:
    X -- dataset of shape (2, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- number of iterations in the gradient descent loop
    print_cost -- if True, print the cost every 100 iterations
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    np.random.seed(3)
    n_x=layer_sizes(X,Y)[0]
    n_y=layer_sizes(X,Y)[2]

    # Initialize the parameters once; the gradient descent loop below updates them.
    parameters=initialize_parameters(n_x,n_h,n_y)
    for i in range(0,num_iterations):
        A2,cache=forward_propagation(X,parameters)
        cost=compute_cost(A2,Y,parameters)
        grads=backward_propagation(parameters,cache,X,Y)
        parameters=update_parameters(parameters,grads,learning_rate=1.2)
        if print_cost and i%100==0:
            print("Cost after iteration %i:%f"%(i,cost))

    return parameters

# X_assess,Y_assess=nn_model_test_case()
# parameters=nn_model(X_assess,Y_assess,4,num_iterations=10000,print_cost=True)
#
# print("W1="+str(parameters["W1"]))
# print("b1"+str(parameters["b1"]))
# print("W2="+str(parameters["W2"]))
# print("b2="+str(parameters["b2"]))


def predict(parameters,X):
    """
    Using the learned parameters, predicts a class for each example in X
    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (n_x, m)
    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """
    A2,cache=forward_propagation(X,parameters)
    predictions=(A2>0.5)
    return predictions

# parameters,X_assess=predict_test_case()
# predictions=predict(parameters,X_assess)
# print("predictions mean = " + str(np.mean(predictions)))

hidden_layer_sizes = [1, 2, 3, 4, 5, 20, 50]
for n_h in hidden_layer_sizes:
    print("hidden_layer_size=" + str(n_h))
    X, Y = load_planar_dataset()
    parameters = nn_model(X, Y, n_h=n_h, num_iterations=1000, print_cost=True)
    plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y.flatten())
    plt.title("Decision Boundary for hidden layer size " + str(n_h))
    plt.show()


    # Print accuracy
    predictions = predict(parameters, X)
    print('Accuracy: %d' % float((np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100) + '%')  # counts the examples where predictions agree with Y
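# Note on the accuracy line above: np.dot(Y, predictions.T) counts the examples predicted 1 whose
# label is 1, and np.dot(1-Y, 1-predictions.T) counts those predicted 0 whose label is 0, so the
# expression is just the percentage of matching labels. An equivalent, simpler form (my own
# rewrite, not the course's) would be:
# print('Accuracy: %.1f%%' % (float(np.mean(predictions == Y)) * 100))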


"""
Accuracy is really high compared to Logistic Regression. The model has learnt the leaf patterns of the flower! Neural networks are able to learn even highly non-linear decision boundaries, unlike logistic regression.
Now, let's try out several hidden layer sizes.
"""
# This may take about 2 minutes to run