Andrew Ng, Neural Networks and Deep Learning: Week 3 Programming Assignment

import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset, load_extra_datasets


def tanh(z):
    a = ( np.exp(z) - np.exp(-z) ) / ( np.exp(z) + np.exp(-z) )
    return a
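

# Note: this hand-rolled tanh matches NumPy's built-in; a quick check:
#     z = np.linspace(-3, 3, 7)
#     assert np.allclose(tanh(z), np.tanh(z))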


"""
本例子中不需要分训练集与数据集,因为分布不同,测试集必定准确率非常低
"""
def split_train_test(X, Y):
    X_train = X
    y_train = Y

    return X_train, y_train


def initialize_parameters(n_x, n_h, n_y):
    np.random.seed(2)

    # Small random weights break symmetry; biases can safely start at zero
    w1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros(shape=(n_h, 1))
    w2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros(shape=(n_y, 1))

    parameters = {
        'w1': w1,
        'b1': b1,
        'w2': w2,
        'b2': b2,
    }
    return parameters
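

# Quick shape sanity check (a minimal sketch): with n_x=2, n_h=4, n_y=1 the
# parameter shapes are w1 (4, 2), b1 (4, 1), w2 (1, 4), b2 (1, 1):
#     p = initialize_parameters(2, 4, 1)
#     assert p['w1'].shape == (4, 2) and p['w2'].shape == (1, 4)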


# Forward propagation (also computes and caches the cross-entropy cost)
def forward_propagation(w1, w2, b1, b2, X, y):
    m = X.shape[1]

    z1 = np.dot(w1, X) + b1
    a1 = tanh(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)
    cost_function = (-1/m) * np.sum(y * np.log(a2) + (1-y) * np.log(1-a2))

    cache = {
        "z1": z1,
        "a1": a1,
        "z2": z2,
        "a2": a2,
        'cost': cost_function,
    }

    return a2, cache
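

# Numerical-stability note (an addition, not in the original): if a2 saturates
# to exactly 0 or 1, np.log returns -inf and the cost becomes nan. A common
# guard is to clip the activations before taking logs, e.g.
#     a2_safe = np.clip(a2, 1e-10, 1 - 1e-10)
#     cost = (-1/m) * np.sum(y * np.log(a2_safe) + (1-y) * np.log(1 - a2_safe))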


# Backward propagation
def backward_propagation(cache, w1, w2, X, y):
    m = X.shape[1]

    a1 = cache['a1']
    a2 = cache['a2']

    dz2 = a2 - y
    dw2 = (1/m) * np.dot(dz2, a1.T)
    db2 = (1/m) * np.sum(dz2, axis=1, keepdims=True)

    dz1 = np.dot(w2.T, dz2) * (1 - a1**2)  # (1 - a1**2) is the derivative of tanh
    dw1 = (1/m) * np.dot(dz1, X.T)
    db1 = (1/m) * np.sum(dz1, axis=1, keepdims=True)
    grads = {
        'dw1': dw1,
        'dw2': dw2,
        'db1': db1,
        'db2': db2,
    }

    return grads
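

# Optional gradient check (a sketch, not part of the original assignment):
# verify the analytic dw2 against a central-difference estimate of the cost
# cached by forward_propagation. Handy when debugging the backward pass.
def gradient_check_dw2(w1, w2, b1, b2, X, y, eps=1e-7):
    _, cache = forward_propagation(w1, w2, b1, b2, X, y)
    grads = backward_propagation(cache, w1, w2, X, y)
    num_dw2 = np.zeros_like(w2)
    for i in range(w2.shape[0]):
        for j in range(w2.shape[1]):
            w2_plus, w2_minus = w2.copy(), w2.copy()
            w2_plus[i, j] += eps
            w2_minus[i, j] -= eps
            _, c_plus = forward_propagation(w1, w2_plus, b1, b2, X, y)
            _, c_minus = forward_propagation(w1, w2_minus, b1, b2, X, y)
            num_dw2[i, j] = (c_plus['cost'] - c_minus['cost']) / (2 * eps)
    # Maximum absolute difference; roughly 1e-7 or smaller when dw2 is correct
    return np.max(np.abs(num_dw2 - grads['dw2']))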


def layer_sizes(X, Y):
    """
    Arguments:
     X - input dataset of shape (number of features, number of examples)
     Y - labels of shape (number of outputs, number of examples)

    Returns:
     n_x - size of the input layer
     n_y - size of the output layer
     (The hidden layer size n_h is chosen by the caller.)
    """
    n_x = X.shape[0]  # input layer size
    n_y = Y.shape[0]  # output layer size

    return n_x, n_y


def predict(w1, w2, b1, b2, X, y):
    a2, cache = forward_propagation(w1, w2, b1, b2, X, y)
    predictions = np.round(a2)

    return predictions
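

# Note: predict() takes y only because forward_propagation threads the labels
# through to compute the cached cost; the predictions themselves are just the
# sigmoid output a2 rounded at the 0.5 threshold.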


def nm_model_train(X, y, n_h, iteration_times, learning_rate, print_cost=False):
    # n_x: number of input features, n_h: hidden layer size, n_y: output layer size
    n_x, n_y = layer_sizes(X, y)
    # Initialize parameters
    init_param = initialize_parameters(n_x, n_h, n_y)
    w1 = init_param['w1']
    w2 = init_param['w2']
    b1 = init_param['b1']
    b2 = init_param['b2']

    for i in range(iteration_times):
        # Forward propagation
        a2, cache = forward_propagation(w1, w2, b1, b2, X, y)

        # Backward propagation
        grads = backward_propagation(cache, w1, w2, X, y)

        # Gradient descent parameter update
        w1 = w1 - learning_rate * grads['dw1']
        w2 = w2 - learning_rate * grads['dw2']
        b1 = b1 - learning_rate * grads['db1']
        b2 = b2 - learning_rate * grads['db2']

        if print_cost and (i % 1000 == 0):
            print('Cost after iteration %i: %f' % (i, cache['cost']))

    return (w1, w2, b1, b2)
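

# Hidden-layer size sweep (a sketch mirroring the course's tuning step; the
# sizes and iteration budget below are illustrative choices, not the original's):
def tune_hidden_size(X, y, sizes=(1, 2, 3, 4, 5, 20)):
    for n_h in sizes:
        w1, w2, b1, b2 = nm_model_train(X, y, n_h=n_h, iteration_times=5000, learning_rate=1.2)
        preds = predict(w1, w2, b1, b2, X, y)
        acc = float((np.dot(y, preds.T) + np.dot(1 - y, 1 - preds.T)) / float(y.size) * 100)
        print('n_h = %d, training accuracy: %.1f%%' % (n_h, acc))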


if __name__ == "__main__":
    np.random.seed(1)

    X, Y = load_planar_dataset()
    """
    导入数据集可视化
    plt.scatter(X[0, :], X[1, :], c=Y, s=40, cmap=plt.cm.Spectral)
    plt.show()
    print(X.shape, Y.shape) # (2, 400)   (1, 400)
    """

    """
    逻辑回归效果测试 
    clf = sklearn.linear_model.LogisticRegressionCV()
    clf.fit(X.T, Y.T)
    plot_decision_boundary(lambda x: clf.predict(x), X, Y)
    plt.title("Logistic Regression")
    plt.show()
    LR_predictions = clf.predict(X.T)
    print("逻辑回归的准确性: %d " % float((np.dot(Y, LR_predictions) +
                                   np.dot(1 - Y, 1 - LR_predictions)) / float(Y.size) * 100) +
          "% " + "(正确标记的数据点所占的百分比)")
    """


    # Train/test "split" (returns the full set; see split_train_test above)
    X_train, y_train = split_train_test(X, Y)
    # Train the two-layer network
    w1, w2, b1, b2 = nm_model_train(X_train, y_train, n_h=4, iteration_times=10000, learning_rate=1.2, print_cost=True)

    predictions = predict(w1, w2, b1, b2, X_train, y_train)
    print('Accuracy: %d' % float((np.dot(y_train, predictions.T) + np.dot(1 - y_train, 1 - predictions.T)) / float(y_train.size) * 100) + '%')
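
    # Visualize the learned boundary (a sketch; plot_decision_boundary comes
    # from planar_utils). predict() needs labels only for the cached cost we
    # ignore here, so dummy zero labels of matching width cover the grid points.
    plot_decision_boundary(
        lambda x: predict(w1, w2, b1, b2, x.T, np.zeros((1, x.shape[0]))),
        X_train, y_train)
    plt.title('Decision boundary for hidden layer size 4')
    plt.show()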

 

planar_utils.py

import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model

def plot_decision_boundary(model, X, y):
    # Set min and max values and give it some padding
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(X[0, :], X[1, :], c=y.ravel(), cmap=plt.cm.Spectral)  # ravel so c is 1-D


def sigmoid(x):
    s = 1/(1+np.exp(-x))
    return s

def load_planar_dataset():
    np.random.seed(1)
    m = 400 # number of examples
    N = int(m/2) # number of points per class
    D = 2 # dimensionality
    X = np.zeros((m,D)) # data matrix where each row is a single example
    Y = np.zeros((m,1), dtype='uint8') # labels vector (0 for red, 1 for blue)
    a = 4 # maximum ray of the flower

    for j in range(2):
        ix = range(N*j,N*(j+1))
        t = np.linspace(j*3.12,(j+1)*3.12,N) + np.random.randn(N)*0.2 # theta
        r = a*np.sin(4*t) + np.random.randn(N)*0.2 # radius
        X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
        Y[ix] = j

    X = X.T
    Y = Y.T

    return X, Y

def load_extra_datasets():  
    N = 200
    noisy_circles = sklearn.datasets.make_circles(n_samples=N, factor=.5, noise=.3)
    noisy_moons = sklearn.datasets.make_moons(n_samples=N, noise=.2)
    blobs = sklearn.datasets.make_blobs(n_samples=N, random_state=5, n_features=2, centers=6)
    gaussian_quantiles = sklearn.datasets.make_gaussian_quantiles(mean=None, cov=0.5, n_samples=N, n_features=2, n_classes=2, shuffle=True, random_state=None)
    no_structure = np.random.rand(N, 2), np.random.rand(N, 2)

    return noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure
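

# Usage sketch (mirrors the course notebook; picking noisy_moons is only an
# example): each extra dataset comes back in sklearn's (examples, features)
# layout and must be transposed to the (features, examples) convention used above.
#     noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure = load_extra_datasets()
#     X, Y = noisy_moons
#     X, Y = X.T, Y.reshape(1, Y.shape[0])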

Utils package, implementation approach, and dataset reference: https://blog.csdn.net/u013733326/article/details/79702148
