0-2.浅层神经网络 ------ 双层神经网络

训练集和测试集:见 GitHub 仓库中的 Desktop.rar(下载链接见原文仓库)。

一.公式:

前向传播:$z_1 = w_1 X + b_1 \Rightarrow a_1 = f(z_1) \Rightarrow z_2 = w_2 a_1 + b_2 \Rightarrow a_2 = \sigma(z_2)$
$\Rightarrow J = -\big[\, y \log(a_2) + (1-y)\log(1-a_2) \,\big]$
反向传播:$da_2 = -\dfrac{y}{a_2} + \dfrac{1-y}{1-a_2} \Rightarrow dz_2 = da_2 \cdot \sigma'(z_2) \Rightarrow dw_2 = dz_2 \cdot a_1^{T},\; db_2 = dz_2$
$da_1 = w_2^{T} \cdot dz_2 \Rightarrow dz_1 = da_1 * f'(z_1) \Rightarrow dw_1 = dz_1 \cdot X^{T},\; db_1 = dz_1$
更新参数:$w_2 = w_2 - \alpha\, dw_2,\quad b_2 = b_2 - \alpha\, db_2$
更新参数:$w_1 = w_1 - \alpha\, dw_1,\quad b_1 = b_1 - \alpha\, db_1$(其中 $\alpha$ 为学习率 learning rate)

双层神经网络是重复2次的logistic回归,其反向传播公式的推导过程(数学符号太难打了,就手写了下,大家见谅):

在这里插入图片描述

二.核对矩阵维数:

n:表示样本个数
m:表示每一个样本特征个数
t:表示隐藏层节点个数
X:(m,n)
Y:(1,n)
w1:(t,m)
b1:(t,1)#Python中可广播
w2:(1,t)
b2:(1,1)#Python中可广播
z1:(t,n)
z2:(1,n)
在这里插入图片描述

三.代码

运行结果:

在这里插入图片描述
在这里插入图片描述

完整代码:
import numpy as np
import matplotlib.pyplot as plt
import h5py
def load_dataset():
    """Load the cat / non-cat image dataset from two HDF5 files.

    Returns:
        train_set_x: (features, n_train) array, pixel values scaled into [0, 1]
        train_set_y: (1, n_train) label array (0 = non-cat, 1 = cat)
        test_set_x:  (features, n_test) array, scaled into [0, 1]
        test_set_y:  (1, n_test) label array
        classes:     bytes array of class names, [b'non-cat' b'cat']

    NOTE(review): the h5py files are never closed; consider `with h5py.File(...)`.
    """
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # training images (209 64x64 images per the original author's note)
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # training labels (0 = non-cat, 1 = cat)
    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test images (50 64x64 images per the original author's note)
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test labels (0 = non-cat, 1 = cat)
    classes = np.array(test_dataset["list_classes"][:])  # class names stored as bytes: [b'non-cat' b'cat']
    # Reshape labels into row vectors of shape (1, n_samples)
    train_set_y = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    print("训练集_图片的维数 : " + str(train_set_x_orig.shape))
    print("训练集_标签的维数 : " + str(train_set_y.shape))
    print("测试集_图片的维数: " + str(test_set_x_orig.shape))
    print("测试集_标签的维数: " + str(test_set_y.shape))
    print()
    train_set_x = train_set_x_orig.reshape((train_set_x_orig.shape[0], -1)).T / 255  # flatten to (features, n_samples) and scale pixels into [0, 1]
    test_set_x = test_set_x_orig.reshape((test_set_x_orig.shape[0], -1)).T / 255  # flatten to (features, n_samples) and scale pixels into [0, 1]
    return train_set_x, train_set_y, test_set_x, test_set_y, classes
def relu(z):  # ReLU activation for the hidden layer
    """Element-wise rectified linear unit: max(z, 0)."""
    return np.maximum(z, 0)
def relu_1(z):  # derivative of the ReLU function
    """Element-wise derivative of ReLU: 1 where z > 0, else 0.

    Fix: the original `np.maximum(0, z / np.abs(z))` divides by zero when
    z == 0, emitting a RuntimeWarning and propagating NaN into the
    gradients. The subgradient at z == 0 is defined here as 0.
    """
    return np.where(z > 0, 1.0, 0.0)
def tanh(z):  # tanh as an alternative hidden-layer activation
    """Element-wise hyperbolic tangent.

    Fix: the original hand-rolled (e^z - e^-z) / (e^z + e^-z) overflows for
    |z| larger than ~710 and returns nan/inf; np.tanh computes the same
    value with proper saturation to +/-1.
    """
    return np.tanh(z)
def tanh_1(a):  # derivative of tanh, given a = tanh(z)
    """Return 1 - a^2, the tanh derivative expressed via its output."""
    return 1 - np.square(a)

def sigmoid(z):  # logistic activation for the output layer
    """Element-wise logistic sigmoid, 1 / (1 + e^(-z))."""
    neg_exp = np.exp(-z)
    return 1 / (1 + neg_exp)

def sigmoid_1(a):
    """Derivative of the sigmoid expressed via its output: a * (1 - a)."""
    complement = 1 - a
    return a * complement
def rand(n, m, dim):
    """Initialize the two-layer network parameters.

    Weights are drawn from a normal distribution and scaled by 0.001 to
    break symmetry while keeping activations small; biases start at zero
    (broadcast over samples by NumPy). `n` (sample count) is accepted for
    interface compatibility but not needed for initialization.

    Returns:
        w: {"w1": (dim, m), "w2": (1, dim)}
        b: {"b1": (dim, 1), "b2": (1, 1)}
    """
    scale = 0.001
    # Keep the randn call order (w1 then w2) so seeded runs are reproducible.
    w1 = np.random.randn(dim, m) * scale
    w2 = np.random.randn(1, dim) * scale
    b1 = np.zeros((dim, 1), dtype='float')
    b2 = np.zeros((1, 1), dtype='float')
    return {"w1": w1, "w2": w2}, {"b1": b1, "b2": b2}

def backward(X, Y, w1, w2, b1, b2, learn):
    """Run one full gradient-descent step (forward + backward + update).

    Args:
        X: (features, n_samples) input matrix.
        Y: (1, n_samples) binary labels.
        w1, b1: hidden-layer parameters, shapes (dim, features), (dim, 1).
        w2, b2: output-layer parameters, shapes (1, dim), (1, 1).
        learn: learning rate.

    Returns:
        (w, b, L): updated parameter dicts {"w1","w2"} / {"b1","b2"} and
        the scalar cross-entropy cost before the update.
    """
    m = X.shape[1]  # number of samples
    # Forward pass: X -> relu(z1) -> sigmoid(z2)
    z1 = np.dot(w1, X) + b1
    a1 = relu(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)
    # Mean cross-entropy cost (log(0) warnings possible if a2 saturates)
    L = -1 / m * np.sum(Y * np.log(a2) + (1 - Y) * np.log(1 - a2))
    L = np.squeeze(L)  # drop the extra dimensions -> scalar

    # Backward pass. Fix: dz2 = a2 - Y is the analytically simplified form
    # of da2 * sigmoid'(z2); unlike computing da2 = -Y/a2 + (1-Y)/(1-a2)
    # explicitly, it cannot divide by zero when a2 reaches exactly 0 or 1.
    dz2 = a2 - Y
    dw2 = 1 / m * np.dot(dz2, a1.T)
    db2 = 1 / m * np.sum(dz2, axis=1, keepdims=True)
    da1 = np.dot(w2.T, dz2)
    dz1 = da1 * relu_1(z1)

    dw1 = 1 / m * np.dot(dz1, X.T)
    db1 = 1 / m * np.sum(dz1, axis=1, keepdims=True)

    # Gradient-descent update
    w1 = w1 - learn * dw1
    w2 = w2 - learn * dw2
    b1 = b1 - learn * db1
    b2 = b2 - learn * db2

    w = {
        "w1": w1,
        "w2": w2
    }
    b = {
        "b1": b1,
        "b2": b2
    }
    return w, b, L


def test(w1, w2, b1, b2, X):  # predict labels with the trained parameters
    """Forward-propagate X through the trained network and threshold at 0.5.

    Args:
        w1, b1, w2, b2: trained parameters (see `backward`).
        X: (features, n_samples) input matrix.

    Returns:
        (1, n_samples) float array of 0.0 / 1.0 predictions.
    """
    z1 = np.dot(w1, X) + b1
    a1 = relu(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)
    # Vectorized threshold replaces the original per-element Python loop;
    # same strict `> 0.5` cutoff and same float 0/1 output.
    return (a2 > 0.5).astype(float)

def trainback(w1, w2, b1, b2, X, Y):  # report accuracy on the training set
    """Predict labels for X and print training-set accuracy against Y (percent)."""
    predicted = test(w1, w2, b1, b2, X)
    accuracy = 100 * (1 - np.mean(np.abs(predicted - Y)))
    print("训练集准确性:{0}%".format(accuracy))
    return 0
def testback(w1, w2, b1, b2, X, Y):  # report accuracy on the test set
    """Predict labels for X and print test-set accuracy against Y (percent)."""
    predicted = test(w1, w2, b1, b2, X)
    accuracy = 100 * (1 - np.mean(np.abs(predicted - Y)))
    print("测试集准确性:{0}%".format(accuracy))
    return 0
if __name__ == "__main__":
    np.random.seed(1)  # fixed seed so every run is reproducible
    learning_rate = 0.0075
    train_set_x, train_set_y, test_set_x, test_set_y, classes = load_dataset()
    n, m, dim, i = train_set_x.shape[1], train_set_x.shape[0], 4, 0  # n = sample count, m = feature count, dim = hidden-layer node count

    w, b = rand(n, m, dim)
    w1, w2 = w["w1"], w["w2"]
    b1, b2 = b["b1"], b["b2"]
    L = []  # sampled loss values for the learning-curve plot
    for i in range(3000):  # 3000 gradient-descent iterations
        w, b, l = backward(train_set_x, train_set_y, w1, w2, b1, b2, learning_rate)
        w1, w2 = w["w1"], w["w2"]
        b1, b2 = b["b1"], b["b2"]
        if i % 500 == 0:  # record and print the loss every 500 iterations
            L.append(l)
            print("损失函数Loss:", l)
    trainback(w1,w2 ,b1,b2, train_set_x, train_set_y)
    testback(w1, w2, b1, b2, test_set_x, test_set_y)
    # Plot the learning curve
    #plt.scatter(X[0, :], X[1, :], c=np.squeeze(Y), s=40, cmap=plt.cm.Spectral)  # scatter plot (unused in this script)
    plt.plot(L)
    plt.ylabel('Loss')
    plt.xlabel('Number of training rounds')
    plt.title("learning_rate =" + str(learning_rate))
    plt.show()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值