A Neural Network in NumPy

Preface

  Learning neural networks is mostly a matter of mathematics. In this chapter we implement a neural network from scratch with numpy.

import numpy as np
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

1. Data Preparation

X, y = make_moons(n_samples=1000, noise=0.3)  # data and labels
X.shape, y.shape  # 1000 samples, each with two features

Output: ((1000, 2), (1000,))

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)  # split the dataset, train : test = 9 : 1
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train)
plt.xlabel("X0")
plt.ylabel("X1")
plt.show()

(Figure: scatter plot of the training data, colored by class)

2. Parameter Initialization

# model configuration: each layer's input size, output size, and activation
nn_cfg = [{"in_features": 2,  "out_features": 25, "activation": "relu"},    #(2,25)
          {"in_features": 25, "out_features": 50, "activation": "relu"},    #(25,50)
          {"in_features": 50, "out_features": 50, "activation": "relu"},    #(50,50)
          {"in_features": 50, "out_features": 25, "activation": "relu"},    #(50,25)
          {"in_features": 25, "out_features": 2,  "activation": "sigmoid"}] #(25,2)
# initialize the model parameters for every layer
def init_layers(nn_cfg, seed = 99):
    np.random.seed(seed)  # fix the seed so every run starts from the same parameters
    params = {}
    for idx, layer in enumerate(nn_cfg):
        layer_idx = idx + 1
        in_features = layer["in_features"]
        out_features = layer["out_features"]
        params['w' + str(layer_idx)] = np.random.randn(in_features, out_features) * 0.1
        params['b' + str(layer_idx)] = np.random.randn(1, out_features) * 0.1
    return params
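As a quick sanity check (a small sketch, not part of the original post), the shape of every initialized parameter can be printed:

params = init_layers(nn_cfg)
for name, value in params.items():
    print(name, value.shape)
# w1 (2, 25)  b1 (1, 25)  w2 (25, 50)  b2 (1, 50)  ...  w5 (25, 2)  b5 (1, 2)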

3. Activation Functions

(Figure: the sigmoid and ReLU activation functions)

def sigmoid(z):
    return 1/(1+np.exp(-z))

def relu(z):
    return np.maximum(0,z)

def sigmoid_derivative(da, z):
    # chain rule: dL/dz = dL/da * sigmoid'(z), where sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))
    sig = sigmoid(z)
    return da * sig * (1 - sig)

def relu_derivative(da, z):
    # chain rule: dL/dz = dL/da * relu'(z); the gradient is zero wherever z <= 0
    dz = np.array(da)
    dz[z <= 0] = 0
    return dz
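A quick numeric check (a small sketch, not part of the original post) illustrates the activations and the convention that the derivative helpers take the upstream gradient da together with the pre-activation z:

z = np.array([[-1.0, 0.0, 2.0]])
print(sigmoid(z))                           # approx. [[0.269 0.5   0.881]]
print(relu(z))                              # [[0. 0. 2.]]
print(relu_derivative(np.ones_like(z), z))  # [[0. 0. 1.]] -- gradient blocked where z <= 0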

4. Forward Propagation

def forward_single_layer(a_prev, w_curr, b_curr, activation="relu"):
    z_curr = np.dot(a_prev, w_curr) + b_curr  # linear step: inputs times weights plus bias
    if activation == "relu":
        act = relu
    elif activation == "sigmoid":
        act = sigmoid
    else:
        raise Exception('Non-supported activation function')
    return act(z_curr), z_curr  # activated output and raw pre-activation

def forward_full_layer(X, params, nn_cfg):
    memory = {}
    a_curr = X
    for idx, layer in enumerate(nn_cfg):
        layer_idx = idx + 1
        a_prev = a_curr
        act = layer["activation"]
        w_curr = params["w" + str(layer_idx)]
        b_curr = params["b" + str(layer_idx)]
        a_curr, z_curr = forward_single_layer(a_prev, w_curr, b_curr, act)
        memory["a" + str(idx)] = a_prev
        memory["z" + str(layer_idx)] = z_curr
    return a_curr, memory

# cross-entropy loss
def calc_cost(Y_hat, Y):
    m = Y_hat.shape[0]
    cost = -1 / m * (np.dot(Y.T, np.log(Y_hat)) + np.dot((1 - Y).T, np.log(1 - Y_hat)))
    return np.sum(np.diagonal(cost))  # only the diagonal entries pair each label column with its own prediction column

  The cross-entropy loss function is as follows.

$$L(\hat{y}, y) = -\left(y\log\hat{y} + (1 - y)\log(1 - \hat{y})\right)$$
$$J(W, b) = \frac{1}{m}\sum_{i=1}^{m} L\left(\hat{y}^{(i)}, y^{(i)}\right)$$
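As a tiny worked example (made-up numbers, not from the original post), two one-hot labels scored against two predictions give a loss of roughly 0.33:

Y_demo = np.array([[1., 0.], [0., 1.]])
Y_hat_demo = np.array([[0.9, 0.1], [0.2, 0.8]])
print(calc_cost(Y_hat_demo, Y_demo))  # roughly 0.33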

# convert the final-layer output probabilities into class labels
def prob2class(y_hat):
    y_pred = np.argmax(y_hat, axis=1)
    return y_pred
# one-hot encoding
def idx2onehot(y):
    cls = max(y) + 1
    num = len(y)
    x = range(num)
    labels = np.zeros((num, cls))
    labels[x , y] = 1
    return labels
# compute accuracy
def calc_accuracy(Y_hat, Y, train=True):
    Y_hat_ = prob2class(Y_hat)  # convert outputs to class labels
    if train:  # training labels are one-hot and must be converted back; test labels are already class indices
        Y = prob2class(Y)
    return (Y_hat_ == Y).mean()
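A short demonstration of these helpers (a sketch, not part of the original post):

y_demo = np.array([0, 1, 1, 0])
print(idx2onehot(y_demo))                              # 4 x 2 one-hot matrix
y_hat_demo = np.array([[0.8, 0.2], [0.3, 0.7], [0.6, 0.4], [0.9, 0.1]])
print(prob2class(y_hat_demo))                          # [0 1 0 0]
print(calc_accuracy(y_hat_demo, y_demo, train=False))  # 0.75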

5. Backward Propagation

def backward_single_layer(da_curr, w_curr, b_curr, z_curr, a_prev, activation="relu"):
    m = a_prev.shape[0]
    # pick the derivative that matches this layer's activation
    if activation == "relu":
        act_derivative = relu_derivative
    elif activation == "sigmoid":
        act_derivative = sigmoid_derivative
    else:
        raise Exception('Non-supported activation function')
    dz_curr = act_derivative(da_curr, z_curr)
    dw_curr = np.dot(a_prev.T, dz_curr) / m  # average the weight gradients over the batch
    db_curr = np.sum(dz_curr, axis=0, keepdims=True) / m  # average the bias gradients over the batch
    da_prev = np.dot(dz_curr, w_curr.T)
    return da_prev, dw_curr, db_curr

  The derivative of the cross-entropy loss with respect to $\hat{Y}$ is:

$$\frac{\partial L}{\partial \hat{Y}} = -\left(\frac{Y}{\hat{Y}} - \frac{1 - Y}{1 - \hat{Y}}\right)$$
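For reference (this step is not spelled out in the post), multiplying this derivative by the sigmoid derivative $\hat{Y}(1 - \hat{Y})$ of the output layer yields the familiar simplification:

$$\frac{\partial L}{\partial Z} = \frac{\partial L}{\partial \hat{Y}} \cdot \hat{Y}(1 - \hat{Y}) = \hat{Y} - Y$$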

def full_backward_propagation(Y_hat, Y, memory, params, nn_cfg):
    grads = {}
    m = Y.shape[0]
    Y = Y.reshape(Y_hat.shape)
    da_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))  # derivative of the cross-entropy loss with respect to Y_hat
    for layer_idx_prev, layer in reversed(list(enumerate(nn_cfg))):  # walk the layers from last to first
        layer_idx_curr = layer_idx_prev + 1
        act = layer["activation"]
        da_curr = da_prev
        a_prev = memory["a" + str(layer_idx_prev)]
        z_curr = memory["z" + str(layer_idx_curr)]
        w_curr = params["w" + str(layer_idx_curr)]
        b_curr = params["b" + str(layer_idx_curr)]
        da_prev, dw_curr, db_curr = backward_single_layer(
            da_curr, w_curr, b_curr, z_curr, a_prev, act)
        grads["dw" + str(layer_idx_curr)] = dw_curr
        grads["db" + str(layer_idx_curr)] = db_curr
    return grads
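To build confidence in the backward pass, the analytic gradients can be compared with numerical finite differences on a tiny batch. This is a sketch that only assumes the functions defined above; the helper name gradient_check is made up:

def gradient_check(X, Y, nn_cfg, eps=1e-5):
    params = init_layers(nn_cfg)
    Y_hat, memory = forward_full_layer(X, params, nn_cfg)
    grads = full_backward_propagation(Y_hat, Y, memory, params, nn_cfg)
    # nudge a single weight of the first layer and recompute the cost in both directions
    params["w1"][0, 0] += eps
    cost_plus = calc_cost(forward_full_layer(X, params, nn_cfg)[0], Y)
    params["w1"][0, 0] -= 2 * eps
    cost_minus = calc_cost(forward_full_layer(X, params, nn_cfg)[0], Y)
    params["w1"][0, 0] += eps
    numeric = (cost_plus - cost_minus) / (2 * eps)
    print(numeric, grads["dw1"][0, 0])  # the two values should agree closely

gradient_check(np.random.randn(5, 2), idx2onehot(np.array([0, 1, 0, 1, 1])), nn_cfg)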

6. Parameter Update

def update(params, grads, nn_cfg, learning_rate):
    for layer_idx, layer in enumerate(nn_cfg, 1):
        params["w" + str(layer_idx)] -= learning_rate * grads["dw" + str(layer_idx)]        
        params["b" + str(layer_idx)] -= learning_rate * grads["db" + str(layer_idx)]
    return params

7. Model Training

def train(X, Y, nn_cfg, epochs, learning_rate, train=True):
    params = init_layers(nn_cfg, 2)
    acc_history = []
    cost_history = []
    for i in range(epochs):
        # forward pass
        Y_hat, memory = forward_full_layer(X, params, nn_cfg)
        # compute accuracy
        accuracy = calc_accuracy(Y_hat, Y, train=train)
        # compute the loss
        cost = calc_cost(Y_hat, Y)
        acc_history.append(accuracy)
        cost_history.append(cost)
        # backward pass
        grads = full_backward_propagation(Y_hat, Y, memory, params, nn_cfg)
        # update parameters
        params = update(params, grads, nn_cfg, learning_rate)    
    return params, acc_history, cost_history
y_train = idx2onehot(y_train)  # convert labels to one-hot encoding
params, acc_history, cost_history = train(x_train, y_train, nn_cfg, 10000, 0.01)
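To visualize how training progressed (a small sketch, not part of the original post), the recorded histories can be plotted:

plt.plot(cost_history, label="cost")
plt.plot(acc_history, label="accuracy")
plt.xlabel("epoch")
plt.legend()
plt.show()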

8. Model Testing

y_hat, _ = forward_full_layer(x_test, params, nn_cfg)
test_accuracy = calc_accuracy(y_hat, y_test, train=False)
print('The accuracy of this test dataset is {}%.'.format(test_accuracy * 100))

Output

The accuracy of this test dataset is 94.0%.

Summary

  Although we have implemented a neural network with numpy, the result still needs further verification. In the next chapter we will verify it using the mature deep learning framework PyTorch.
