Preface
Learning neural networks requires some mathematical background. In this chapter we implement a neural network from scratch using numpy.
import numpy as np
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
1. Data Preparation
X, y = make_moons(n_samples=1000, noise=0.3)  # data and labels
X.shape, y.shape  # 1000 samples, each with two features
Output: ((1000, 2), (1000,))
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)  # split the dataset, train : test = 9 : 1
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train)
plt.xlabel("X0")
plt.ylabel("X1")
plt.show()
2. Parameter Initialization
# model configuration: layer sizes and activations
nn_cfg = [{"in_features": 2, "out_features": 25, "activation": "relu"},#(2,25)
{"in_features": 25, "out_features": 50, "activation": "relu"},#(25,50)
{"in_features": 50, "out_features": 50, "activation": "relu"},#(50,50)
{"in_features": 50, "out_features": 25, "activation": "relu"},#(50,25)
{"in_features": 25, "out_features": 2, "activation": "sigmoid"}]#(25,2)
# parameter-initialization function
def init_layers(nn_cfg, seed=99):
    np.random.seed(seed)  # fixed seed so every run starts from the same parameters
    params = {}
    for idx, layer in enumerate(nn_cfg):
        layer_idx = idx + 1
        in_features = layer["in_features"]
        out_features = layer["out_features"]
        # small random values keep the initial activations in a sensible range
        params['w' + str(layer_idx)] = np.random.randn(in_features, out_features) * 0.1
        params['b' + str(layer_idx)] = np.random.randn(1, out_features) * 0.1
    return params
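As a quick sanity check (a minimal sketch, not part of the training pipeline), we can initialize the parameters and confirm that each layer's shapes match the configuration above:
params = init_layers(nn_cfg)
for idx in range(1, len(nn_cfg) + 1):
    # every wN should be (in_features, out_features) and every bN (1, out_features)
    print('w' + str(idx), params['w' + str(idx)].shape,
          'b' + str(idx), params['b' + str(idx)].shape)
# expected: w1 (2, 25) b1 (1, 25) ... w5 (25, 2) b5 (1, 2)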
3. Activation Functions
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
def relu(z):
    return np.maximum(0, z)
def sigmoid_derivative(da, z):
    # chain rule: upstream gradient da times sigmoid'(z) = sig(z) * (1 - sig(z))
    sig = sigmoid(z)
    return da * sig * (1 - sig)
def relu_derivative(da, z):
    # relu'(z) is 1 for z > 0 and 0 otherwise; copy da so the caller's array is not mutated
    dz = np.array(da)
    dz[z <= 0] = 0
    return dz
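To make sure the derivative helpers are wired up correctly, a finite-difference spot check is useful (an illustrative sketch; the step size and test points are arbitrary, chosen to avoid z = 0 where relu is not differentiable):
z = np.array([[-1.5, 0.3, 2.0]])
eps = 1e-6
da = np.ones_like(z)  # pretend the upstream gradient is all ones
# compare each analytic derivative with a central finite difference
print(np.allclose((sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps),
                  sigmoid_derivative(da, z)))  # should print True
print(np.allclose((relu(z + eps) - relu(z - eps)) / (2 * eps),
                  relu_derivative(da, z)))     # should print True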
4. Forward Propagation
def forward_single_layer(a_prev, w_curr, b_curr, activation="relu"):
    z_curr = np.dot(a_prev, w_curr) + b_curr  # linear step: inputs times weights plus bias
    if activation == "relu":
        act = relu
    elif activation == "sigmoid":
        act = sigmoid
    else:
        raise Exception('Non-supported activation function')
    return act(z_curr), z_curr  # activated output, pre-activation value
def forward_full_layer(X, params, nn_cfg):
    memory = {}  # caches each layer's input and pre-activation for backpropagation
    a_curr = X
    for idx, layer in enumerate(nn_cfg):
        layer_idx = idx + 1
        a_prev = a_curr
        act = layer["activation"]
        w_curr = params["w" + str(layer_idx)]
        b_curr = params["b" + str(layer_idx)]
        a_curr, z_curr = forward_single_layer(a_prev, w_curr, b_curr, act)
        memory["a" + str(idx)] = a_prev
        memory["z" + str(layer_idx)] = z_curr
    return a_curr, memory
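Before wiring in the loss, it helps to push the training data through once and check the shapes (a minimal sketch; the parameters are still untrained):
params = init_layers(nn_cfg)
y_demo, memory = forward_full_layer(x_train, params, nn_cfg)
print(y_demo.shape)    # (900, 2): one score per class for each training sample
print(sorted(memory))  # a0..a4 (layer inputs) and z1..z5 (pre-activations)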
# cross-entropy cost function
def calc_cost(Y_hat, Y):
    m = Y_hat.shape[0]
    cost = -1 / m * (np.dot(Y.T, np.log(Y_hat)) + np.dot((1 - Y).T, np.log(1 - Y_hat)))
    # Y.T @ log(Y_hat) has shape (classes, classes); only its diagonal pairs each
    # class with its own log-probability, so summing the diagonal gives the scalar cost
    return np.sum(np.diagonal(cost))
The cross-entropy loss for a single sample is

L(\hat{y}, y) = -(y\log\hat{y} + (1 - y)\log(1 - \hat{y}))

and the cost averaged over all m samples is

J(W, b) = \frac{1}{m}\sum^m_{i=1}L(\hat{y}^{(i)}, y^{(i)})
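For intuition, here is a tiny worked example with hand-picked numbers (illustrative only):
Y = np.array([[1., 0.], [0., 1.]])          # one-hot labels for two samples
Y_hat = np.array([[0.9, 0.1], [0.2, 0.8]])  # reasonably confident predictions
print(calc_cost(Y_hat, Y))
# -(log 0.9 + log 0.8) ≈ 0.3285; the cost shrinks as Y_hat approaches Y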
# convert the network's output probabilities to class labels
def prob2class(y_hat):
    y_pred = np.argmax(y_hat, axis=1)
    return y_pred
# one-hot encoding: integer labels -> one-hot vectors
def idx2onehot(y):
    cls = max(y) + 1  # number of classes
    num = len(y)
    x = range(num)
    labels = np.zeros((num, cls))
    labels[x, y] = 1  # set the column of each sample's class to 1
    return labels
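A quick round trip shows the two helpers invert each other (illustrative values):
y_demo = np.array([0, 1, 1, 0])
onehot = idx2onehot(y_demo)
print(onehot)              # each row is the one-hot vector of the corresponding label
print(prob2class(onehot))  # [0 1 1 0], recovering the original labels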
# compute classification accuracy
def calc_accuracy(Y_hat, Y, train=True):
    Y_hat_ = prob2class(Y_hat)  # probabilities -> predicted labels
    if train:  # training labels are one-hot and must be converted back; test labels are already integers
        Y = prob2class(Y)
    return (Y_hat_ == Y).mean()
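For example (hand-made predictions, illustrative only):
Y_hat_demo = np.array([[0.9, 0.1], [0.3, 0.7], [0.6, 0.4]])
y_true = np.array([0, 1, 1])  # integer labels, as in the test set
print(calc_accuracy(Y_hat_demo, y_true, train=False))  # 2 of 3 correct -> 0.666...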
5. Backward Propagation
def backward_single_layer(da_curr, w_curr, b_curr, z_curr, a_prev, activation="relu"):
    m = a_prev.shape[0]
    # pick the derivative matching the layer's activation function
    if activation == "relu":
        act_derivative = relu_derivative
    elif activation == "sigmoid":
        act_derivative = sigmoid_derivative
    else:
        raise Exception('Non-supported activation function')
    dz_curr = act_derivative(da_curr, z_curr)
    dw_curr = np.dot(a_prev.T, dz_curr) / m  # average the weight gradient over the batch
    db_curr = np.sum(dz_curr, axis=0, keepdims=True) / m  # average the bias gradient over the batch
    da_prev = np.dot(dz_curr, w_curr.T)  # gradient passed back to the previous layer
    return da_prev, dw_curr, db_curr
Derivative of the cross-entropy loss with respect to \hat{Y}:

\frac{\partial L}{\partial \hat{Y}} = -\big(\frac{Y}{\hat{Y}} - \frac{1 - Y}{1 - \hat{Y}}\big)
def full_backward_propagation(Y_hat, Y, memory, params, nn_cfg):
    grads = {}
    m = Y.shape[0]
    Y = Y.reshape(Y_hat.shape)
    # derivative of the cross-entropy loss with respect to Y_hat
    da_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))
    for layer_idx_prev, layer in reversed(list(enumerate(nn_cfg))):  # propagate from the last layer backwards
        layer_idx_curr = layer_idx_prev + 1
        act = layer["activation"]
        da_curr = da_prev
        a_prev = memory["a" + str(layer_idx_prev)]
        z_curr = memory["z" + str(layer_idx_curr)]
        w_curr = params["w" + str(layer_idx_curr)]
        b_curr = params["b" + str(layer_idx_curr)]
        da_prev, dw_curr, db_curr = backward_single_layer(
            da_curr, w_curr, b_curr, z_curr, a_prev, act)
        grads["dw" + str(layer_idx_curr)] = dw_curr
        grads["db" + str(layer_idx_curr)] = db_curr
    return grads
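Before training, it is worth checking one analytic gradient against a finite difference on a tiny batch (a minimal sketch; the weight index, batch size, and step size are arbitrary choices):
Xc = x_train[:5]
Yc = idx2onehot(y_train)[:5]  # labels are still integers at this point
params = init_layers(nn_cfg)
Y_hat, memory = forward_full_layer(Xc, params, nn_cfg)
grads = full_backward_propagation(Y_hat, Yc, memory, params, nn_cfg)
eps = 1e-5
params['w1'][0, 0] += eps
cost_plus = calc_cost(forward_full_layer(Xc, params, nn_cfg)[0], Yc)
params['w1'][0, 0] -= 2 * eps
cost_minus = calc_cost(forward_full_layer(Xc, params, nn_cfg)[0], Yc)
params['w1'][0, 0] += eps  # restore the original weight
numeric = (cost_plus - cost_minus) / (2 * eps)
print(numeric, grads['dw1'][0, 0])  # the two values should agree closely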
6. Parameter Update
def update(params, grads, nn_cfg, learning_rate):
    # plain gradient descent: step each parameter against its gradient
    for layer_idx, layer in enumerate(nn_cfg, 1):
        params["w" + str(layer_idx)] -= learning_rate * grads["dw" + str(layer_idx)]
        params["b" + str(layer_idx)] -= learning_rate * grads["db" + str(layer_idx)]
    return params
7. Model Training
def train(X, Y, nn_cfg, epochs, learning_rate, train=True):
    params = init_layers(nn_cfg, 2)
    acc_history = []
    cost_history = []
    for i in range(epochs):
        # forward propagation
        Y_hat, memory = forward_full_layer(X, params, nn_cfg)
        # compute accuracy (the train flag signals that Y is one-hot encoded)
        accuracy = calc_accuracy(Y_hat, Y, train=train)
        # compute the loss
        cost = calc_cost(Y_hat, Y)
        acc_history.append(accuracy)
        cost_history.append(cost)
        # backward propagation
        grads = full_backward_propagation(Y_hat, Y, memory, params, nn_cfg)
        # update parameters
        params = update(params, grads, nn_cfg, learning_rate)
    return params, acc_history, cost_history
y_train = idx2onehot(y_train)  # convert integer labels to one-hot encoding
params, acc_history, cost_history = train(x_train, y_train, nn_cfg, 10000, 0.01)
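To see whether training converged, we can plot the recorded histories (matplotlib is already imported; the layout below is just one option):
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(cost_history)
ax1.set_xlabel("epoch")
ax1.set_ylabel("cost")
ax2.plot(acc_history)
ax2.set_xlabel("epoch")
ax2.set_ylabel("accuracy")
plt.show()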
8. Model Testing
y_hat, _ = forward_full_layer(x_test, params, nn_cfg)
test_accuracy = calc_accuracy(y_hat, y_test, train=False)
print('The accuracy of this test dataset is {}%.'.format(test_accuracy * 100))
Output
The accuracy of this test dataset is 94.0%.
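As a final visual check we can color the feature plane with the model's predicted class (a sketch; the grid resolution and margins are arbitrary):
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 0.5, X[:, 0].max() + 0.5, 200),
                     np.linspace(X[:, 1].min() - 0.5, X[:, 1].max() + 0.5, 200))
grid = np.c_[xx.ravel(), yy.ravel()]      # every point on the grid
probs, _ = forward_full_layer(grid, params, nn_cfg)
zz = prob2class(probs).reshape(xx.shape)  # predicted class at each grid point
plt.contourf(xx, yy, zz, alpha=0.3)
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test)
plt.show()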
Summary
Although we have implemented a neural network with numpy, the result still needs further verification. In the next chapter we will verify it with the mature deep learning framework PyTorch.