一、链式求导
二、numpy layer和反向传播
全部脚本见笔者 GitHub:numpynn.py
import numpy as np
class npLayer():
    """A fully connected layer: ``activation(x @ weights + bias)``.

    Attributes:
        weights: array of shape ``(n_input, n_out)``.
        bias: array of shape ``(n_out,)``.
        activation: ``None``, ``'relu'``, ``'tanh'`` or ``'sigmoid'``. Any
            other value is treated as the identity function.
        last_activation: output of the most recent ``activate`` call.
        error, delta: scratch slots; this class only initialises them to
            ``None`` and never writes them afterwards.
    """

    def __init__(self, n_input, n_out, activation=None, weights=None,
                 bias=None):
        """Create the layer, drawing random parameters unless they are given.

        Args:
            n_input: number of input features.
            n_out: number of output units.
            activation: name of the activation function (see class docs).
            weights: optional initial weights; when omitted they are drawn
                from a standard normal scaled by ``sqrt(1 / n_out)``.
            bias: optional initial bias; when omitted it is drawn from a
                standard normal scaled by ``0.1``.
        """
        if weights is None:
            weights = np.random.randn(n_input, n_out) * np.sqrt(1 / n_out)
        if bias is None:
            bias = np.random.randn(n_out) * 0.1
        self.weights = weights
        self.bias = bias
        self.activation = activation
        self.last_activation = None
        self.error = None
        self.delta = None

    def activate(self, x):
        """Run the forward pass on ``x`` and remember the result."""
        r = np.dot(x, self.weights) + self.bias
        self.last_activation = self.apply_activation(r)
        return self.last_activation

    def apply_activation(self, r):
        """Apply the configured activation to the pre-activation ``r``."""
        if self.activation == 'relu':
            return np.maximum(r, 0)
        if self.activation == 'tanh':
            return np.tanh(r)
        if self.activation == 'sigmoid':
            # Same value as 1 / (1 + exp(-r)), but exp() is only ever
            # evaluated on a non-positive argument, so a large negative r
            # cannot overflow.
            return np.exp(-np.logaddexp(0, -r))
        # None or an unrecognised name: identity.
        return r

    def apply_activation_derivative(self, act_r):
        """Return the activation's derivative, given the activation OUTPUT.

        Args:
            act_r: the value returned by ``apply_activation`` (not the
                pre-activation); each formula below is written in terms of it.
        """
        if self.activation == 'relu':
            return (act_r > 0) * 1
        if self.activation == 'tanh':
            return 1 - act_r ** 2
        if self.activation == 'sigmoid':
            return act_r * (1 - act_r)
        # None or an unrecognised name: the forward pass is the identity, so
        # the slope is 1 everywhere (returning act_r here would scale the
        # gradient by the layer output).
        return np.ones_like(act_r)

    def __call__(self, x):
        """Alias for ``activate`` so the layer can be called like a function."""
        return self.activate(x)
反向传播(以下 backpropagation 为 NeuralNetwork 类的方法,该类的其余部分未在此处列出)
def backpropagation(self, x, y, learning_rate):
    """Run one forward/backward pass on a single sample and update the layers.

    Each entry of ``self._layers`` is expected to expose ``weights``,
    ``bias``, ``last_activation`` and ``apply_activation_derivative``.
    NOTE(review): the hidden-layer step ``np.dot(weights, delta)`` only
    lines up when ``delta`` is 1-D, i.e. when ``x`` is one sample rather
    than a batch - confirm against the caller.

    Args:
        x: input sample passed to ``self.feed_forward``.
        y: target with the same shape as the network output.
        learning_rate: step size applied to every parameter update.
    """
    output = self.feed_forward(x)  # output of the last layer
    layers = self._layers
    last = len(layers) - 1

    # Backward sweep: compute every layer's delta from the output layer down.
    for i in reversed(range(len(layers))):
        layer = layers[i]
        # Identify the output layer by position, not by comparing objects.
        if i == last:
            layer.error = output - y
            slope = layer.apply_activation_derivative(output)
        else:
            next_layer = layers[i + 1]
            # Push the next layer's delta back through its (not yet
            # updated) weights.
            layer.error = np.dot(next_layer.weights, next_layer.delta)
            slope = layer.apply_activation_derivative(layer.last_activation)
        layer.delta = layer.error * slope

    # Gradient-descent step, done only after all deltas are known so the
    # backward sweep above sees the pre-update weights.
    for i, layer in enumerate(layers):
        # Input seen by this layer: the sample itself for the first layer,
        # otherwise the previous layer's output.
        o_i = np.atleast_2d(x if i == 0 else layers[i - 1].last_activation)
        layer.weights -= layer.delta * o_i.T * learning_rate
        # The bias is added in the forward pass, so it must be trained too;
        # its gradient is the layer's delta. Summing over axis 0 collapses a
        # (1, n_out) delta back to the bias shape (n_out,).
        bias_grad = np.sum(np.atleast_2d(layer.delta), axis=0)
        layer.bias = layer.bias - learning_rate * bias_grad
三、MNIST训练及测试
if __name__ == '__main__':
    # Load the data set as a DataFrame; the last column is used as the label
    # and every other column as a feature.
    mnistdf = get_ministdata()

    # NOTE(review): the random 80% sample becomes the *evaluation* split and
    # only the remaining 20% is trained on - confirm this is intentional.
    sampled_labels = mnistdf.sample(frac=0.8).index.tolist()
    in_sample = mnistdf.index.isin(sampled_labels)
    mnist_te = mnistdf.loc[sampled_labels, :]
    mnist_tr = mnistdf.loc[~in_sample, :]

    x_tr = mnist_tr.iloc[:, :-1].values
    y_tr = mnist_tr.iloc[:, -1].values
    x_te = mnist_te.iloc[:, :-1].values
    y_te = mnist_te.iloc[:, -1].values
    print(x_te.shape)

    # 784 inputs -> 128 relu units -> 10 sigmoid outputs.
    nn = NeuralNetwork()
    for layer_args in ((784, 128, 'relu'), (128, 10, 'sigmoid')):
        nn.add_layer(npLayer(*layer_args))

    # Time the whole training run.
    # NOTE(review): 0.01 and 150 are presumably the learning rate and the
    # epoch count - verify against NeuralNetwork.train.
    started = time.perf_counter()
    mses, accs = nn.train(x_tr, x_te, y_tr, y_te, 0.01, 150)
    cost_ = time.perf_counter() - started
    print(f'cost: {cost_:.2f}s',accs)
================================================================================
Epoch: # 85, MSE: 0.00713
Accuracy: 93.93 %
================================================================================
Epoch: # 90, MSE: 0.00654
Accuracy: 94.09 %
================================================================================
Epoch: # 95, MSE: 0.00600
Accuracy: 94.27 %
================================================================================
Epoch: # 100, MSE: 0.00558
Accuracy: 94.41 %
================================================================================
Epoch: # 105, MSE: 0.00514
Accuracy: 94.53 %
================================================================================
Epoch: # 110, MSE: 0.00479
Accuracy: 94.65 %
================================================================================
Epoch: # 115, MSE: 0.00447
Accuracy: 94.75 %
================================================================================
Epoch: # 120, MSE: 0.00417
Accuracy: 94.84 %
================================================================================
Epoch: # 125, MSE: 0.00393
Accuracy: 94.93 %
================================================================================
Epoch: # 130, MSE: 0.00370
Accuracy: 94.98 %
================================================================================
Epoch: # 135, MSE: 0.00350
Accuracy: 95.03 %
================================================================================
Epoch: # 140, MSE: 0.00332
Accuracy: 95.08 %
================================================================================
Epoch: # 145, MSE: 0.00316
Accuracy: 95.12 %
================================================================================
Epoch: # 150, MSE: 0.00303
Accuracy: 95.14 %
cost: 1104.11s [0.2034285714285714, 0.5135714285714286, 0.5907142857142857, 0.6798928571428572, 0.74375, 0.7954285714285715
, 0.8364821428571428, 0.863125, 0.8833571428571428, 0.8975178571428571, 0.9077857142857142, 0.9149285714285714, 0.9213214285714286
, 0.9264821428571427, 0.9302142857142858, 0.9336071428571429, 0.9372678571428571, 0.9392857142857143, 0.9408928571428572, 0.9427321428571429
, 0.9440535714285714, 0.94525, 0.9465178571428572, 0.9475178571428572, 0.9483571428571429, 0.9493035714285715, 0.9498214285714286
, 0.9502857142857143, 0.95075, 0.9511607142857144, 0.9513571428571429]