目录
一.ANN的基本要素
(1)常用激活函数
(2)常用学习规则
- Hebb规则
- 误差修正法学习算法 (如:BP算法)
- 胜者为王(Winner-Take-All)学习规则
(3)神经元之间连接方式
<1>前馈网络
<2>反馈网络
二.BP神经网络原理及流程
1.原理
BP(Back Propagation)神经网络的学习过程由信号的正向传播与误差的反向传播两个过程组成。正向传播时,输入样本从输入层传入,经隐层逐层处理后,传向输出层。若输出层的实际输出与期望输出不符,则转向误差的反向传播阶段。误差的反向传播是将输出误差以某种形式通过隐层向输入层逐层反传,并将误差分摊给各层的所有单元,从而获得各层单元的误差信号,此误差信号即作为修正各单元权值的依据。
2.流程
1、网络的初始化
假设输入层的节点个数为 $n$，隐含层的节点个数为 $l$，输出层的节点个数为 $m$。输入层到隐含层的权重为 $w_{ij}$，隐含层到输出层的权重为 $w_{jk}$，输入层到隐含层的偏置为 $a_j$，隐含层到输出层的偏置为 $b_k$。学习速率为 $\eta$，激励函数为 $g(x)$。其中激励函数 $g(x)$ 取 Sigmoid 函数，形式为：
$$g(x)=\frac{1}{1+e^{-x}}$$
2、隐含层的输出
如上面的三层BP网络所示，隐含层的输出为
$$H_j = g\left(\sum_{i=1}^{n} w_{ij} x_i + a_j\right),\quad j=1,\dots,l$$
3、输出层的输出
$$O_k = \sum_{j=1}^{l} H_j w_{jk} + b_k,\quad k=1,\dots,m$$
4、误差的计算
取误差函数为
$$E=\frac{1}{2}\sum_{k=1}^{m}(Y_k-O_k)^2$$
其中 $Y_k$ 为期望输出。我们记 $Y_k-O_k=e_k$，则 $E$ 可以表示为
$$E=\frac{1}{2}\sum_{k=1}^{m}e_k^2$$
以上公式中，$i=1,\dots,n$，$j=1,\dots,l$，$k=1,\dots,m$。
5、权值的更新
权值的更新公式为：
$$w_{ij} := w_{ij} + \eta H_j(1-H_j)\,x_i\sum_{k=1}^{m}w_{jk}e_k,\qquad w_{jk} := w_{jk} + \eta H_j e_k$$
三.实现
1.正向传播
2.反向传播
四.案例
1.案例1
# -*- coding: utf-8 -*-
import numpy as np
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt
class Config:
    """Hyper-parameters for the two-moons classification network."""
    nn_input_dim = 2    # input dimensionality (the x, y coordinates of each sample)
    nn_output_dim = 2   # output dimensionality (one score per class, two classes)
    epsilon = 0.01      # gradient-descent learning rate
    reg_lambda = 0.01   # L2 regularization strength
def generate_data():
    """Generate the two-moons toy dataset: 200 samples with noise 0.2.

    Returns:
        (X, y): a (200, 2) feature matrix and the binary class labels.
    """
    np.random.seed(0)  # fixed seed so the generated dataset is reproducible
    return datasets.make_moons(200, noise=0.20)
def visualize(X, y, model):
    """Plot the model's decision boundary over the training samples."""
    # pass a closure so plot_decision_boundary can score arbitrary grid points
    predictor = lambda pts: predict(model, pts)
    plot_decision_boundary(predictor, X, y)
    plt.title("Logistic Regression")
def plot_decision_boundary(pred_func, X, y):
    """Shade the 2-D plane by predicted class and overlay the data points.

    Args:
        pred_func: callable mapping an (n, 2) array of points to class labels.
        X: (n, 2) training features (used for the plot bounds and scatter).
        y: training labels (used only to colour the scatter plot).
    """
    # bounding box of the data, padded by 0.5 on every side
    pad = .5
    x_min, x_max = X[:, 0].min() - pad, X[:, 0].max() + pad
    y_min, y_max = X[:, 1].min() - pad, X[:, 1].max() + pad
    step = 0.01
    # dense grid covering the box; the class boundary is drawn from these points
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))
    grid = np.c_[xx.ravel(), yy.ravel()]
    Z = pred_func(grid).reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
    plt.show()
def predict(model, x):
    """Run a forward pass and return the most probable class for each sample.

    Args:
        model: dict with parameter arrays under keys 'W1', 'b1', 'W2', 'b2'.
        x: (n, 2) array of input points.

    Returns:
        (n,) array of predicted class indices.
    """
    W1, b1 = model['W1'], model['b1']
    W2, b2 = model['W2'], model['b2']
    # hidden layer: affine transform followed by tanh activation
    hidden = np.tanh(x.dot(W1) + b1)
    # output layer: affine transform followed by softmax
    scores = hidden.dot(W2) + b2
    exp_scores = np.exp(scores)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return np.argmax(probs, axis=1)
def build_model(X, y, nn_hdim, num_passes=20000, print_loss=False):
    """Train a one-hidden-layer softmax classifier with full-batch
    gradient descent and back-propagation.

    Args:
        X: (n, 2) training features.
        y: (n,) integer class labels.
        nn_hdim: number of hidden units.
        num_passes: number of full-batch gradient-descent iterations.
        print_loss: if True, print the regularized cross-entropy loss every
            1000 iterations. (Previously this flag was accepted but ignored.)

    Returns:
        dict with keys 'W1', 'b1', 'W2', 'b2' holding the learned parameters.
    """
    num_examples = len(X)
    np.random.seed(0)  # reproducible weight initialization
    # scaled random init for the weights, zeros for the biases
    W1 = np.random.randn(Config.nn_input_dim, nn_hdim) / np.sqrt(Config.nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, Config.nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, Config.nn_output_dim))
    model = {}
    for i in range(0, num_passes):
        # ---- forward pass ----
        z1 = X.dot(W1) + b1          # input -> hidden pre-activation
        a1 = np.tanh(z1)             # hidden activation
        z2 = a1.dot(W2) + b2         # hidden -> output pre-activation
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)  # softmax
        if print_loss and i % 1000 == 0:
            # mean cross-entropy plus L2 penalty; must be computed here
            # because the backward pass mutates `probs` in place below
            correct_logprobs = -np.log(probs[range(num_examples), y])
            data_loss = np.sum(correct_logprobs)
            data_loss += Config.reg_lambda / 2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
            print("Loss after iteration %i: %f" % (i, data_loss / num_examples))
        # ---- backward pass ----
        # dL/dz2 for softmax + cross-entropy is simply probs - one_hot(y)
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        dW2 = (a1.T).dot(delta3)                     # dL/dW2
        db2 = np.sum(delta3, axis=0, keepdims=True)  # dL/db2
        # tanh'(z1) = 1 - a1**2
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X.T, delta2)                    # dL/dW1
        db1 = np.sum(delta2, axis=0)                 # dL/db1
        # L2 weight-decay gradient (biases are conventionally not regularized)
        dW2 += Config.reg_lambda * W2
        dW1 += Config.reg_lambda * W1
        # gradient-descent parameter update
        W1 += -Config.epsilon * dW1
        b1 += -Config.epsilon * db1
        W2 += -Config.epsilon * dW2
        b2 += -Config.epsilon * db2
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    return model
def main():
    """Train on the two-moons data and plot the learned decision boundary."""
    X, y = generate_data()
    model = build_model(X, y, 8)
    visualize(X, y, model)


if __name__ == "__main__":
    main()
2.案例2
import math
import random
random.seed(0)  # fix the RNG so weight initialization (and thus results) are reproducible
def rand(a, b):
    """Return a uniform random float drawn from the half-open interval [a, b)."""
    span = b - a
    return a + span * random.random()
def make_matrix(m, n, fill=0.0):
    """Build an m-by-n matrix (a list of independent row lists) filled with `fill`."""
    return [[fill] * n for _ in range(m)]
def sigmoid(x):
    """Numerically stable logistic function 1 / (1 + e**-x).

    The naive form overflows in math.exp for x below roughly -709; branching
    on the sign keeps the exponent non-positive, so exp never overflows.
    """
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    z = math.exp(x)
    return z / (1.0 + z)
def sigmoid_derivative(x):
    """Derivative of the sigmoid expressed in terms of its OUTPUT:
    pass x = sigmoid(t) and this returns sigmoid'(t) = x * (1 - x).
    """
    complement = 1 - x
    return x * complement
class BPNeuralNetwork:
    """A three-layer (input, hidden, output) feed-forward network trained
    with classic online back-propagation plus a momentum term.

    Weights are stored as nested lists; the `*_correction` matrices hold
    each weight's previous change, reused as the momentum contribution.
    """

    def __init__(self):
        # layer sizes (set by setup)
        self.input_n = 0
        self.hidden_n = 0
        self.output_n = 0
        # activations of each layer, kept between calls
        self.input_cells = []
        self.hidden_cells = []
        self.output_cells = []
        # input->hidden and hidden->output weight matrices
        self.input_weights = []
        self.output_weights = []
        # previous weight changes, used as the momentum term
        self.input_correction = []
        self.output_correction = []

    def setup(self, ni, nh, no):
        """Initialize the topology and randomize the weights.

        Args:
            ni: number of input units (one extra constant bias unit is added).
            nh: number of hidden units.
            no: number of output units.
        """
        self.input_n = ni + 1  # +1 for a constant bias input
        self.hidden_n = nh
        self.output_n = no
        # init cells
        self.input_cells = [1.0] * self.input_n
        self.hidden_cells = [1.0] * self.hidden_n
        self.output_cells = [1.0] * self.output_n
        # init weights
        self.input_weights = make_matrix(self.input_n, self.hidden_n)
        self.output_weights = make_matrix(self.hidden_n, self.output_n)
        # random init: small range for input->hidden, larger for hidden->output
        for i in range(self.input_n):
            for h in range(self.hidden_n):
                self.input_weights[i][h] = rand(-0.2, 0.2)
        for h in range(self.hidden_n):
            for o in range(self.output_n):
                self.output_weights[h][o] = rand(-2.0, 2.0)
        # init momentum (previous-change) matrices to zero
        self.input_correction = make_matrix(self.input_n, self.hidden_n)
        self.output_correction = make_matrix(self.hidden_n, self.output_n)

    def predict(self, inputs):
        """Forward pass; returns a copy of the output-layer activations."""
        # activate input layer (the last cell stays 1.0 as the bias input)
        for i in range(self.input_n - 1):
            self.input_cells[i] = inputs[i]
        # activate hidden layer
        for j in range(self.hidden_n):
            total = 0.0
            for i in range(self.input_n):
                total += self.input_cells[i] * self.input_weights[i][j]
            self.hidden_cells[j] = sigmoid(total)
        # activate output layer
        for k in range(self.output_n):
            total = 0.0
            for j in range(self.hidden_n):
                total += self.hidden_cells[j] * self.output_weights[j][k]
            self.output_cells[k] = sigmoid(total)
        return self.output_cells[:]

    def back_propagate(self, case, label, learn, correct):
        """One forward + backward pass on a single training sample.

        Args:
            case: input vector.
            label: expected output vector.
            learn: learning rate.
            correct: momentum factor applied to the previous weight change.

        Returns:
            The half squared error of this sample (from the forward pass).
        """
        # feed forward
        self.predict(case)
        # output-layer deltas: error * sigmoid'(output)
        output_deltas = [0.0] * self.output_n
        for o in range(self.output_n):
            error = label[o] - self.output_cells[o]
            output_deltas[o] = sigmoid_derivative(self.output_cells[o]) * error
        # hidden-layer deltas: back-propagated output deltas * sigmoid'(hidden)
        hidden_deltas = [0.0] * self.hidden_n
        for h in range(self.hidden_n):
            error = 0.0
            for o in range(self.output_n):
                error += output_deltas[o] * self.output_weights[h][o]
            hidden_deltas[h] = sigmoid_derivative(self.hidden_cells[h]) * error
        # update hidden->output weights (gradient step + momentum)
        for h in range(self.hidden_n):
            for o in range(self.output_n):
                change = output_deltas[o] * self.hidden_cells[h]
                self.output_weights[h][o] += learn * change + correct * self.output_correction[h][o]
                self.output_correction[h][o] = change
        # update input->hidden weights (gradient step + momentum)
        for i in range(self.input_n):
            for h in range(self.hidden_n):
                change = hidden_deltas[h] * self.input_cells[i]
                self.input_weights[i][h] += learn * change + correct * self.input_correction[i][h]
                self.input_correction[i][h] = change
        # half squared error over all outputs (uses pre-update activations)
        error = 0.0
        for o in range(len(label)):
            error += 0.5 * (label[o] - self.output_cells[o]) ** 2
        return error

    def train(self, cases, labels, limit=10000, learn=0.05, correct=0.1):
        """Run `limit` epochs of online back-propagation over all cases.

        Returns:
            Total error of the final epoch. (Previously this value was
            computed but discarded; callers ignoring the return value,
            which used to be None, are unaffected.)
        """
        error = 0.0
        for _ in range(limit):
            error = 0.0
            for i in range(len(cases)):
                label = labels[i]
                case = cases[i]
                error += self.back_propagate(case, label, learn, correct)
        return error

    def test(self):
        """Train the network on XOR and print its output for each input."""
        cases = [
            [0, 0],
            [0, 1],
            [1, 0],
            [1, 1],
        ]
        labels = [[0], [1], [1], [0]]
        self.setup(2, 5, 1)
        self.train(cases, labels, 10000, 0.05, 0.1)
        for case in cases:
            print(self.predict(case))
if __name__ == '__main__':
    # demo: train a 2-5-1 network on XOR and print its outputs
    BPNeuralNetwork().test()