# BP (back-propagation) neural network implemented in Python.
# Import the required packages.
import numpy as np
import matplotlib.pyplot as plt
# Activation function (the sigmoid function is used).
def sigmoid(x):
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``, applied element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` so that large negative inputs do not overflow ``exp``
    (the naive form emits an overflow RuntimeWarning there).

    :param x: scalar, ndarray or matrix of pre-activations
    :return: sigmoid of ``x``; the NumPy ufuncs keep the container type of ``x``
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)),
    # computed without ever materialising exp(-x) on its own.
    return np.exp(-np.logaddexp(0, -x))
# Connection layer class (initialisation, forward propagation, error feedback, etc.).
class Layer(object):
    """One fully connected layer: initialisation, forward pass and backward pass.

    A non-input layer computes ``output = activation(weight . x - bias)``; note
    that the bias is subtracted. A layer flagged as the input layer hands its
    data through unchanged.
    """

    def __init__(self, units, activation=None, learning_rate=None, bool_input=False):
        """
        :param units: number of neurons in this layer
        :param activation: activation callable; ``sigmoid`` is used when None
        :param learning_rate: step size of the parameter updates; 0.2 when None
        :param bool_input: whether this layer is the input layer
        """
        self.units = units
        # Weights and bias stay None until init() is called.
        self.weight = None
        self.bias = None
        self.activation = sigmoid if activation is None else activation
        self.learning_rate = 0.2 if learning_rate is None else learning_rate
        self.bool_input = bool_input

    # Initialise weights and bias
    def init(self, back_units):
        """
        Draw the weights and bias that connect this layer to the previous one.
        The input layer does not take part in this.

        :param back_units: number of neurons in the previous layer
        """
        # Normally distributed weight matrix (mean 0, std 0.5): one row per
        # neuron of this layer, one column per neuron of the previous layer.
        self.weight = np.asmatrix(np.random.normal(0, 0.5, (self.units, back_units)))
        # Bias column vector; its size depends only on this layer's neuron count.
        self.bias = np.asmatrix(np.random.normal(0, 0.5, self.units)).T

    # Derivative of the activation
    # sigma'(x) = sigma(x) * (1 - sigma(x))
    def cal_gradient(self):
        """
        Return the derivative of the activation at the last forward output.

        :return: for the sigmoid activation, a square diagonal ndarray whose
            diagonal holds ``output * (1 - output)``; for any other activation
            the scalar 1 (the activation is treated as linear)
        """
        if self.activation is sigmoid:
            activated = np.asarray(self.output)
            # These row sums are exactly the diagonal of
            # output . (1 - output).T, obtained without building the full
            # outer product first.
            diagonal = np.sum(activated * (1 - activated), axis=1)
            return np.diag(diagonal)
        # Any other activation is treated as linear.
        return 1

    # Forward propagation
    # input layer:  no processing
    # first layer:  z1 = w1 * x  - b1, a1 = sigma(z1)
    # second layer: z2 = w2 * a1 - b2, a2 = sigma(z2)
    # ...
    def forward_propagation(self, x_data):
        """
        Run the layer on ``x_data`` and remember the input, ``z`` and output.

        :param x_data: input matrix (the training loop passes one column vector)
        :return: the layer output
        """
        self.x_data = x_data
        if self.bool_input is True:
            # The input layer passes the data through unchanged.
            self.z = x_data
            self.output = x_data
        else:
            # z = w * x - b
            self.z = np.dot(self.weight, self.x_data) - self.bias
            self.output = self.activation(self.z)
        return self.output

    # Backward propagation
    # dE/dw = dE/dy * dy/dw, where E is the loss and y the layer output
    # dE/db = dE/dy * dy/db
    # dE/dy is the incoming ``gradient`` and has the dimension of y
    # dy/dw = d(sigma(z))/dz * dz/dw
    # dy/db = d(sigma(z))/dz * dz/db
    # z = w * x - b, so dz/dw = x and dz/db = -1
    # d(sigma(z))/dz comes from cal_gradient
    def back_propagation(self, gradient):
        """
        Update weight and bias from the gradient of the loss with respect to
        this layer's output, using the values stored by the last forward pass.

        :param gradient: dE/dy for this layer, as a column vector
        :return: the gradient to hand to the previous layer, as a column vector
        """
        grad_activation = self.cal_gradient()
        # d(sigma(z))/dz * dE/dy, kept as a row vector
        delta = np.asmatrix(np.dot(gradient.T, grad_activation))
        # dE/dw = d(sigma(z))/dz * dE/dy * x
        self.gradient_weight = np.dot(delta.T, np.asmatrix(self.x_data).T)
        # dE/db = d(sigma(z))/dz * dE/dy * (-1)
        self.gradient_bias = delta * (-1)
        # Gradient for the previous layer: weighted sum of this layer's local
        # gradients. It is computed before the weights are updated below.
        self.gradient = np.dot(delta, self.weight).T
        # w = w - alpha * dE/dw
        self.weight = self.weight - self.learning_rate * self.gradient_weight
        # b = b - alpha * dE/db
        self.bias = self.bias - self.learning_rate * self.gradient_bias.T
        return self.gradient
# BP model class.
class BPNN(object):
    """Back-propagation network: an ordered list of layers plus a live loss plot."""

    def __init__(self):
        self.layers = []
        # Mean squared error recorded after every training iteration.
        self.train_mse = []
        # Figure and axes used to draw the loss curve.
        self.fig_loss = plt.figure()
        self.ax_loss = self.fig_loss.add_subplot(1, 1, 1)

    # Add a connection layer
    def add_layer(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    # Build the network
    def build_bp(self):
        """Mark the first layer as the input layer and initialise every other
        layer with the neuron count of the layer before it."""
        for i, layer in enumerate(self.layers):
            if i == 0:
                # The first layer is the input layer.
                layer.bool_input = True
            else:
                layer.init(self.layers[i - 1].units)

    # Print the weight and bias dimensions of every layer
    def describe(self):
        """Print the shapes of each layer's weight and bias."""
        for layer in self.layers:
            print("权值维度", np.shape(layer.weight))
            print("偏差维度", np.shape(layer.bias))

    # Squared-error loss
    def cal_loss(self, y_data, y_data_):
        """
        :param y_data: original (target) y values
        :param y_data_: computed y values
        :return: tuple ``(loss, loss_grad)``: the summed squared error and its
            derivative with respect to ``y_data_``
        """
        self.loss = np.sum(np.power(y_data - y_data_, 2))
        # Derivative of the loss with respect to the computed values.
        self.loss_grad = 2 * (y_data_ - y_data)
        return self.loss, self.loss_grad

    def describe_loss(self):
        """Redraw the loss curve from ``self.train_mse``."""
        if self.ax_loss.lines:
            # Drop the previously drawn curve. Artist.remove() is used because
            # current Matplotlib no longer allows mutating Axes.lines directly.
            self.ax_loss.lines[0].remove()
        self.ax_loss.plot(self.train_mse, "r-")
        plt.ion()
        # Label this model's own axes rather than whichever axes is current.
        self.ax_loss.set_xlabel("iter_num")
        self.ax_loss.set_ylabel("loss")
        plt.show()
        plt.pause(0.1)

    # Training
    def train(self, x, y, iter_num, accuracy):
        """
        Train sample by sample for up to ``iter_num`` passes over the data.

        :param x: independent variables, 2-D with one sample per row
        :param y: dependent variables, 2-D with one row per sample
        :param iter_num: maximum number of training iterations
        :param accuracy: training stops once the mean squared error is below this
        :return: the mean squared error if the accuracy target was reached,
            otherwise None
        """
        self.iter_num = iter_num
        self.accuracy = accuracy
        x_size = x.shape
        for iter_i in range(iter_num):
            # Loss accumulated over all samples of this iteration.
            all_loss = 0
            for row in range(x_size[0]):
                # Take one sample as a column vector.
                temp_x = np.asmatrix(x[row, :]).T
                temp_y = np.asmatrix(y[row, :]).T
                # Forward propagation
                for layer in self.layers:
                    temp_x = layer.forward_propagation(temp_x)
                # Loss
                loss, loss_grad = self.cal_loss(temp_y, temp_x)
                all_loss = all_loss + loss
                # Backward propagation from the last layer down to, but not
                # including, the input layer. loss_grad takes the dimension
                # of each layer's input as it travels backwards.
                for layer in self.layers[:0:-1]:
                    loss_grad = layer.back_propagation(loss_grad)
            # Mean squared error over the samples
            mse = all_loss / x_size[0]
            self.train_mse.append(mse)
            self.describe_loss()
            if mse < self.accuracy:
                print("达到精度")
                # Accuracy requirement reached
                return mse

    # Prediction
    def predict(self, X_pre):
        """
        Run the forward pass on ``X_pre``.

        :param X_pre: inputs with one sample per row
        :return: network outputs with one row per sample
        """
        temp_x = X_pre.T
        for layer in self.layers:
            temp_x = layer.forward_propagation(temp_x)
        return temp_x.T
# Test
if __name__ == "__main__":
    # Demo run: ten random samples with ten features each, every sample
    # paired with a two-value target.
    x = np.random.randn(10, 10)
    targets = [
        (0.8, 0.4),
        (0.4, 0.3),
        (0.34, 0.45),
        (0.67, 0.32),
        (0.88, 0.67),
        (0.78, 0.77),
        (0.55, 0.66),
        (0.55, 0.43),
        (0.54, 0.1),
        (0.1, 0.5),
    ]
    y = np.asarray(targets)

    # Four layers of 10, 20, 30 and 2 neurons; build_bp treats the first
    # one as the input layer.
    model = BPNN()
    for units in (10, 20, 30, 2):
        model.add_layer(Layer(units))
    model.build_bp()
    model.describe()

    # At most 100 iterations, stopping early once the MSE drops below 0.01.
    model.train(x, y, iter_num=100, accuracy=0.01)

    # Predict for one fresh random sample.
    sample = np.random.randn(1, 10)
    print(model.predict(sample))
# Test results