Machine Learning: Handwritten Digit Recognition with a Three-Layer BP Neural Network
The full code is below.
# Train a three-layer BP neural network for handwritten digit recognition using gradient descent
import scipy.io as sio
import numpy as np
from sklearn.metrics import classification_report
#####################################
# Compute the network outputs for the given weights theta #
def feedforward(theta1, theta2, X):
    X = np.insert(X, 0, 1, axis=1)    # add the bias unit to the input layer
    a1 = X
    z2 = X.dot(theta1.T)
    a2 = 1 / (1 + np.exp(-z2))        # sigmoid activation of the hidden layer
    a2 = np.insert(a2, 0, 1, axis=1)  # add the bias unit to the hidden layer
    z3 = a2.dot(theta2.T)
    a3 = 1 / (1 + np.exp(-z3))        # sigmoid activation of the output layer
    return a1, a2, a3, z2, z3
# Compute the network outputs for the given weights theta #
#####################################
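# Shape walk-through for this dataset (assuming X is (5000, 400), theta1 is
# (25, 401) and theta2 is (10, 26), as loaded below): a1 becomes (5000, 401)
# after the bias column, z2 is (5000, 25), a2 grows to (5000, 26) with its
# bias column, and a3 is (5000, 10), one row of class scores per example.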
#####################################
# Compute the gradients for one descent step via error back-propagation #
def Back_Propagation(a1, a2, a3, z2, Ymatrix, theta1_train, theta2_train):
    # zero the bias columns so the regularization term matches the cost function,
    # which also excludes the bias weights
    theta1_reg = theta1_train.copy()
    theta2_reg = theta2_train.copy()
    theta1_reg[..., 0] = 0
    theta2_reg[..., 0] = 0
    d3 = a3 - Ymatrix                                      # error term of each output neuron
    D2 = (1 / m) * d3.T.dot(a2) + (l / m) * theta2_reg     # gradient of the hidden-to-output weights
    sig_z2 = 1 / (1 + np.exp(-z2))
    d2 = d3.dot(theta2_train)[..., 1:] * sig_z2 * (1 - sig_z2)  # error term of the hidden layer
    D1 = (1 / m) * d2.T.dot(a1) + (l / m) * theta1_reg     # gradient of the input-to-hidden weights
    return D1, D2
# Compute the gradients for one descent step via error back-propagation #
#####################################
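#####################################
# Optional gradient check (a minimal sketch; eps and the number of sampled
# entries are assumed values). Not called by the training loop below; call it
# manually after computing D1 to compare a central-difference estimate of the
# cost gradient against the back-propagation gradient on a few theta1 entries.
def gradient_check(theta1_c, theta2_c, X_c, Y_c, D1_c, eps=1e-4, n_checks=5):
    for _ in range(n_checks):
        i = np.random.randint(theta1_c.shape[0])
        j = np.random.randint(theta1_c.shape[1])
        t_plus = theta1_c.copy()
        t_minus = theta1_c.copy()
        t_plus[i, j] += eps
        t_minus[i, j] -= eps
        _, _, a3_p, _, _ = feedforward(t_plus, theta2_c, X_c)
        _, _, a3_m, _, _ = feedforward(t_minus, theta2_c, X_c)
        num_grad = (regularized_cost(t_plus, theta2_c, a3_p, Y_c)
                    - regularized_cost(t_minus, theta2_c, a3_m, Y_c)) / (2 * eps)
        print('theta1[%d, %d]: numerical %.6f vs backprop %.6f'
              % (i, j, num_grad, D1_c[i, j]))
#####################################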
#####################################
# Compute the regularized cost function #
def regularized_cost(theta1, theta2, a3, Ymatrix):
    theta1_cost = theta1[..., 1:]  # exclude the bias column from regularization
    theta2_cost = theta2[..., 1:]
    np.seterr(divide='ignore', invalid='ignore')  # silence log(0) warnings when a3 saturates
    # cross-entropy term, averaged over the m training examples
    part1 = np.mean(np.sum((-Ymatrix) * np.log(a3) - (1 - Ymatrix) * np.log(1 - a3), axis=1))
    # L2 penalty on all non-bias weights
    part2 = (l / (2 * m)) * (np.sum(theta1_cost * theta1_cost) + np.sum(theta2_cost * theta2_cost))
    return part1 + part2
# Compute the regularized cost function #
#####################################
if __name__ == '__main__':
    data = sio.loadmat('ex3data1.mat')
    theta = sio.loadmat('ex3weights.mat')
    theta1 = theta['Theta1']  # input-to-hidden weight matrix, shape (25, 401)
    theta2 = theta['Theta2']  # hidden-to-output weight matrix, shape (10, 26)
    X = data['X']
    Y = data['y']
    m = X.shape[0]  # number of training examples
    l = 1           # regularization parameter lambda
    ## convert the label vector Y into a one-hot matrix Ymatrix ##
    i_temp = np.zeros((1, 10))
    Ymatrix = False
    for i in range(m):
        i_temp[..., Y[i] - 1] = 1
        if type(Ymatrix) == bool:
            Ymatrix = i_temp.copy()  # copy, so clearing i_temp below does not erase row 0
        else:
            Ymatrix = np.concatenate((Ymatrix, i_temp), axis=0)
        i_temp[..., Y[i] - 1] = 0
    ## ##
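    # An equivalent vectorized construction (a sketch; assumes the ex3data1.mat
    # convention that labels run 1..10, with digit 0 stored as label 10):
    # Ymatrix = np.eye(10)[Y.flatten() - 1]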
    ## initialize the weight matrices to train ##
    theta1_train = (np.random.rand(theta1.shape[0], theta1.shape[1]) - 0.5) * 2 * 0.12  # uniform in [-0.12, 0.12]
    theta2_train = (np.random.rand(theta2.shape[0], theta2.shape[1]) - 0.5) * 2 * 0.12
    ## ##
    ## run gradient descent ##
    for i in range(3000):
        a1, a2, a3, z2, z3 = feedforward(theta1_train, theta2_train, X)  # forward pass: compute activations a and pre-activations z
        cost = regularized_cost(theta1_train, theta2_train, a3, Ymatrix)
        D1, D2 = Back_Propagation(a1, a2, a3, z2, Ymatrix, theta1_train, theta2_train)  # backward pass: compute gradients D1 and D2
        theta1_train -= D1  # update with an implicit learning rate of 1
        theta2_train -= D2
        print(cost)
    ## ##
    ## print the results ##
    print('cost:', cost)
    print('theta1_train:', theta1_train)
    print('theta2_train:', theta2_train)
    p = np.zeros((1, 10))
    for i in a3:
        index = np.argmax(i)  # index of the largest value in each output vector
        temp = np.zeros((1, 10))
        temp[0][index] = 1    # set the position with the largest output to 1
        p = np.concatenate((p, temp), axis=0)  # each row of p is a 0/1 prediction vector
    p = p[1:]  # drop the all-zero seed row
    print(classification_report(Ymatrix, p))
    print('end')
    ## ##
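Beyond the per-class report, a single overall accuracy number is often handy. A minimal sketch that could be appended at the end of the main block above, assuming the ex3data1.mat label convention (labels 1 to 10, where 10 stands for the digit 0):

    pred = np.argmax(a3, axis=1) + 1  # map the column index back to a label in 1..10
    print('accuracy:', np.mean(pred == Y.flatten()))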