Covers dataset visualization + feature dimensionality reduction.
For the Read_File module, see https://blog.csdn.net/wangdiedang/article/details/125335812?spm=1001.2014.3001.5502
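The Read_File module itself is not reproduced here. Judging from how read_main() is used below, it returns (train_imgs, train_labels, test_imgs, test_labels). A minimal stand-in sketch, assuming the standard MNIST split of 60000/10000 binarized 28x28 images, useful for running the script without the original loader:

# Hypothetical stand-in for Read_File; the real module loads MNIST from disk
import numpy as np
def read_main():
    train_imgs = np.random.randint(0, 2, (60000, 28, 28))  # assumed binary images
    train_labels = np.random.randint(0, 10, 60000)         # digit labels 0-9
    test_imgs = np.random.randint(0, 2, (10000, 28, 28))
    test_labels = np.random.randint(0, 10, 10000)
    return train_imgs, train_labels, test_imgs, test_labels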
# creator : wangdiedang
# time : 2022/6/9 17:16
# filename : BPNN.py
import numpy as np
import Read_File as RF
from sklearn.preprocessing import OneHotEncoder
from scipy.optimize import minimize
import copy
import time
# Downsample a single image: partition the 28*28 image into dim*dim blocks
# of num*num pixels and binarize each block by its lit-pixel count
def featureExtraction(img, dim, num):
    res = np.empty((dim, dim))
    for i in range(0, dim):
        for j in range(0, dim):
            # count the pixels in each block; set the cell to 1 when the
            # count exceeds a dim-dependent threshold
            tmp = img[num * i:num * (i + 1), num * j:num * (j + 1)].sum()
            if tmp > max((28 // dim - 1), 1):
                res[i, j] = 1
            else:
                res[i, j] = 0
    return res
# Takes an image set and a target dimension; downsamples every image to dim*dim
def Extraction2AllImgs(imgs, dim):
    res = np.empty((imgs.shape[0], dim, dim))
    num = 28 // dim
    for k, img in enumerate(imgs):
        # reduce the feature dimension of each image
        res[k] = featureExtraction(img, dim, num)
    return res
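# Illustrative check (hypothetical values): downsampling five random binary
# 28x28 images to 7x7; each 4x4 block maps to a single binary cell
# imgs = np.random.randint(0, 2, (5, 28, 28))
# small = Extraction2AllImgs(imgs, 7)   # -> shape (5, 7, 7)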
def read_data(dim=7):
    # load the raw data
    train_imgs, train_labels, test_imgs, test_labels = RF.read_main()
    # reduce the feature dimension of the training and test images
    # from the original 28*28 down to dim*dim
    if dim < 28:
        train_imgs = Extraction2AllImgs(train_imgs, dim)
        test_imgs = Extraction2AllImgs(test_imgs, dim)
    # number of input features for the network
    input_layer_size = dim * dim
    # number of hidden-layer units
    hidden_layer_size = 25
    # number of output-layer units
    num_labels = 10
    return train_imgs, train_labels, test_imgs, test_labels, input_layer_size, hidden_layer_size, num_labels
# sigmoid activation function
def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))
# gradient of the sigmoid: g'(z) = g(z) * (1 - g(z))
def sigmoidGradient(z):
    t = sigmoid(z)
    gradient = np.multiply(t, 1 - t)
    return gradient
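# Illustrative check of the identity above: the central difference of sigmoid
# should match sigmoidGradient to high precision
# z, h = 0.5, 1e-6
# assert abs((sigmoid(z + h) - sigmoid(z - h)) / (2 * h) - sigmoidGradient(z)) < 1e-8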
# Randomly initialize a layer's weights to break symmetry; the extra
# column holds the bias weights
def initializeWeights(row, col):
    epsilon_init = 0.12
    W = np.random.rand(row, col + 1) * 2 * epsilon_init - epsilon_init
    return W
# Forward propagation through the network
def forward_propagate(X, theta1, theta2):
    m, n = X.shape
    a1 = np.hstack((np.ones((m, 1)), X))            # input plus bias column
    z2 = np.dot(a1, theta1.T)
    a2 = np.hstack((np.ones((m, 1)), sigmoid(z2)))  # hidden activations plus bias
    z3 = np.dot(a2, theta2.T)
    a3 = sigmoid(z3)
    return a1, z2, a2, z3, a3
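# Shape walkthrough for dim = 7 (so input_layer_size = 49), illustrative:
#   X: (m, 49) -> a1: (m, 50) -> z2 = a1 @ theta1.T: (m, 25)
#   sigmoid(z2) + bias -> a2: (m, 26) -> z3 = a2 @ theta2.T: (m, 10) -> a3: (m, 10)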
# Cost function; the gradient is computed by backpropagation
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_):
    # unroll the flat parameter vector into the two weight matrices
    theta1 = nn_params[0:hidden_layer_size * (input_layer_size + 1)].reshape(
        hidden_layer_size, input_layer_size + 1)
    theta2 = nn_params[(hidden_layer_size * (input_layer_size + 1)):].reshape(
        num_labels, hidden_layer_size + 1)
    m, n = X.shape
    # initialize the gradient accumulators to zero
    delta_1 = np.zeros(theta1.shape)
    delta_2 = np.zeros(theta2.shape)
a1, z2, a2, z3, a3 = forward_propagate(X, theta1, theta2)
    # y_ = np.zeros((m, num_labels))
    # for i in range(m):
    #     y_[i, (y[i] - 1)] = 1
    # one-hot encode the labels; assumes every one of the num_labels classes
    # appears in y (scikit-learn >= 1.2 renames the argument to sparse_output)
    encoder = OneHotEncoder(sparse=False)
    y_ = encoder.fit_transform(y)
    # regularized cross-entropy cost (bias weights excluded from the penalty)
    J = (-y_ * np.log(a3) - (1 - y_) * np.log(1 - a3)).sum() / m + (
        np.multiply(theta1[:, 1:], theta1[:, 1:]).sum() +
        np.multiply(theta2[:, 1:], theta2[:, 1:]).sum()) * lambda_ / (2 * m)
    # accumulate gradients one sample at a time (backpropagation)
    for i in range(m):
        a1_i = np.mat(a1[i])
        z2_i = np.mat(z2[i])
        # z2_i = np.hstack((np.mat(np.zeros(z2_i.shape[0])), z2_i))
        a2_i = np.mat(a2[i])
        a3_i = np.mat(a3[i])
        y_i = y_[i]
        err3 = a3_i - y_i  # output-layer error, shape (1, num_labels)
        err2 = np.multiply(np.dot(err3, theta2[:, 1:]), sigmoidGradient(z2_i))
        delta_1 += np.dot(err2.T, a1_i)
        delta_2 += np.dot(err3.T, a2_i)
    # average the accumulated gradients and add the regularization term
    theta1_grad = delta_1 / m
    theta1_grad[:, 1:] += theta1[:, 1:] * lambda_ / m
    theta2_grad = delta_2 / m
    theta2_grad[:, 1:] += theta2[:, 1:] * lambda_ / m
    grad = np.append(theta1_grad.flatten(), theta2_grad.flatten())
    return J, grad
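# Optional sanity check (illustrative, with made-up layer sizes): compare the
# backpropagation gradient against a central-difference numerical estimate
def gradient_check(epsilon=1e-4):
    ils, hls, nl, m = 3, 5, 4, 12
    X = np.random.rand(m, ils)
    # cycle the labels so every class appears (required by the one-hot encoder)
    y = np.array([i % nl for i in range(m)]).reshape(-1, 1)
    params = np.append(initializeWeights(hls, ils).flatten(),
                       initializeWeights(nl, hls).flatten())
    _, grad = nnCostFunction(params, ils, hls, nl, X, y, 1)
    num_grad = np.zeros(params.shape)
    for i in range(params.size):
        e = np.zeros(params.shape)
        e[i] = epsilon
        J_plus, _ = nnCostFunction(params + e, ils, hls, nl, X, y, 1)
        J_minus, _ = nnCostFunction(params - e, ils, hls, nl, X, y, 1)
        num_grad[i] = (J_plus - J_minus) / (2 * epsilon)
    # the two estimates should agree to several decimal places
    print("max |analytic - numerical|:", np.abs(grad - num_grad).max())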
# Predict labels by forward propagating and taking the argmax of the outputs
def predict(Theta1, Theta2, X):
if len(X.shape) == 1:
X = X.reshape(1, -1)
m = X.shape[0]
z2 = np.insert(X, 0, np.ones(m), axis=1) @ Theta1.T
a2 = sigmoid(z2)
z3 = np.insert(a2, 0, values=np.ones(a2.shape[0]), axis=1) @ Theta2.T
a3 = sigmoid(z3)
p = (np.argmax(a3, axis=1)).reshape(-1, 1)
return p
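# Illustrative call: for a single flattened image x of shape (dim*dim,),
# predict reshapes it to (1, dim*dim) and returns a (1, 1) array with the digit
# digit = predict(Theta1, Theta2, x)[0, 0]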
# Plain batch gradient descent on the unrolled parameters, as an
# alternative to scipy's optimizer; stops once the cost falls below epsilon
def minimize_gradient(_nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, alpha=0.1, maxiter=50,
                      _lambda=1, epsilon=1e-7):
    nn_params = copy.deepcopy(_nn_params)
    m, r, c = X.shape
    X = X.reshape((m, r * c))
    y = y.reshape((-1, 1))
    for i in range(maxiter):
        J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, _lambda)
        print("Iteration %d, cost: %.6f, gradient norm: %.6f" % (i, J, np.linalg.norm(grad)))
        if J <= epsilon:
            break
        nn_params -= alpha * grad
Theta1 = nn_params[0:hidden_layer_size * (input_layer_size + 1)].reshape(
hidden_layer_size, input_layer_size + 1)
Theta2 = nn_params[(hidden_layer_size * (input_layer_size + 1)):].reshape(
num_labels, hidden_layer_size + 1)
return Theta1, Theta2
# Train with scipy's conjugate-gradient optimizer; nnCostFunction returns
# (cost, grad), hence jac=True
def train_NN(train_imgs, train_labels, initial_nn_params, input_layer_size, hidden_layer_size, num_labels,
             _lambda=1, maxiter=50, num=10000):
    m, r, c = train_imgs.shape
    X = train_imgs.reshape((m, r * c))[:num, :]
    y = train_labels.reshape((-1, 1))[:num, :]
    print("----- training the neural network -----")
    result = minimize(fun=nnCostFunction,
                      x0=initial_nn_params,
                      args=(input_layer_size, hidden_layer_size, num_labels, X, y,
                            _lambda),
                      method='CG',
                      jac=True,
                      options={'maxiter': maxiter},
                      tol=1e-5)
    print("----- training finished -----")
    Theta1 = result.x[0:hidden_layer_size * (input_layer_size + 1)].reshape(
        hidden_layer_size, input_layer_size + 1)
    Theta2 = result.x[(hidden_layer_size * (input_layer_size + 1)):].reshape(
        num_labels, hidden_layer_size + 1)
    return Theta1, Theta2
if __name__ == '__main__':
    old_time = time.time()
    print("----- loading data -----")
    # feature dimension, regularization coefficient, maximum iterations,
    # and number of training samples to use
    dim, _lambda, maxiter, num = 28, 1, 50, 60000
    train_imgs, train_labels, test_imgs, test_labels, input_layer_size, hidden_layer_size, num_labels = read_data(dim)
    print("----- data loaded -----")
    print("----- initializing weight matrices -----")
    initial_Theta1 = initializeWeights(hidden_layer_size, input_layer_size)
    initial_Theta2 = initializeWeights(num_labels, hidden_layer_size)
    initial_nn_params = np.append(initial_Theta1.flatten(), initial_Theta2.flatten())
    print("----- weight matrices initialized -----")
    Theta1, Theta2 = train_NN(train_imgs, train_labels, initial_nn_params,
                              input_layer_size, hidden_layer_size, num_labels, _lambda, maxiter, num)
    # Theta1, Theta2 = minimize_gradient(initial_nn_params, input_layer_size, hidden_layer_size, num_labels,
    #                                    train_imgs, train_labels)
    m, r, c = test_imgs.shape
    ty = test_labels.reshape((-1, 1))
    tX = test_imgs.reshape((m, r * c))
    pred = predict(Theta1, Theta2, tX)
    current_time = time.time()
    print("--------------------------------------------------------------")
    print("Training samples: %d, sample size: (%d, %d)" % (num, dim, dim))
    print("Max iterations: %d, regularization coefficient: %d" % (maxiter, _lambda))
    print("Test samples: %d, correct predictions: %d, accuracy: %.3f" % (m, np.sum(pred == ty), np.mean(pred == ty) * 100) + "%")
    print("Elapsed time: " + str(current_time - old_time) + "s")
    print("--------------------------------------------------------------")