# 当一个门外汉做交叉方向研究,选修了神经网络之后的大作业
# 训练结果,预测精度达97.75%
手写体数字分类是机器学习中的一个经典问题,主要目的是实现手写数字图像的自动识别。本文使用MNIST数据集进行研究:该数据集包含60000张训练图像和10000张测试图像,每张图像为28×28像素,表示从0到9的手写数字。目标是设计一个基于神经网络的分类器,能够准确地将每张图像分配到正确的数字类别。
手写体数字分类问题可以采用多种方法进行解决,包括但不限于:
- 基础分类算法:决策树;逻辑回归;支持向量机(SVM);朴素贝叶斯;K-最近邻;神经网络;贝叶斯网络;线性判别分析;最大熵模型;
- 集成分类算法:随机森林;AdaBoost;梯度提升决策树(GBDT);极端梯度提升(XGBoost);
- 决策树桩;K-最近邻朴素贝叶斯
训练数据介绍:
MNIST手写数字数据集包含60000组图片和标签作为训练集,以及10000组图片和标签作为测试集。
mnist数据下载地址:http://yann.lecun.com/exdb/mnist/
# 解压和读取MNIST数据
def load_mnist_images(self, filename):
    """Read a gzip-compressed MNIST IDX3 image file into a 2-D float array.

    Args:
        filename: Path to an ``*-images-idx3-ubyte.gz`` archive.

    Returns:
        A float array with one flattened image per row
        (``rows * cols`` columns) and pixel values scaled from
        ``[0, 255]`` to ``[0, 1]``.

    Raises:
        ValueError: If the 16-byte header is truncated or its magic number
            is not 2051, i.e. the file is not an IDX3 image file.
    """
    with gzip.open(filename, 'rb') as f:
        # Header: four big-endian 32-bit integers (magic, count, rows, cols).
        header = f.read(16)
        if len(header) != 16 or int.from_bytes(header[:4], 'big') != 2051:
            raise ValueError('not an MNIST image file: %r' % (filename,))
        rows = int.from_bytes(header[8:12], 'big')
        cols = int.from_bytes(header[12:16], 'big')
        pixels = np.frombuffer(f.read(), dtype=np.uint8)
    # Dividing by a float yields a new float array normalised to [0, 1].
    return pixels.reshape(-1, rows * cols) / 255.0
def load_mnist_labels(self, filename):
    """Read a gzip-compressed MNIST IDX1 label file.

    Args:
        filename: Path to an ``*-labels-idx1-ubyte.gz`` archive.

    Returns:
        A 1-D ``uint8`` array of integer class labels.  The labels are
        returned as-is; no one-hot encoding is performed.

    Raises:
        ValueError: If the 8-byte header is truncated or its magic number
            is not 2049, i.e. the file is not an IDX1 label file.
    """
    with gzip.open(filename, 'rb') as f:
        # Header: two big-endian 32-bit integers (magic, count).
        header = f.read(8)
        if len(header) != 8 or int.from_bytes(header[:4], 'big') != 2049:
            raise ValueError('not an MNIST label file: %r' % (filename,))
        labels = np.frombuffer(f.read(), dtype=np.uint8)
    return labels
网络结构
网络共四层:输入层(28×28=784个像素)、两层隐藏层(分别为80和40个神经元),以及输出层(10个神经元,对应数字0~9)。
def init_network(self, input_size=28 * 28, hidden_sizes=(80, 40)):
    """Create the weights and biases of the three dense layers.

    Every parameter is drawn from a standard normal distribution and scaled
    by 0.01.  The output layer width is ``self.K``.

    Args:
        input_size: Number of input features (defaults to 28 * 28 pixels).
        hidden_sizes: Widths of the two hidden layers (defaults to 80, 40).
    """
    first, second = hidden_sizes
    # Draw order (W1, b1, W2, b2, W3, b3) is kept so that a fixed NumPy seed
    # reproduces the same initial parameters as before.
    self.W1 = 0.01 * np.random.randn(input_size, first)
    self.b1 = 0.01 * np.random.randn(1, first)
    self.W2 = 0.01 * np.random.randn(first, second)
    self.b2 = 0.01 * np.random.randn(1, second)
    self.W3 = 0.01 * np.random.randn(second, self.K)
    self.b3 = 0.01 * np.random.randn(1, self.K)
激活函数:两层隐藏层使用ReLU,输出层使用Softmax
def train_network(self, img_batch_list, label_batch_list):
    """Forward pass of one training step (article excerpt).

    Only the forward computation is shown in this excerpt; the locals
    computed here are the ones the article's loss and gradient excerpts
    continue from.

    Args:
        img_batch_list: 2-D array with one flattened image per row.
        label_batch_list: Integer class label for each row.
    """
    train_example_num = img_batch_list.shape[0]
    # Hidden layers use the ReLU activation.
    hidden_layer1 = np.maximum(0, np.matmul(img_batch_list, self.W1) + self.b1)
    hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)
    # Output layer uses softmax.
    scores = np.matmul(hidden_layer2, self.W3) + self.b3
    # Subtracting each row's maximum leaves the softmax ratio unchanged but
    # keeps np.exp from overflowing when the scores are large.
    scores_e = np.exp(scores - np.max(scores, axis=1, keepdims=True))
    scores_e_sum = np.sum(scores_e, axis=1, keepdims=True)
    probs = scores_e / scores_e_sum
迭代过程:batchsize是3000,训练2500轮,每轮都训练完整的训练集,不随机采样
def train(self, epochs=2500, report_every=500):
    """Run mini-batch training over the whole training set.

    Each epoch walks the training arrays in order, in consecutive slices of
    ``self.BATCHSIZE`` samples, with no shuffling.

    Args:
        epochs: Number of full passes over the training set.
        report_every: Print progress and evaluate on the test set every
            this many epochs (starting with epoch 0).
    """
    for epoch in range(epochs):
        # One pass over all training data, batch by batch.
        for start in range(0, self.N, self.BATCHSIZE):
            stop = start + self.BATCHSIZE
            img_list = self.train_img_list[start:stop]
            label_list = self.train_label_list[start:stop]
            self.train_network(img_list, label_list)
        if epoch % report_every == 0:
            print("Train Time: ", epoch)
            # The original excerpt called the non-existent `self. Predict()`.
            self.predict()
损失函数:使用交叉熵损失函数,训练过程中加入了L2正则化项对损失函数修正,避免过拟合
# Compute the loss (excerpt from the body of train_network):
# mean cross-entropy of the batch plus an L2 penalty on all three weight matrices.
# A single fancy-indexing step picks each sample's softmax probability for its
# true class, replacing the per-sample Python loop.
true_class = np.asarray(label_batch_list).astype(int)
loss_list_tmp = (
    scores_e[np.arange(train_example_num), true_class] / scores_e_sum[:, 0]
).reshape(train_example_num, 1)
loss_list = -np.log(loss_list_tmp)
l2_penalty = 0.5 * self.reg_factor * (
    np.sum(self.W1 * self.W1) + np.sum(self.W2 * self.W2) + np.sum(self.W3 * self.W3)
)
loss = np.mean(loss_list, axis=0)[0] + l2_penalty
# Keep the history so the loss curve can be saved later.
self.loss_list.append(loss)
权重和偏置更新:手推的过程之后再发,先欠着,这里也可以改进写得简洁一点。下面代码中的dscore是损失对输出层得分scores的梯度,即把probs中真实类别对应的位置减1,再除以批大小(计算过程见后面的完整代码)。
# Backward pass and parameter update (excerpt from the body of train_network).
# dscore is computed outside this excerpt; it is propagated back layer by layer.
# Each weight gradient includes its L2 term reg_factor * W.

# Output layer.
dW3 = np.dot(hidden_layer2.T, dscore) + self.reg_factor * self.W3
db3 = np.sum(dscore, axis=0, keepdims=True)

# Second hidden layer: no gradient flows through units where the ReLU output is 0.
dh2 = np.dot(dscore, self.W3.T)
dh2[hidden_layer2 <= 0] = 0
dW2 = np.dot(hidden_layer1.T, dh2) + self.reg_factor * self.W2
db2 = np.sum(dh2, axis=0, keepdims=True)

# First hidden layer.
dh1 = np.dot(dh2, self.W2.T)
dh1[hidden_layer1 <= 0] = 0
dW1 = np.dot(img_batch_list.T, dh1) + self.reg_factor * self.W1
db1 = np.sum(dh1, axis=0, keepdims=True)

# Gradient-descent step; all gradients were computed from the old parameters.
self.W3 -= self.stepsize * dW3
self.W2 -= self.stepsize * dW2
self.W1 -= self.stepsize * dW1
self.b3 -= self.stepsize * db3
self.b2 -= self.stepsize * db2
self.b1 -= self.stepsize * db1
网络评估
最后使用测试集评估网络模型,评估模型预测的准确度
def predict(self):
    """Classify ``self.test_img_list`` and report the accuracy.

    Prints the per-sample correctness mask and the accuracy, as before.

    Returns:
        The fraction of test samples whose arg-max score equals the
        corresponding entry of ``self.test_label_list``.
    """
    hidden_layer1 = np.maximum(0, np.matmul(self.test_img_list, self.W1) + self.b1)
    hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)
    scores = np.matmul(hidden_layer2, self.W3) + self.b3
    # argmax over axis 1 is already 1-D with one entry per sample, so no
    # reshape to a hard-coded 10000 is needed (it failed for other sizes).
    prediction = np.argmax(scores, axis=1)
    hits = prediction == self.test_label_list
    print(hits)
    accuracy = np.mean(hits)
    print('The accuracy of test data is: ', accuracy)
    return accuracy
图案绘制及预测结果输出:
# Show the first 100 test digits together with the predicted and true labels.
for i in range(100):
    outputs = data.query(data.test_img_list[i])
    label = np.argmax(outputs)
    print(label)                    # predicted digit
    print(data.test_label_list[i])  # true digit
    image_array = data.test_img_list[i].reshape(28, 28)
    plt.imshow(image_array, cmap="Greys", interpolation='None')
    plt.pause(0.001)
    # NOTE(review): the original indentation was lost; plt.show() is assumed
    # to belong to the loop body (one figure per digit) - confirm.
    plt.show()
print('done')
保存权重偏置和损失:
# Save the recorded training losses as plain text.
loss_history = np.array(data.loss_list)
np.savetxt('./loss_mnist.txt', loss_history)
# Save the model weights and biases as one dict; np.save pickles the dict,
# so reading it back requires np.load(..., allow_pickle=True).
weight_bias_mnist = {
    name: getattr(data, name) for name in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
}
np.save('./weight_bias_mnist.npy', weight_bias_mnist)
完整的代码如下:
import numpy as np
import matplotlib.pyplot as plt
import gzip
class Data:
    """Fully connected MNIST digit classifier implemented with NumPy only.

    The network has two ReLU hidden layers (80 and 40 units by default) and
    a softmax output over ``K`` classes.  It is trained with mini-batch
    gradient descent on the cross-entropy loss plus an L2 weight penalty.
    """

    def __init__(self):
        """Load the four MNIST archives and initialise the network."""
        self.K = 10             # number of output classes
        self.BATCHSIZE = 3000   # samples per gradient step
        self.reg_factor = 1e-3  # L2 regularisation strength
        self.stepsize = 5e-2    # learning rate
        # Paths are relative to the current working directory.
        self.train_img_list = self.load_mnist_images('mnist/train-images-idx3-ubyte.gz')
        self.train_label_list = self.load_mnist_labels('mnist/train-labels-idx1-ubyte.gz')
        self.test_img_list = self.load_mnist_images('mnist/t10k-images-idx3-ubyte.gz')
        self.test_label_list = self.load_mnist_labels('mnist/t10k-labels-idx1-ubyte.gz')
        # Take the set sizes from the loaded arrays rather than hard-coding
        # 60000 / 10000, so batching also works for other data set sizes.
        self.N = self.train_img_list.shape[0]
        self.M = self.test_img_list.shape[0]
        self.loss_list = []     # one loss value per processed batch
        self.init_network()

    def load_mnist_images(self, filename):
        """Read a gzip-compressed MNIST IDX3 image file.

        Returns:
            A float array with one flattened image per row and pixel values
            scaled from ``[0, 255]`` to ``[0, 1]``.

        Raises:
            ValueError: If the header is truncated or its magic number is
                not 2051.
        """
        with gzip.open(filename, 'rb') as f:
            # Header: four big-endian 32-bit integers (magic, count, rows, cols).
            header = f.read(16)
            if len(header) != 16 or int.from_bytes(header[:4], 'big') != 2051:
                raise ValueError('not an MNIST image file: %r' % (filename,))
            rows = int.from_bytes(header[8:12], 'big')
            cols = int.from_bytes(header[12:16], 'big')
            pixels = np.frombuffer(f.read(), dtype=np.uint8)
        return pixels.reshape(-1, rows * cols) / 255.0

    def load_mnist_labels(self, filename):
        """Read a gzip-compressed MNIST IDX1 label file.

        Returns:
            A 1-D ``uint8`` array of integer labels (not one-hot encoded).

        Raises:
            ValueError: If the header is truncated or its magic number is
                not 2049.
        """
        with gzip.open(filename, 'rb') as f:
            # Header: two big-endian 32-bit integers (magic, count).
            header = f.read(8)
            if len(header) != 8 or int.from_bytes(header[:4], 'big') != 2049:
                raise ValueError('not an MNIST label file: %r' % (filename,))
            labels = np.frombuffer(f.read(), dtype=np.uint8)
        return labels

    def _forward(self, inputs):
        """Return ``(hidden_layer1, hidden_layer2, scores)`` for *inputs*."""
        hidden_layer1 = np.maximum(0, np.matmul(inputs, self.W1) + self.b1)
        hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)
        scores = np.matmul(hidden_layer2, self.W3) + self.b3
        return hidden_layer1, hidden_layer2, scores

    def predict(self):
        """Classify the test set, print the result and return the accuracy."""
        scores = self._forward(self.test_img_list)[2]
        # argmax over axis 1 is already 1-D with one entry per sample, so no
        # reshape to a hard-coded 10000 is needed.
        prediction = np.argmax(scores, axis=1)
        hits = prediction == self.test_label_list
        print(hits)
        accuracy = np.mean(hits)
        print('The accuracy of test data is: ', accuracy)
        return accuracy

    def query(self, inputs_list):
        """Return the raw (pre-softmax) output scores for *inputs_list*."""
        return self._forward(inputs_list)[2]

    def train(self, epochs=2500, report_every=500):
        """Train for *epochs* passes over the training set.

        Batches are consecutive, unshuffled slices of ``BATCHSIZE`` samples.
        Progress and test accuracy are printed every *report_every* epochs.
        """
        for epoch in range(epochs):
            # One pass over all training data, batch by batch.
            for start in range(0, self.N, self.BATCHSIZE):
                stop = start + self.BATCHSIZE
                self.train_network(self.train_img_list[start:stop],
                                   self.train_label_list[start:stop])
            if epoch % report_every == 0:
                print("Train Time: ", epoch)
                self.predict()

    def train_network(self, img_batch_list, label_batch_list):
        """Run one gradient-descent step on a single batch.

        Appends the batch loss (mean cross-entropy plus L2 penalty) to
        ``self.loss_list`` and updates all weights and biases in place.
        An empty batch is ignored.

        Args:
            img_batch_list: 2-D array with one flattened image per row.
            label_batch_list: Integer class label for each row.
        """
        train_example_num = img_batch_list.shape[0]
        if train_example_num == 0:
            # Nothing to learn from; avoids a division by zero below.
            return
        rows = np.arange(train_example_num)
        true_class = np.asarray(label_batch_list).astype(int)

        hidden_layer1, hidden_layer2, scores = self._forward(img_batch_list)

        # Log-softmax with the row maximum subtracted first, so np.exp cannot
        # overflow and the log of the true-class probability stays finite.
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
        probs = np.exp(log_probs)

        # Loss: mean cross-entropy plus L2 penalty on the weight matrices.
        data_loss = -np.mean(log_probs[rows, true_class])
        l2_penalty = 0.5 * self.reg_factor * (
            np.sum(self.W1 * self.W1)
            + np.sum(self.W2 * self.W2)
            + np.sum(self.W3 * self.W3)
        )
        self.loss_list.append(data_loss + l2_penalty)

        # Gradient of the mean cross-entropy w.r.t. the scores:
        # probabilities with 1 subtracted at the true class, averaged.
        dscore = probs
        dscore[rows, true_class] -= 1
        dscore /= train_example_num

        # Back-propagate; ReLU passes no gradient where its output was 0.
        dW3 = np.dot(hidden_layer2.T, dscore)
        db3 = np.sum(dscore, axis=0, keepdims=True)
        dh2 = np.dot(dscore, self.W3.T)
        dh2[hidden_layer2 <= 0] = 0
        dW2 = np.dot(hidden_layer1.T, dh2)
        db2 = np.sum(dh2, axis=0, keepdims=True)
        dh1 = np.dot(dh2, self.W2.T)
        dh1[hidden_layer1 <= 0] = 0
        dW1 = np.dot(img_batch_list.T, dh1)
        db1 = np.sum(dh1, axis=0, keepdims=True)

        # Gradient of the L2 penalty.
        dW3 += self.reg_factor * self.W3
        dW2 += self.reg_factor * self.W2
        dW1 += self.reg_factor * self.W1

        # Gradient-descent update.
        self.W3 += -self.stepsize * dW3
        self.W2 += -self.stepsize * dW2
        self.W1 += -self.stepsize * dW1
        self.b3 += -self.stepsize * db3
        self.b2 += -self.stepsize * db2
        self.b1 += -self.stepsize * db1
        return

    def init_network(self, input_size=28 * 28, hidden_sizes=(80, 40)):
        """Initialise all weights and biases with 0.01-scaled normal noise.

        Args:
            input_size: Number of input features (defaults to 28 * 28).
            hidden_sizes: Widths of the two hidden layers.
        """
        first, second = hidden_sizes
        self.W1 = 0.01 * np.random.randn(input_size, first)
        self.b1 = 0.01 * np.random.randn(1, first)
        self.W2 = 0.01 * np.random.randn(first, second)
        self.b2 = 0.01 * np.random.randn(1, second)
        self.W3 = 0.01 * np.random.randn(second, self.K)
        self.b3 = 0.01 * np.random.randn(1, self.K)
def main():
    """Train the network, evaluate it, save the results and show samples.

    Writes the loss history to ``./loss_mnist.txt`` and the weights and
    biases to ``./weight_bias_mnist.npy``, then displays the first 100 test
    digits, printing the predicted and the true label for each.
    """
    data = Data()
    data.train()
    data.predict()
    # Save the recorded training losses.
    np.savetxt('./loss_mnist.txt', np.array(data.loss_list))
    # Save the model weights and biases as one dict.
    weight_bias_mnist = {'W1': data.W1, 'b1': data.b1, 'W2': data.W2, 'b2': data.b2, 'W3': data.W3, 'b3': data.b3}
    np.save('./weight_bias_mnist.npy', weight_bias_mnist)
    for i in range(100):
        outputs = data.query(data.test_img_list[i])
        label = np.argmax(outputs)
        print(label)                    # predicted digit
        print(data.test_label_list[i])  # true digit
        image_array = data.test_img_list[i].reshape(28, 28)
        plt.imshow(image_array, cmap="Greys", interpolation='None')
        plt.pause(0.001)
        # NOTE(review): the original indentation was lost; plt.show() is
        # assumed to belong to the loop body (one figure per digit) - confirm.
        plt.show()
    print('done')


if __name__ == '__main__':
    main()
各位好姐好哥要是需要,可以点个赞嗷
这里有完整的技术报告,包括问题描述、分类方法总结、该方法说明、实验结果和参考文献,以及训练损失图、权重偏置、完整的代码和数据集,下载即可食用:《MNIST手写字体识别:Python不使用深度学习库函数》