MNIST手写字体识别：python不使用深度学习库函数

真难学啊

已于 2024-07-07 21:15:43 修改

阅读量219

点赞数 4

分类专栏：机器学习、笔记、神经网络训练技巧　文章标签：python、深度学习、神经网络

于 2024-06-13 14:13:02 首次发布

本文链接：https://blog.csdn.net/sky_ying/article/details/139651851

版权

笔记同时被 3 个专栏收录

4 篇文章 0 订阅

订阅专栏

神经网络训练技巧

3 篇文章 0 订阅

订阅专栏

机器学习

2 篇文章 1 订阅

订阅专栏

# 当一个门外汉做交叉方向研究，选修了神经网络之后的大作业

# 训练结果，预测精度达97.75%

手写体数字分类是机器学习中的一个经典问题，主要目的是实现手写数字图像的自动识别。该问题使用MNIST数据集进行研究，MNIST数据集包含60000张训练图像和10000张测试图像，每张图像为28x28像素，表示从0到9的手写数字。目标是设计一个基于神经网络的分类器，能够准确地将每张图像分配到正确的数字类别。

手写体数字分类问题可以采用多种方法进行解决，包括但不限于：

基础分类算法：决策树；逻辑回归；支持向量机（SVM）；朴素贝叶斯；K-最近邻；神经网络；贝叶斯网络；线性判别分析；最大熵模型；
集成分类算法：随机森林；AdaBoost；梯度提升决策树（GBDT）；极端梯度提升（XGBoost）；
决策树桩；K-最近邻朴素贝叶斯

训练数据介绍：

mnist手写字体有60000组图片和标签做训练集，以及10000组图片和标签做测试集

mnist数据下载地址：http://yann.lecun.com/exdb/mnist/

    # 解压和读取MNIST数据
    def load_mnist_images(self, filename):
        """Read a gzip-compressed MNIST image file into a float matrix.

        The first 16 bytes of the decompressed stream are discarded as the
        header; every remaining byte is treated as one pixel. Pixels are
        grouped into rows of 28 * 28 values and divided by 255.0.

        Args:
            filename: Path of the ``.gz`` image file.

        Returns:
            A 2-D float array with 784 columns (one row per image) whose
            values lie in [0, 1].
        """
        header_size = 16
        with gzip.open(filename, 'rb') as stream:
            stream.read(header_size)  # discard the header bytes
            raw_pixels = stream.read()
        pixels = np.frombuffer(raw_pixels, dtype=np.uint8)
        return pixels.reshape(-1, 28 * 28) / 255.0

    def load_mnist_labels(self, filename):
        """Read a gzip-compressed MNIST label file.

        The first 8 bytes of the decompressed stream are discarded as the
        header; each remaining byte becomes one label. No one-hot encoding
        is applied: the raw byte values are returned as they are stored.

        Args:
            filename: Path of the ``.gz`` label file.

        Returns:
            A 1-D ``uint8`` array with one entry per label.
        """
        header_size = 8
        with gzip.open(filename, 'rb') as stream:
            stream.read(header_size)  # discard the header bytes
            raw_labels = stream.read()
        return np.frombuffer(raw_labels, dtype=np.uint8)

网络结构

四层：分别是一层28*28的图案输入层，两层隐藏层（80，40），最后一层输出层（10）

    def init_network(self):
        """Create the weights and biases of the three fully connected layers.

        Layer widths are 28 * 28 -> 80 -> 40 -> ``self.K``. Every weight
        matrix ``W<n>`` and bias row ``b<n>`` is filled with standard-normal
        samples scaled by 0.01. Values are drawn in the order
        W1, b1, W2, b2, W3, b3 from NumPy's global random state.
        """
        layer_widths = (28 * 28, 80, 40, self.K)
        width_pairs = zip(layer_widths[:-1], layer_widths[1:])
        for layer_no, (fan_in, fan_out) in enumerate(width_pairs, start=1):
            setattr(self, 'W%d' % layer_no, 0.01 * np.random.randn(fan_in, fan_out))
            setattr(self, 'b%d' % layer_no, 0.01 * np.random.randn(1, fan_out))

激活函数：前几层是Relu，最后一层是Softmax

    def train_network(self, img_batch_list, label_batch_list):
        """Forward-pass part of one training step (article excerpt).

        Pushes the batch through two ReLU hidden layers and turns the
        output-layer scores into row-wise softmax probabilities.
        ``label_batch_list`` is not read in this excerpt, and nothing is
        returned or stored on ``self`` here.
        """

        # Number of rows (examples) in the batch.
        train_example_num = img_batch_list.shape[0]
        # Hidden layers use the ReLU activation: max(0, x @ W + b).
        hidden_layer1 = np.maximum(0, np.matmul(img_batch_list, self.W1) + self.b1)
        hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)

        # Output layer uses softmax: exponentiate the scores and normalise each row.
        # NOTE(review): np.exp(scores) is applied to the raw scores; very large
        # scores would overflow to inf and make the division below produce nan.
        scores = np.matmul(hidden_layer2, self.W3) + self.b3
        scores_e = np.exp(scores)
        scores_e_sum = np.sum(scores_e, axis=1, keepdims=True)
        probs = scores_e / scores_e_sum

迭代过程：batchsize是3000，训练2500轮，每轮都训练完整的训练集，不随机采样

    def train(self):
        """Train the network for 2500 passes over the training set.

        Each pass walks the first ``self.N`` training examples in order, in
        consecutive slices of ``self.BATCHSIZE`` (no shuffling), and hands
        every slice to ``self.train_network``. Before the loop reaches the
        next pass, every 500th pass (0, 500, 1000, ...) prints the pass index
        and evaluates the model with ``self.predict()``.
        """
        # One iteration of the outer loop is one full pass over the data.
        for i in range(2500):
            # Consume the whole training set batch by batch.
            for j in range(0, self.N, self.BATCHSIZE):
                img_list = self.train_img_list[j:j + self.BATCHSIZE]
                label_list = self.train_label_list[j:j + self.BATCHSIZE]
                self.train_network(img_list, label_list)
            if i % 500 == 0:
                print("Train Time: ", i)
                # The evaluation method is named ``predict`` (lower case).
                self.predict()

损失函数：使用交叉熵损失函数，训练过程中加入了L2正则化项对损失函数修正，避免过拟合

        # Compute the loss.
        # loss_list_tmp[i] receives the softmax probability that example i
        # assigns to its labelled class (exp(score) / row sum of exps).
        loss_list_tmp = np.zeros((train_example_num, 1))
        for i in range(train_example_num):
            loss_list_tmp[i] = scores_e[i][int(label_batch_list[i])] / scores_e_sum[i]
        # Per-example cross-entropy: negative log of that probability.
        loss_list = -np.log(loss_list_tmp)

        # Total loss = mean cross-entropy + 0.5 * reg_factor * (sum of squared
        # entries of W1, W2 and W3); the biases are not part of the penalty.
        loss = np.mean(loss_list, axis=0)[0] + 0.5 * self.reg_factor * np.sum(self.W1 * self.W1) + 0.5 * self.reg_factor * np.sum(self.W2 * self.W2) + \
               0.5 * self.reg_factor * np.sum(self.W3 * self.W3)

        # Keep the loss of this batch in the running history.
        self.loss_list.append(loss)

权重和偏置更新：手推的过程之后再发，先欠着，这里也可以改进写得简洁一点。注意：下面用到的 dscore 是损失对输出层 scores 的梯度，即（probs 减去标签的 one-hot 向量）再除以 batch 内样本数，其计算见文末完整代码。

        # Backward pass. ``dscore`` is not defined in this excerpt; in the
        # complete listing it is (probs - one-hot label) / batch size.

        # Output layer: gradients of W3 and b3.
        dW3 = np.dot(hidden_layer2.T, dscore)
        db3 = np.sum(dscore, axis=0, keepdims=True)

        # Propagate into hidden layer 2; entries where the ReLU output was
        # not positive get a zero gradient.
        dh2 = np.dot(dscore, self.W3.T)
        dh2[hidden_layer2 <= 0] = 0

        # Hidden layer 2: gradients of W2 and b2.
        dW2 = np.dot(hidden_layer1.T, dh2)
        db2 = np.sum(dh2, axis=0, keepdims=True)

        # Propagate into hidden layer 1, again masking inactive ReLU units.
        dh1 = np.dot(dh2, self.W2.T)
        dh1[hidden_layer1 <= 0] = 0

        # Hidden layer 1: gradients of W1 and b1.
        dW1 = np.dot(img_batch_list.T, dh1)
        db1 = np.sum(dh1, axis=0, keepdims=True)

        # Add the gradient of the L2 penalty (weights only, not biases).
        dW3 += self.reg_factor * self.W3
        dW2 += self.reg_factor * self.W2
        dW1 += self.reg_factor * self.W1

        # Gradient-descent step on the weights, in place.
        self.W3 += -self.stepsize * dW3
        self.W2 += -self.stepsize * dW2
        self.W1 += -self.stepsize * dW1

        # Gradient-descent step on the biases, in place.
        self.b3 += -self.stepsize * db3
        self.b2 += -self.stepsize * db2
        self.b1 += -self.stepsize * db1

网络评估

最后使用测试集评估网络模型，评估模型预测的准确度

    def predict(self):
        """Classify the test images and report the accuracy.

        Runs ``self.test_img_list`` through the two ReLU hidden layers and
        the output layer, takes the highest-scoring class of every row, and
        compares it with ``self.test_label_list``. Works for a test set of
        any size.

        Returns:
            The fraction of test images whose predicted class equals the
            label. The same value is printed.
        """
        hidden_layer1 = np.maximum(0, np.matmul(self.test_img_list, self.W1) + self.b1)
        hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)
        scores = np.matmul(hidden_layer2, self.W3) + self.b3
        # argmax over axis 1 already yields one class index per image, so no
        # reshape to a fixed test-set size is needed.
        prediction = np.argmax(scores, axis=1)
        accuracy = np.mean(prediction == self.test_label_list)
        print('The accuracy of test data is:  ', accuracy)

        return accuracy

图案绘制及预测结果输出：

    # For each of the first 100 test images: print the predicted digit and
    # the stored label, then display the image as a 28 x 28 picture.
    for sample_idx in range(100):
        sample = data.test_img_list[sample_idx]
        predicted_digit = np.argmax(data.query(sample))
        print(predicted_digit)
        print(data.test_label_list[sample_idx])
        plt.imshow(sample.reshape(28, 28), cmap="Greys", interpolation='None')
        plt.pause(0.001)
        plt.show()
    print('done')

保存权重偏置和损失：

    # Write the recorded loss history to a plain-text file.
    loss_history = np.array(data.loss_list)
    np.savetxt('./loss_mnist.txt', loss_history)
    # Collect every weight matrix and bias row in one dict and save it.
    weight_bias_mnist = {
        param_name: getattr(data, param_name)
        for param_name in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
    }
    np.save('./weight_bias_mnist.npy', weight_bias_mnist)

完整的代码如下：

import numpy as np
import matplotlib.pyplot as plt
import gzip


class Data:
    """NumPy-only MNIST classifier: 784 -> 80 -> 40 -> K fully connected net.

    The two hidden layers use ReLU, the output layer uses softmax, and the
    network is trained with plain mini-batch gradient descent on the
    cross-entropy loss plus an L2 penalty on the weight matrices.

    Attributes set by ``__init__``:
        K: number of output classes.
        N: upper bound on the number of training examples used per epoch.
        M: nominal test-set size (not read by any method of this class).
        BATCHSIZE: number of examples per training batch.
        reg_factor: L2 regularisation strength.
        stepsize: learning rate.
        train_img_list / train_label_list: training images and labels.
        test_img_list / test_label_list: test images and labels.
        loss_list: loss of every processed batch, in order.
        W1, b1, W2, b2, W3, b3: layer parameters (see ``init_network``).
    """

    def __init__(self, data_dir='mnist'):
        """Load the four MNIST files from ``data_dir`` and initialise the net.

        Args:
            data_dir: Directory holding the four ``*-ubyte.gz`` files. The
                default keeps the original relative ``mnist/`` location.
        """
        self.K = 10
        self.N = 60000
        self.M = 10000
        self.BATCHSIZE = 3000
        self.reg_factor = 1e-3
        self.stepsize = 5e-2
        self.train_img_list = self.load_mnist_images(data_dir + '/train-images-idx3-ubyte.gz')
        self.train_label_list = self.load_mnist_labels(data_dir + '/train-labels-idx1-ubyte.gz')
        self.test_img_list = self.load_mnist_images(data_dir + '/t10k-images-idx3-ubyte.gz')
        self.test_label_list = self.load_mnist_labels(data_dir + '/t10k-labels-idx1-ubyte.gz')
        self.loss_list = []
        self.init_network()

    def load_mnist_images(self, filename):
        """Read a gzip-compressed MNIST image file.

        Args:
            filename: Path of the ``.gz`` image file.

        Returns:
            A 2-D float array with 784 columns (one row per image), scaled
            to [0, 1].
        """
        with gzip.open(filename, 'rb') as f:
            f.read(16)  # skip the 16 header bytes
            data = np.frombuffer(f.read(), dtype=np.uint8).reshape(-1, 28 * 28)
        return data / 255.0

    def load_mnist_labels(self, filename):
        """Read a gzip-compressed MNIST label file.

        Args:
            filename: Path of the ``.gz`` label file.

        Returns:
            A 1-D ``uint8`` array of raw label bytes (not one-hot encoded).
        """
        with gzip.open(filename, 'rb') as f:
            f.read(8)  # skip the 8 header bytes
            labels = np.frombuffer(f.read(), dtype=np.uint8)
        return labels

    def _forward(self, inputs):
        """Run the forward pass and return (hidden1, hidden2, scores)."""
        hidden_layer1 = np.maximum(0, np.matmul(inputs, self.W1) + self.b1)
        hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)
        scores = np.matmul(hidden_layer2, self.W3) + self.b3
        return hidden_layer1, hidden_layer2, scores

    def predict(self):
        """Classify the test set and report the accuracy.

        Works for a test set of any size.

        Returns:
            The fraction of test images whose highest-scoring class equals
            the label. The same value is printed.
        """
        scores = self._forward(self.test_img_list)[2]
        # argmax over axis 1 already gives one class index per image.
        prediction = np.argmax(scores, axis=1)
        accuracy = np.mean(prediction == self.test_label_list)
        print('The accuracy of test data is:  ', accuracy)
        return accuracy

    def query(self, inputs_list):
        """Return the raw output-layer scores (before softmax) for the inputs.

        Args:
            inputs_list: One image (784 values) or a batch with 784 columns.

        Returns:
            A 2-D array of scores with ``K`` columns.
        """
        return self._forward(inputs_list)[2]

    def train(self, epochs=2500, report_every=500):
        """Train with sequential (unshuffled) mini-batches.

        Args:
            epochs: Number of full passes over the training data.
            report_every: Print the epoch index and run ``predict`` whenever
                the epoch index is a multiple of this value; a falsy value
                disables the reports.
        """
        # Never step past the loaded data, so no empty batch is produced
        # when fewer than N examples are available.
        num_examples = min(self.N, len(self.train_img_list))
        for i in range(epochs):
            for j in range(0, num_examples, self.BATCHSIZE):
                img_list = self.train_img_list[j:j + self.BATCHSIZE]
                label_list = self.train_label_list[j:j + self.BATCHSIZE]
                self.train_network(img_list, label_list)
            if report_every and i % report_every == 0:
                print("Train Time: ", i)
                self.predict()

    def train_network(self, img_batch_list, label_batch_list):
        """Perform one gradient-descent step on a single batch.

        Appends the batch loss (mean cross-entropy plus L2 penalty) to
        ``self.loss_list`` and updates all weights and biases in place.
        An empty batch is ignored.

        Args:
            img_batch_list: 2-D array of images, one row per example.
            label_batch_list: Integer class label of every row.
        """
        train_example_num = img_batch_list.shape[0]
        if train_example_num == 0:
            return

        hidden_layer1, hidden_layer2, scores = self._forward(img_batch_list)

        # Log-softmax with the row maximum subtracted first: the result is
        # mathematically unchanged, but exp() can no longer overflow.
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

        rows = np.arange(train_example_num)
        labels = np.asarray(label_batch_list).reshape(-1).astype(np.intp)

        # Mean cross-entropy of the labelled classes plus the L2 penalty on
        # the three weight matrices (biases are not penalised).
        data_loss = -np.mean(log_probs[rows, labels])
        reg_loss = 0.5 * self.reg_factor * (
            np.sum(self.W1 * self.W1) + np.sum(self.W2 * self.W2) + np.sum(self.W3 * self.W3)
        )
        loss = data_loss + reg_loss
        self.loss_list.append(loss)

        # Gradient of the mean cross-entropy w.r.t. the scores:
        # (softmax probabilities - one-hot labels) / batch size.
        dscore = np.exp(log_probs)
        dscore[rows, labels] -= 1
        dscore /= train_example_num

        dW3 = np.dot(hidden_layer2.T, dscore)
        db3 = np.sum(dscore, axis=0, keepdims=True)

        # Back-propagate through the ReLUs: inactive units pass no gradient.
        dh2 = np.dot(dscore, self.W3.T)
        dh2[hidden_layer2 <= 0] = 0

        dW2 = np.dot(hidden_layer1.T, dh2)
        db2 = np.sum(dh2, axis=0, keepdims=True)

        dh1 = np.dot(dh2, self.W2.T)
        dh1[hidden_layer1 <= 0] = 0

        dW1 = np.dot(img_batch_list.T, dh1)
        db1 = np.sum(dh1, axis=0, keepdims=True)

        # Gradient of the L2 penalty.
        dW3 += self.reg_factor * self.W3
        dW2 += self.reg_factor * self.W2
        dW1 += self.reg_factor * self.W1

        # In-place gradient-descent update.
        self.W3 -= self.stepsize * dW3
        self.W2 -= self.stepsize * dW2
        self.W1 -= self.stepsize * dW1

        self.b3 -= self.stepsize * db3
        self.b2 -= self.stepsize * db2
        self.b1 -= self.stepsize * db1

        return

    def init_network(self):
        """Initialise all weights and biases with 0.01 * standard-normal values."""
        self.W1 = 0.01 * np.random.randn(28 * 28, 80)
        self.b1 = 0.01 * np.random.randn(1, 80)

        self.W2 = 0.01 * np.random.randn(80, 40)
        self.b2 = 0.01 * np.random.randn(1, 40)

        self.W3 = 0.01 * np.random.randn(40, self.K)
        self.b3 = 0.01 * np.random.randn(1, self.K)


def main():
    """Train the model, evaluate it, save the results and preview predictions.

    Steps, in order: build ``Data`` (which loads the dataset), train, print
    the test accuracy, write the loss history to ``./loss_mnist.txt``, write
    the weights and biases to ``./weight_bias_mnist.npy``, then show the
    first 100 test images together with the predicted and stored labels.
    """
    model = Data()
    model.train()
    model.predict()

    # Loss history as plain text, one value per line.
    loss_history = np.array(model.loss_list)
    np.savetxt('./loss_mnist.txt', loss_history)

    # All parameters in one dict. np.save stores a dict as a pickled object
    # array, so reading it back requires np.load(..., allow_pickle=True).
    param_names = ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
    weight_bias_mnist = {name: getattr(model, name) for name in param_names}
    np.save('./weight_bias_mnist.npy', weight_bias_mnist)

    for sample_idx in range(100):
        sample = model.test_img_list[sample_idx]
        predicted_digit = np.argmax(model.query(sample))
        print(predicted_digit)
        print(model.test_label_list[sample_idx])
        plt.imshow(sample.reshape(28, 28), cmap="Greys", interpolation='None')
        plt.pause(0.001)
        plt.show()
    print('done')


# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

各位好姐好哥要是需要，可以点个赞嗷

这里有完整的技术报告：包括问题描述，分类方法总结，该方法说明，实验结果，参考文献。训练损失图，权重偏置，以及完整的代码和数据集，下载即可食用：《MNIST手写字体识别：python不使用深度学习库函数》