MNIST手写字体识别:python不使用深度学习库函数

# 当一个门外汉做交叉方向研究,选修了神经网络之后的大作业

# 训练结果,预测精度达97.75%

手写体数字分类是机器学习中的一个经典问题,主要目的是实现手写数字图像的自动识别。该问题使用MNIST数据集进行研究,MNIST数据集包含60000张训练图像和10000张测试图像,每张图像为28x28像素,表示从0到9的手写数字。目标是设计一个基于神经网络的分类器,能够准确地将每张图像分配到正确的数字类别。

手写体数字分类问题可以采用多种方法进行解决,包括但不限于:

  1. 基础分类算法:决策树;逻辑回归;支持向量机(SVM);朴素贝叶斯;K-最近邻;神经网络;贝叶斯网络;线性判别分析;最大熵模型;
  2. 集成分类算法:随机森林;AdaBoost;梯度提升决策树(GBDT);极端梯度提升(XGBoost);
  3. 决策树桩;K-最近邻朴素贝叶斯

训练数据介绍:

mnist手写字体有60000组图片和标签做训练集,以及10000组图片和标签做测试集

mnist数据下载地址:http://yann.lecun.com/exdb/mnist/

    # 解压和读取MNIST数据
    def load_mnist_images(self, filename):
        """Read a gzip-compressed MNIST IDX image file.

        Args:
            filename: Path to the ``.gz`` image file.

        Returns:
            A float array of shape (image count, rows * cols) whose pixel
            values are scaled from 0..255 to [0, 1].

        Raises:
            ValueError: If the 16-byte header is truncated, does not carry
                the IDX image magic number, or disagrees with the payload
                size (the latter is raised by ``reshape``).
        """
        with gzip.open(filename, 'rb') as f:
            # Header: magic, image count, rows, cols (big-endian uint32 each)
            header = f.read(16)
            if len(header) != 16 or int.from_bytes(header[:4], 'big') != 2051:
                raise ValueError('{!r} is not an MNIST image file'.format(filename))
            count, rows, cols = (int.from_bytes(header[k:k + 4], 'big') for k in (4, 8, 12))
            data = np.frombuffer(f.read(), dtype=np.uint8)
        return data.reshape(count, rows * cols) / 255.0  # normalise to [0, 1]

    def load_mnist_labels(self, filename):
        """Read a gzip-compressed MNIST IDX label file.

        Args:
            filename: Path to the ``.gz`` label file.

        Returns:
            A 1-D ``uint8`` array of class indices. The labels are NOT
            one-hot encoded; the training code indexes with them directly.

        Raises:
            ValueError: If the 8-byte header is truncated, does not carry the
                IDX label magic number, or disagrees with the payload size.
        """
        with gzip.open(filename, 'rb') as f:
            # Header: magic, label count (big-endian uint32 each)
            header = f.read(8)
            if len(header) != 8 or int.from_bytes(header[:4], 'big') != 2049:
                raise ValueError('{!r} is not an MNIST label file'.format(filename))
            count = int.from_bytes(header[4:8], 'big')
            labels = np.frombuffer(f.read(), dtype=np.uint8)
        if labels.shape[0] != count:
            raise ValueError('{!r} declares {} labels but contains {}'.format(
                filename, count, labels.shape[0]))
        return labels

网络结构

四层:分别是一层28*28的图案输入层,两层隐藏层(80,40),最后一层输出层(10)

    def init_network(self):
        """Create the weights and biases of the 784-80-40-K network.

        Every parameter is drawn from a standard normal distribution and
        scaled by 0.01. Weights ``W1..W3`` have shape (fan_in, fan_out) and
        biases ``b1..b3`` have shape (1, fan_out). Parameters are drawn in
        the order W1, b1, W2, b2, W3, b3.
        """
        scale = 0.01
        widths = (28 * 28, 80, 40, self.K)
        for layer, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, 'W{}'.format(layer), scale * np.random.randn(fan_in, fan_out))
            setattr(self, 'b{}'.format(layer), scale * np.random.randn(1, fan_out))

激活函数:前几层是Relu,最后一层是Softmax

    def train_network(self, img_batch_list, label_batch_list):
        # Forward pass of one training step. This excerpt stops after the
        # softmax; label_batch_list is not used in the lines shown here.

        train_example_num = img_batch_list.shape[0]
        # Hidden layers use the ReLU activation
        hidden_layer1 = np.maximum(0, np.matmul(img_batch_list, self.W1) + self.b1)
        hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)

        # Output layer uses the softmax activation
        scores = np.matmul(hidden_layer2, self.W3) + self.b3
        # NOTE(review): np.exp is applied to the raw scores, so very large
        # scores would overflow; subtracting the per-row maximum first avoids it.
        scores_e = np.exp(scores)
        scores_e_sum = np.sum(scores_e, axis=1, keepdims=True)
        # Row-wise normalisation turns the exponentials into class probabilities
        probs = scores_e / scores_e_sum

迭代过程:batchsize是3000,训练2500轮,每轮都训练完整的训练集,不随机采样

    def train(self, epochs=2500, eval_interval=500):
        """Train on the whole training set for a number of epochs.

        Each epoch walks the training data in order, one contiguous
        mini-batch of ``self.BATCHSIZE`` samples at a time (no shuffling).

        Args:
            epochs: Number of full passes over the training set.
            eval_interval: Every ``eval_interval`` epochs (starting with epoch
                0) the epoch index is printed and the test accuracy evaluated.
        """
        for i in range(epochs):
            # Train on every mini-batch of the training set
            for j in range(0, self.N, self.BATCHSIZE):
                img_list = self.train_img_list[j:j + self.BATCHSIZE]
                label_list = self.train_label_list[j:j + self.BATCHSIZE]
                self.train_network(img_list, label_list)
            if i % eval_interval == 0:
                print("Train Time: ", i)
                # The evaluation method is named `predict` (lower case)
                self.predict()

损失函数:使用交叉熵损失函数,训练过程中加入了L2正则化项对损失函数修正,避免过拟合

        # Compute the loss
        # loss_list_tmp[i] holds the softmax probability of sample i's labelled class
        loss_list_tmp = np.zeros((train_example_num, 1))
        for i in range(train_example_num):
            loss_list_tmp[i] = scores_e[i][int(label_batch_list[i])] / scores_e_sum[i]
        # Per-sample cross-entropy: negative log of the labelled-class probability
        loss_list = -np.log(loss_list_tmp)

        # Batch mean of the cross-entropy plus an L2 penalty
        # (0.5 * reg_factor * sum of squared weights) for W1, W2 and W3
        loss = np.mean(loss_list, axis=0)[0] + 0.5 * self.reg_factor * np.sum(self.W1 * self.W1) + 0.5 * self.reg_factor * np.sum(self.W2 * self.W2) + \
               0.5 * self.reg_factor * np.sum(self.W3 * self.W3)

        # Keep the loss of every batch
        self.loss_list.append(loss)

权重和偏置更新:手推的过程之后再发,先欠着,这里也可以改进写得简洁一点

        # Backward pass. `dscore` (gradient with respect to the output scores)
        # is not defined in this excerpt; the complete listing builds it as
        # (probs - one-hot label) / batch size.

        # Output layer: gradients of W3 and b3
        dW3 = np.dot(hidden_layer2.T, dscore)
        db3 = np.sum(dscore, axis=0, keepdims=True)

        # Propagate to the second hidden layer; zero the gradient wherever the
        # ReLU output was not positive
        dh2 = np.dot(dscore, self.W3.T)
        dh2[hidden_layer2 <= 0] = 0

        dW2 = np.dot(hidden_layer1.T, dh2)
        db2 = np.sum(dh2, axis=0, keepdims=True)

        # Propagate to the first hidden layer, again masking inactive ReLU units
        dh1 = np.dot(dh2, self.W2.T)
        dh1[hidden_layer1 <= 0] = 0

        dW1 = np.dot(img_batch_list.T, dh1)
        db1 = np.sum(dh1, axis=0, keepdims=True)

        # Gradient of the L2 penalty (applied to the weights only, not the biases)
        dW3 += self.reg_factor * self.W3
        dW2 += self.reg_factor * self.W2
        dW1 += self.reg_factor * self.W1

        # Gradient-descent step on the weights
        self.W3 += -self.stepsize * dW3
        self.W2 += -self.stepsize * dW2
        self.W1 += -self.stepsize * dW1

        # Gradient-descent step on the biases
        self.b3 += -self.stepsize * db3
        self.b2 += -self.stepsize * db2
        self.b1 += -self.stepsize * db1

网络评估

最后使用测试集评估网络模型,评估模型预测的准确度

    def predict(self):
        """Evaluate the network on the test set and print the accuracy.

        Reads ``self.test_img_list`` and ``self.test_label_list``; works for
        any number of test samples.

        Returns:
            The fraction of test samples whose highest-scoring class equals
            the label.
        """
        hidden_layer1 = np.maximum(0, np.matmul(self.test_img_list, self.W1) + self.b1)
        hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)
        scores = np.matmul(hidden_layer2, self.W3) + self.b3
        # Softmax is monotonic, so the arg-max of the raw scores is the predicted
        # class; argmax over axis 1 already yields one entry per sample.
        prediction = np.argmax(scores, axis=1)
        accuracy = np.mean(prediction == self.test_label_list)
        print('The accuracy of test data is:  ', accuracy)

        return accuracy

图案绘制及预测结果输出:

    # For the first 100 test images: print the predicted class, print the
    # stored label, and display the image.
    for i in range(100):
        # query() returns the raw output scores; the arg-max is the predicted digit
        outputs = data.query(data.test_img_list[i])
        label = np.argmax(outputs)
        print(label)
        print(data.test_label_list[i])
        # Restore the flat pixel vector to a 28x28 image for display
        image_array = data.test_img_list[i].reshape(28, 28)
        plt.imshow(image_array, cmap="Greys", interpolation='None')
        plt.pause(0.001)
        plt.show()
    print('done')

保存权重偏置和损失:

    # Save the recorded per-batch losses as text
    np.savetxt('./loss_mnist.txt', np.array(data.loss_list))
    # Save the model weights and biases
    # NOTE(review): a dict passed to np.save is stored as a pickled object
    # array, so reading it back presumably needs np.load(..., allow_pickle=True) — confirm.
    weight_bias_mnist = {'W1': data.W1, 'b1': data.b1, 'W2': data.W2, 'b2': data.b2, 'W3': data.W3, 'b3': data.b3}
    np.save('./weight_bias_mnist.npy', weight_bias_mnist)

完整的代码如下:

import numpy as np
import matplotlib.pyplot as plt
import gzip


class Data:
    """MNIST classifier: a 784-80-40-K fully connected network in plain NumPy.

    The two hidden layers use ReLU and the output layer uses softmax with a
    cross-entropy loss plus L2 weight regularisation. Training is mini-batch
    gradient descent over the training set in its stored order.

    Attributes set by ``__init__``: ``K`` (classes), ``N`` / ``M`` (training /
    test sample counts), ``BATCHSIZE``, ``reg_factor`` (L2 strength),
    ``stepsize`` (learning rate), the four data arrays, ``loss_list`` (one
    loss per processed batch) and the parameters ``W1..W3`` / ``b1..b3``.
    """

    def __init__(self):
        """Load the four MNIST files from ``./mnist`` and initialise the network."""
        self.K = 10
        self.N = 60000
        self.M = 10000
        self.BATCHSIZE = 3000
        self.reg_factor = 1e-3
        self.stepsize = 5e-2
        # Files are looked up in the "mnist" directory next to the working directory
        self.train_img_list = self.load_mnist_images('mnist/train-images-idx3-ubyte.gz')
        self.train_label_list = self.load_mnist_labels('mnist/train-labels-idx1-ubyte.gz')
        self.test_img_list = self.load_mnist_images('mnist/t10k-images-idx3-ubyte.gz')
        self.test_label_list = self.load_mnist_labels('mnist/t10k-labels-idx1-ubyte.gz')
        # Use the real sample counts so batching follows the data actually loaded
        self.N = self.train_img_list.shape[0]
        self.M = self.test_img_list.shape[0]
        self.loss_list = []
        self.init_network()

    # Decompress and read the MNIST data
    def load_mnist_images(self, filename):
        """Read a gzip-compressed MNIST IDX image file.

        Args:
            filename: Path to the ``.gz`` image file.

        Returns:
            A float array of shape (image count, rows * cols) whose pixel
            values are scaled from 0..255 to [0, 1].

        Raises:
            ValueError: If the 16-byte header is truncated, does not carry
                the IDX image magic number, or disagrees with the payload
                size (the latter is raised by ``reshape``).
        """
        with gzip.open(filename, 'rb') as f:
            # Header: magic, image count, rows, cols (big-endian uint32 each)
            header = f.read(16)
            if len(header) != 16 or int.from_bytes(header[:4], 'big') != 2051:
                raise ValueError('{!r} is not an MNIST image file'.format(filename))
            count, rows, cols = (int.from_bytes(header[k:k + 4], 'big') for k in (4, 8, 12))
            data = np.frombuffer(f.read(), dtype=np.uint8)
        return data.reshape(count, rows * cols) / 255.0  # normalise to [0, 1]

    def load_mnist_labels(self, filename):
        """Read a gzip-compressed MNIST IDX label file.

        Args:
            filename: Path to the ``.gz`` label file.

        Returns:
            A 1-D ``uint8`` array of class indices (not one-hot encoded).

        Raises:
            ValueError: If the 8-byte header is truncated, does not carry the
                IDX label magic number, or disagrees with the payload size.
        """
        with gzip.open(filename, 'rb') as f:
            # Header: magic, label count (big-endian uint32 each)
            header = f.read(8)
            if len(header) != 8 or int.from_bytes(header[:4], 'big') != 2049:
                raise ValueError('{!r} is not an MNIST label file'.format(filename))
            count = int.from_bytes(header[4:8], 'big')
            labels = np.frombuffer(f.read(), dtype=np.uint8)
        if labels.shape[0] != count:
            raise ValueError('{!r} declares {} labels but contains {}'.format(
                filename, count, labels.shape[0]))
        return labels

    def _forward(self, inputs):
        """Run the forward pass; return (hidden_layer1, hidden_layer2, scores)."""
        hidden_layer1 = np.maximum(0, np.matmul(inputs, self.W1) + self.b1)
        hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)
        scores = np.matmul(hidden_layer2, self.W3) + self.b3
        return hidden_layer1, hidden_layer2, scores

    def predict(self):
        """Evaluate the network on the test set and print the accuracy.

        Works for any number of test samples.

        Returns:
            The fraction of test samples whose highest-scoring class equals
            the label.
        """
        _, _, scores = self._forward(self.test_img_list)
        # argmax over axis 1 already yields one predicted class per sample
        prediction = np.argmax(scores, axis=1)
        accuracy = np.mean(prediction == self.test_label_list)
        print('The accuracy of test data is:  ', accuracy)

        return accuracy

    def query(self, inputs_list):
        """Return the raw (pre-softmax) output scores for ``inputs_list``.

        Args:
            inputs_list: One flattened image or a batch of flattened images.
        """
        _, _, scores = self._forward(inputs_list)
        return scores

    def train(self, epochs=2500, eval_interval=500):
        """Train on the whole training set for a number of epochs.

        Each epoch walks the training data in order, one contiguous
        mini-batch of ``self.BATCHSIZE`` samples at a time (no shuffling).

        Args:
            epochs: Number of full passes over the training set.
            eval_interval: Every ``eval_interval`` epochs (starting with epoch
                0) the epoch index is printed and the test accuracy evaluated.
        """
        for i in range(epochs):
            # Train on every mini-batch of the training set
            for j in range(0, self.N, self.BATCHSIZE):
                img_list = self.train_img_list[j:j + self.BATCHSIZE]
                label_list = self.train_label_list[j:j + self.BATCHSIZE]
                self.train_network(img_list, label_list)
            if i % eval_interval == 0:
                print("Train Time: ", i)
                self.predict()

    def train_network(self, img_batch_list, label_batch_list):
        """Run one gradient-descent step on a mini-batch.

        Appends the batch loss (mean cross-entropy plus L2 penalty) to
        ``self.loss_list`` and updates all weights and biases in place.

        Args:
            img_batch_list: Array of shape (batch, 784) with the input images.
            label_batch_list: Integer class index for every sample in the batch.
        """
        train_example_num = img_batch_list.shape[0]
        if train_example_num == 0:
            # Nothing to learn from; avoids a NaN loss and a division by zero
            return
        labels = np.asarray(label_batch_list).astype(np.intp).reshape(-1)
        rows = np.arange(train_example_num)

        # Hidden layers use ReLU
        hidden_layer1, hidden_layer2, scores = self._forward(img_batch_list)

        # Softmax via log-sum-exp: subtracting the row maximum leaves the
        # probabilities unchanged but keeps np.exp from overflowing
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
        probs = np.exp(log_probs)

        # Loss: mean cross-entropy of the labelled class plus the L2 penalty
        data_loss = -np.mean(log_probs[rows, labels])
        reg_loss = 0.5 * self.reg_factor * (
            np.sum(self.W1 * self.W1) + np.sum(self.W2 * self.W2) + np.sum(self.W3 * self.W3))
        loss = data_loss + reg_loss

        self.loss_list.append(loss)

        # Gradient of the mean cross-entropy w.r.t. the scores:
        # (probs - one-hot label) / batch size. probs is not needed afterwards,
        # so it is modified in place.
        dscore = probs
        dscore[rows, labels] -= 1
        dscore /= train_example_num

        dW3 = np.dot(hidden_layer2.T, dscore)
        db3 = np.sum(dscore, axis=0, keepdims=True)

        # Back-propagate through ReLU: no gradient where the unit was inactive
        dh2 = np.dot(dscore, self.W3.T)
        dh2[hidden_layer2 <= 0] = 0

        dW2 = np.dot(hidden_layer1.T, dh2)
        db2 = np.sum(dh2, axis=0, keepdims=True)

        dh1 = np.dot(dh2, self.W2.T)
        dh1[hidden_layer1 <= 0] = 0

        dW1 = np.dot(img_batch_list.T, dh1)
        db1 = np.sum(dh1, axis=0, keepdims=True)

        # Gradient of the L2 penalty (weights only)
        dW3 += self.reg_factor * self.W3
        dW2 += self.reg_factor * self.W2
        dW1 += self.reg_factor * self.W1

        # Gradient-descent update
        self.W3 += -self.stepsize * dW3
        self.W2 += -self.stepsize * dW2
        self.W1 += -self.stepsize * dW1

        self.b3 += -self.stepsize * db3
        self.b2 += -self.stepsize * db2
        self.b1 += -self.stepsize * db1

        return

    def init_network(self):
        """Initialise all weights and biases with 0.01 * standard-normal samples."""
        self.W1 = 0.01 * np.random.randn(28 * 28, 80)
        self.b1 = 0.01 * np.random.randn(1, 80)

        self.W2 = 0.01 * np.random.randn(80, 40)
        self.b2 = 0.01 * np.random.randn(1, 40)

        self.W3 = 0.01 * np.random.randn(40, self.K)
        self.b3 = 0.01 * np.random.randn(1, self.K)


def main():
    """Train the network, evaluate it, save the results and show sample predictions.

    Writes the recorded losses to ``./loss_mnist.txt`` and the weights and
    biases to ``./weight_bias_mnist.npy``. Then, for each of the first 100
    test images, prints the predicted class and the stored label and
    displays the image.
    """
    model = Data()
    model.train()
    model.predict()

    # Persist the per-batch losses
    np.savetxt('./loss_mnist.txt', np.array(model.loss_list))

    # Persist the model weights and biases as one dict
    parameter_names = ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
    weight_bias_mnist = {name: getattr(model, name) for name in parameter_names}
    np.save('./weight_bias_mnist.npy', weight_bias_mnist)

    for index in range(100):
        pixels = model.test_img_list[index]
        # The arg-max of the raw scores is the predicted digit
        print(np.argmax(model.query(pixels)))
        print(model.test_label_list[index])
        plt.imshow(pixels.reshape(28, 28), cmap="Greys", interpolation='None')
        plt.pause(0.001)
        plt.show()
    print('done')


# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

各位好姐好哥要是需要,可以点个赞嗷

这里有完整的技术报告,包括问题描述、分类方法总结、本方法说明、实验结果和参考文献;另附训练损失图、权重偏置,以及完整的代码和数据集,下载即可食用:《MNIST手写字体识别:python不使用深度学习库函数》

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值