MNIST Handwritten Digit Recognition: Designing an MLP Training and Inference Program with an Object-Oriented Approach

1. Experiment Objectives

Master the design principles of neural networks, become familiar with training and inference methods, and be able to implement in Python a three-layer fully connected neural network for handwritten digit classification, covering both training and use.
1) Implement a three-layer neural network model for handwritten digit classification and build a simple but complete neural network project. Through this experiment, understand the role of the basic modules in a neural network and the relationships between them, laying the foundation for building more complex networks later (such as style transfer).
2) Implement in Python the forward propagation and backward propagation of the basic neural network units, deepening the understanding of these units, including the fully connected layer, activation functions, and the loss function.
3) Implement in Python the gradient descent algorithm used to train neural networks, deepening the understanding of the training process (see the sketch below this list).
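
The update rule at the heart of 3) can be sketched in a few lines (a minimal, illustrative example; w, grad_w, and lr below are hypothetical names, not identifiers from the lab code):

# coding=utf-8
import numpy as np

# One gradient descent step on the toy loss L(w) = 0.5 * ||w||^2, whose gradient is simply w.
w = np.array([1.0, -2.0, 0.5])   # current parameters
lr = 0.001                       # learning rate (same default as the code below)
grad_w = w                       # gradient of L at the current w
w = w - lr * grad_w              # the same rule that update_param() applies per layer
print(w)                         # parameters after one step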

2. Experiment Content

        Design a three-layer neural network for handwritten digit image classification. The network contains two hidden layers and one output layer. The number of input neurons is determined by the dimensionality of the input data, the number of output neurons is determined by the number of classes in the dataset, and the sizes of the two hidden layers can be set as hyperparameters. For handwritten digit classification, the input is a digit image; the raw image is typically a 2-D matrix (grayscale) or a 3-D array (color), and it is flattened into a 1-D vector before being fed to the network. The number of classes is usually fixed in advance: handwritten digits cover 0 through 9, i.e. 10 classes, so the network has 10 output neurons.
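
A quick illustration of the flattening step described above (the variable names image and x are illustrative only):

# coding=utf-8
import numpy as np

image = np.zeros((28, 28), dtype=np.uint8)  # one grayscale MNIST image
x = image.reshape(1, -1)                    # flatten to a (1, 784) row vector
print(x.shape)                              # (1, 784): matches input_size=784
# with 10 digit classes (0-9), the output layer has 10 neurons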

3. Experiment Code

# coding=utf-8
import numpy as np
import struct
import os
import time
import matplotlib.pyplot as plt

from layers_1 import FullyConnectedLayer, ReLULayer, SoftmaxLossLayer, SigmoidLayer

MNIST_DIR = "../mnist_data"
TRAIN_DATA = "train-images-idx3-ubyte"
TRAIN_LABEL = "train-labels-idx1-ubyte"
TEST_DATA = "t10k-images-idx3-ubyte"
TEST_LABEL = "t10k-labels-idx1-ubyte"



class MNIST_MLP(object):  # lowering the learning rate from 0.01 to 0.001 improved results
    def __init__(self, batch_size=100, input_size=784, hidden1=32, hidden2=16, out_classes=10, lr=0.001, max_epoch=1,
                 print_iter=100):
        self.batch_size = batch_size  # self refers to the current instance
        self.input_size = input_size
        self.hidden1 = hidden1
        self.hidden2 = hidden2
        self.out_classes = out_classes
        self.lr = lr
        self.max_epoch = max_epoch
        self.print_iter = print_iter

    def display_prediction(self, i):
        # Get the image and label of the i-th digit in the test set
        image = self.test_data[i, :-1].reshape(28, 28)
        true_label = self.test_data[i, -1]

        # Run the model to get a prediction
        prob = self.forward(self.test_data[i, :-1].reshape(1, -1))
        pred_label = np.argmax(prob, axis=1)[0]

        # Show the image together with the true and predicted labels
        plt.imshow(image, cmap='gray')
        plt.title(f"True Label: {true_label}, Predicted Label: {pred_label}")
        plt.show()

    def load_mnist(self, file_dir, is_images=True):  # the default must be a boolean, not the string 'True'
        # Read binary data
        bin_file = open(file_dir, 'rb')
        bin_data = bin_file.read()
        bin_file.close()
        # Analysis file header
        if is_images:
            # Read images
            fmt_header = '>iiii'
            magic, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
        else:
            # Read labels
            fmt_header = '>ii'
            magic, num_images = struct.unpack_from(fmt_header, bin_data, 0)
            num_rows, num_cols = 1, 1
        data_size = num_images * num_rows * num_cols
        mat_data = struct.unpack_from('>' + str(data_size) + 'B', bin_data, struct.calcsize(fmt_header))
        mat_data = np.reshape(mat_data, [num_images, num_rows * num_cols])
        print('Load images from %s, number: %d, data shape: %s' % (file_dir, num_images, str(mat_data.shape)))
        return mat_data

    def load_data(self):
        # TODO: call load_mnist to read and preprocess the MNIST training and test images and labels
        print('Loading MNIST data from files...')
        train_images = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_DATA), True)
        train_labels = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_LABEL), False)
        test_images = self.load_mnist(os.path.join(MNIST_DIR, TEST_DATA), True)
        test_labels = self.load_mnist(os.path.join(MNIST_DIR, TEST_LABEL), False)
        # self.train_data = np.append(train_images, train_labels[:, np.newaxis], axis=1)
        # self.test_data = np.append(test_images, test_labels[:, np.newaxis], axis=1)
        self.train_data = np.append(train_images, train_labels, axis=1)  # concatenate along axis 1 (columns) into one 2-D array: each row is an image plus its label
        self.test_data = np.append(test_images, test_labels, axis=1)
        # self.test_data = np.concatenate((self.train_data, self.test_data), axis=0)

    def shuffle_data(self):
        print('Randomly shuffle MNIST data...')
        np.random.shuffle(self.train_data)  # shuffle the MNIST training data in place

    def build_model(self):  # build the network structure
        # TODO: build the three-layer network
        print('Building multi-layer perception model...')
        self.fc1 = FullyConnectedLayer(self.input_size, self.hidden1)
        #self.relu1 = ReLULayer()
        # Sigmoid is used here instead of ReLU
        self.sig1 = SigmoidLayer()
        self.fc2 = FullyConnectedLayer(self.hidden1, self.hidden2)
        #self.relu2 = ReLULayer()
        self.sig2 = SigmoidLayer()
        self.fc3 = FullyConnectedLayer(self.hidden2, self.out_classes)
        self.softmax = SoftmaxLossLayer()
        self.update_layer_list = [self.fc1, self.fc2, self.fc3]

    def init_model(self):
        print('Initializing parameters of each layer in MLP...')
        for layer in self.update_layer_list:  # iterate over every layer whose parameters need updating
            layer.init_param()  # initialize that layer's parameters

    def load_model(self, param_dir):
        print('Loading parameters from file ' + param_dir)
        params = np.load(param_dir, allow_pickle=True).item()  # allow_pickle is required to load a saved dict
        self.fc1.load_param(params['w1'], params['b1'])
        self.fc2.load_param(params['w2'], params['b2'])
        self.fc3.load_param(params['w3'], params['b3'])

    def save_model(self, param_dir):
        print('Saving parameters to file ' + param_dir)
        params = {}
        params['w1'], params['b1'] = self.fc1.save_param()
        params['w2'], params['b2'] = self.fc2.save_param()
        params['w3'], params['b3'] = self.fc3.save_param()
        np.save(param_dir, params)

    def forward(self, input):  # forward pass of the network
        # TODO: forward propagation through the network
        # h1 = self.fc1.forward(input)
        # h1 = self.relu1.forward(h1)
        # h2 = self.fc2.forward(h1)
        # h2 = self.relu2.forward(h2)
        # h3 = self.fc3.forward(h2)
        # prob = self.softmax.forward(h3)

        h1 = self.fc1.forward(input)
        h1 = self.sig1.forward(h1)
        h2 = self.fc2.forward(h1)
        h2 = self.sig2.forward(h2)
        h3 = self.fc3.forward(h2)
        prob = self.softmax.forward(h3)
        return prob

    def backward(self):  # backward pass of the network
        # TODO: backward propagation through the network
        # dloss = self.softmax.backward()
        # dh3 = self.fc3.backward(dloss)
        # dh2 = self.relu2.backward(dh3)
        # dh1 = self.fc2.backward(dh2)
        # dh1 = self.relu1.backward(dh1)
        # dh1 = self.fc1.backward(dh1)

        dloss = self.softmax.backward()
        dh3 = self.fc3.backward(dloss)
        dh2 = self.sig2.backward(dh3)
        dh1 = self.fc2.backward(dh2)
        dh1 = self.sig1.backward(dh1)
        dh1 = self.fc1.backward(dh1)

    def update(self, lr):
        for layer in self.update_layer_list:
            layer.update_param(lr)

    def train(self):
        max_batch = self.train_data.shape[0] // self.batch_size  # integer division so the loop count is an int
        print('Start training...')
        for idx_epoch in range(self.max_epoch):
            self.shuffle_data()
            for idx_batch in range(max_batch):
                batch_images = self.train_data[idx_batch*self.batch_size:(idx_batch+1)*self.batch_size, :-1]
                batch_labels = self.train_data[idx_batch*self.batch_size:(idx_batch+1)*self.batch_size, -1]
                prob = self.forward(batch_images)
                loss = self.softmax.get_loss(batch_labels)
                self.backward()
                self.update(self.lr)
                if idx_batch % self.print_iter == 0:
                    print('Epoch %d, iter %d, loss: %.6f' % (idx_epoch, idx_batch, loss))

    def evaluate(self):
        pred_results = np.zeros([self.test_data.shape[0]])
        # range() requires an integer; plain / gives a float whenever shape[0] is not divisible by batch_size, so floor (integer) division is used
        for idx in range(self.test_data.shape[0] // self.batch_size):
            batch_images = self.test_data[idx * self.batch_size:(idx + 1) * self.batch_size, :-1]
            start = time.time()
            prob = self.forward(batch_images)
            end = time.time()
            print("inferencing time: %f" % (end - start))
            pred_labels = np.argmax(prob, axis=1)
            pred_results[idx * self.batch_size:(idx + 1) * self.batch_size] = pred_labels
        accuracy = np.mean(pred_results == self.test_data[:, -1])
        print('Accuracy in test set: %f' % accuracy)


def build_mnist_mlp(param_dir='weight.npy'):
    h1, h2, e = 1000, 640, 10
    mlp = MNIST_MLP(hidden1=h1, hidden2=h2, max_epoch=e)
    mlp.load_data()
    mlp.build_model()
    mlp.init_model()
    mlp.train()
    mlp.save_model('mlp-%d-%d-%depoch.npy' % (h1, h2, e))
    # mlp.load_model('mlp-%d-%d-%depoch.npy' % (h1, h2, e))
    return mlp

if __name__ == '__main__':
    mlp = build_mnist_mlp()
    mlp.evaluate()
    # Show the recognition result for the i-th digit, e.g. i = 5
    i = 5
    mlp.display_prediction(i)
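
For reference, a minimal sketch of the tensor shapes flowing through the network that build_mnist_mlp constructs (batch_size=100, hidden1=1000, hidden2=640); it uses plain NumPy instead of the layer classes, omits biases, and is illustrative only:

# coding=utf-8
import numpy as np

x = np.zeros((100, 784))      # a batch of 100 flattened images
w1 = np.zeros((784, 1000))    # fc1 weights
w2 = np.zeros((1000, 640))    # fc2 weights
w3 = np.zeros((640, 10))      # fc3 weights
h1 = x.dot(w1)                # (100, 1000) after fc1
h2 = h1.dot(w2)               # (100, 640) after fc2
out = h2.dot(w3)              # (100, 10) logits, one per digit class
print(h1.shape, h2.shape, out.shape)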

layers_1.py

# coding=utf-8
import numpy as np
import struct
import os
import time


class FullyConnectedLayer(object):
    def __init__(self, num_input, num_output):  # fully connected layer initialization
        self.num_input = num_input
        self.num_output = num_output
        print('\tFully connected layer with input %d, output %d.' % (self.num_input, self.num_output))
    def init_param(self, std=0.01):  # parameter initialization; std is the standard deviation of the Gaussian weight init, default 0.01
        self.weight = np.random.normal(loc=0.0, scale=std, size=(self.num_input, self.num_output))
        self.bias = np.zeros([1, self.num_output])
    def forward(self, input):  # forward computation
        start_time = time.time()
        self.input = input
        # TODO: fully connected forward; compute the output
        self.output = np.dot(self.input, self.weight) + self.bias
        return self.output
    def backward(self, top_diff):  # backward computation: the forward pass computes the loss, the backward pass propagates the error back
        # TODO: fully connected backward; compute the parameter gradients and the diff for the layer below
        self.d_weight = np.dot(self.input.T, top_diff)
        self.d_bias = np.sum(top_diff, axis=0)
        bottom_diff = np.dot(top_diff, self.weight.T)
        return bottom_diff   # the error signal passed to the layer below
    def update_param(self, learning_rate):
        self.weight -= learning_rate * self.d_weight
        self.bias -= learning_rate * self.d_bias
    def save_param(self):
        return self.weight, self.bias
    # def save_param(self, filename=None):  # save the weights and bias to a file
    #     if filename is None:
    #         filename = f"fc_layer_params_{self.num_input}_{self.num_output}.npy"
    #     np.save(filename, {'weight': self.weight, 'bias': self.bias})
    #     print(f"Parameters saved to {filename}.")

class ReLULayer(object):  # returns the input where it is positive and 0 where it is non-positive
    def __init__(self):
        print('\tReLU layer.')
    def forward(self, input):
        start_time = time.time()
        self.input = input
        in_mask = input <= 0
        # output = np.maximum(0, self.input)
        output = input.copy()
        output[in_mask] = 0
        return output
    def backward(self, top_diff):
        bottom_diff = top_diff.copy()
        bottom_diff[self.input <= 0] = 0  # zero the gradient wherever the forward pass zeroed the input
        return bottom_diff

# New: Sigmoid layer (used in place of ReLU)

class SigmoidLayer(object):
    def __init__(self):
        self.output = None
        print('\tSigmoid layer.')

    def forward(self, input):
        output = 1.0 / (1.0 + np.exp(-input))
        self.output = output

        return output

    def backward(self, top_diff):
        sigma = self.output
        bottom_diff = top_diff * sigma * (1 - sigma)
        return bottom_diff


class SoftmaxLossLayer(object):
    def __init__(self):
        print('\tSoftmax loss layer.')

    def forward(self, input):  # forward computation
        # TODO: softmax loss forward; compute the class probabilities (subtracting the row max for numerical stability)
        input_max = np.max(input, axis=1, keepdims=True)
        input_exp = np.exp(input - input_max)
        self.prob = input_exp / np.sum(input_exp, axis=1, keepdims=True)
        return self.prob

    def get_loss(self, label):   # compute the batch-averaged cross-entropy loss
        self.batch_size = self.prob.shape[0]
        self.label_onehot = np.zeros_like(self.prob)
        self.label_onehot[np.arange(self.batch_size), label] = 1.0
        loss = -np.sum(np.log(self.prob) * self.label_onehot) / self.batch_size
        return loss

    def backward(self):  # backward computation
        # TODO: softmax loss backward; the gradient of the loss w.r.t. the logits is prob - onehot
        bottom_diff = self.prob - self.label_onehot  # not divided by batch_size here, so that scale factor is absorbed into the learning rate
        return bottom_diff
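
As a sanity check on SoftmaxLossLayer.backward, the standalone sketch below (illustrative, not part of the lab files) compares the analytic gradient with a finite-difference estimate. It checks the batch-averaged form (prob - onehot) / batch_size, which matches the averaged loss computed in get_loss:

# coding=utf-8
import numpy as np

np.random.seed(0)
z = np.random.randn(2, 5)        # logits for a batch of 2 samples, 5 classes
y = np.array([1, 3])             # integer class labels

def softmax(z):
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def loss(z):
    p = softmax(z)
    return -np.mean(np.log(p[np.arange(len(y)), y]))

prob = softmax(z)
onehot = np.zeros_like(prob)
onehot[np.arange(len(y)), y] = 1.0
analytic = (prob - onehot) / len(y)   # gradient of the batch-averaged loss

eps = 1e-6                            # finite-difference step size
numeric = np.zeros_like(z)
for i in range(z.shape[0]):
    for j in range(z.shape[1]):
        zp, zm = z.copy(), z.copy()
        zp[i, j] += eps
        zm[i, j] -= eps
        numeric[i, j] = (loss(zp) - loss(zm)) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))   # should be tiny, on the order of 1e-9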

4. Personal Understanding

The main takeaway is a better understanding of object-oriented design.

class MNIST_MLP(object):  # class defines a new class that inherits from object (the parent class); the subclass can add to or override what the parent provides

    def __init__(self, batch_size=100, hidden1=32, hidden2=16):  # the initializer, which assigns the initial values

        self.batch_size = batch_size

# self refers to the current instance

self.batch_size on the left is an attribute of the instance; the batch_size on the right is the batch size passed in from outside. This way every instance gets its own independent batch_size attribute instead of sharing a single variable (a defining feature of object-oriented design).

Creating a new instance of the MNIST_MLP class and assigning it to the variable mlp; any parameter that is not passed explicitly keeps the default value set in __init__.
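
A minimal standalone sketch of this point (the Demo class is hypothetical, written only to illustrate per-instance attributes):

# coding=utf-8
class Demo(object):
    def __init__(self, batch_size=100):
        self.batch_size = batch_size   # stored on the instance, not the class

a = Demo()                  # keeps the default, 100
b = Demo(batch_size=32)     # overrides the default for this instance only
b.batch_size = 64           # modifying b...
print(a.batch_size, b.batch_size)  # 100 64 -- ...leaves a untouched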
