numpy实现BP神经网络,对MNIST数据集进行测试

Wang121201

已于 2022-05-22 13:30:03 修改

阅读量888

点赞数 1

分类专栏：测试代码　文章标签：神经网络 python 分类

于 2022-05-22 12:28:09 首次发布

本文链接：https://blog.csdn.net/Wang121201/article/details/124909051

版权

测试代码专栏收录该内容

3 篇文章 0 订阅

订阅专栏

使用numpy实现BP神经网络,并对MNIST数据集进行测试.

模型说明

直接读取本地文件获得数据集
数据集默认使用包含60000个图像的训练集文件,也可以在代码中改为读取包含10000个图像的文件
单隐层神经网络,可以调整隐层神经元个数
激活函数:sigmoid
更新模型参数的原理可以参考西瓜书
模型权重初始化:从标准正态分布采样,再除以 sqrt(该层神经元个数/2) 进行缩放
没有考虑偏置(即西瓜书中的"阈值")

import matplotlib.pyplot as plt
import numpy as np
import os

# Activation function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))``.

    The computation goes through ``np.exp``, so ``x`` may be a scalar or a
    NumPy array; arrays are processed element-wise.
    """
    decay = np.exp(-x)
    return 1 / (decay + 1)

# Show a random selection of dataset images with their labels
def showDataSet(dataSet,dataLabel):
    """Display 16 randomly chosen samples in a 4x4 grid, captioned by label.

    Each sample index is drawn independently with ``np.random.randint``, so
    the same sample may appear more than once. The chosen entry of
    ``dataSet`` is handed to ``plt.imshow`` and the matching entry of
    ``dataLabel`` becomes the x-axis label of that cell.
    """
    rows, cols = 4, 4
    plt.figure(figsize=(12, 12))
    for cell in range(1, rows * cols + 1):
        pick = np.random.randint(0, len(dataSet))
        plt.subplot(rows, cols, cell)
        # Hide ticks and grid lines so only the image and its caption remain.
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(dataSet[pick], cmap=plt.cm.binary)
        plt.xlabel(dataLabel[pick])
    plt.show()

def loadData(path = './data/MNIST/raw',
             images_file = 'train-images-idx3-ubyte',
             labels_file = 'train-labels-idx1-ubyte'):
    """Load MNIST images and labels from raw IDX files.

    Args:
        path: Directory that contains the two files.
        images_file: Name of the image file. Defaults to the 60000-image
            training file; pass ``'t10k-images-idx3-ubyte'`` for the
            10000-image file.
        labels_file: Name of the label file that matches ``images_file``.

    Returns:
        A tuple ``(dataImg, dataLabel)``: ``dataImg`` is an integer array of
        shape ``(n, 28, 28)`` and ``dataLabel`` an integer array of shape
        ``(n,)``.

    Raises:
        ValueError: If the pixel data is not a whole number of 28x28 images,
            or the number of images and labels differ.
    """
    # Passing the path (not an open file object) lets NumPy read the bytes in
    # binary mode and close the file itself.
    raw_images = np.fromfile(os.path.join(path, images_file), dtype=np.uint8)
    raw_labels = np.fromfile(os.path.join(path, labels_file), dtype=np.uint8)
    # Skip the file headers (16 bytes for images, 8 bytes for labels) and
    # widen from uint8 so later arithmetic cannot wrap around.
    dataLabel = raw_labels[8:].astype(int)
    # -1 infers the image count, so files of any size load correctly.
    dataImg = raw_images[16:].reshape(-1, 28, 28).astype(int)
    if len(dataImg) != len(dataLabel):
        raise ValueError(
            'image/label count mismatch: %d images, %d labels'
            % (len(dataImg), len(dataLabel)))
    return dataImg, dataLabel

def BP_Mnist(dataSet,dataLabel, train_rate = 0.8,iterate = 50000, learning_rate = 2 ,hiden_layer = 100):
    """Train a single-hidden-layer sigmoid network with per-sample backprop.

    The first ``train_rate`` fraction of the samples is used for training and
    the remainder is held out. Every 1000 steps the accuracy on the held-out
    part is printed and recorded. After training, the best recorded accuracy
    is printed and the accuracy curve is plotted. The network has no bias
    (threshold) terms.

    Args:
        dataSet: 2-D array with one flattened sample per row.
        dataLabel: 1-D array of non-negative integer class labels, aligned
            with the rows of ``dataSet``.
        train_rate: Fraction of the samples (taken from the front) used for
            training.
        iterate: Number of single-sample update steps.
        learning_rate: Step size applied to both weight matrices.
        hiden_layer: Number of hidden units.

    Returns:
        The list of held-out accuracies, one entry per evaluation.

    Raises:
        ValueError: If ``train_rate`` leaves no samples to train on.
    """
    # Split the data into a training part and a held-out part.
    split = int(train_rate * len(dataSet))
    if split <= 0:
        raise ValueError('train_rate leaves no samples to train on')
    trainSet, testSet = dataSet[:split], dataSet[split:]
    trainLabel, testLabel = dataLabel[:split], dataLabel[split:]
    # Labels index rows of the one-hot table, so the output size must cover
    # the largest label in the whole data set, not only those that happen to
    # occur in the held-out part.
    out_layer = int(np.max(dataLabel)) + 1
    input_layer = len(dataSet[0])
    # One-hot targets: row k of the identity matrix encodes class k.
    label_code = np.identity(out_layer, dtype=int)
    # Weight initialisation strongly affects convergence speed and quality:
    # standard-normal draws scaled by sqrt(layer size / 2).
    hiden_Weight = np.random.randn(hiden_layer,input_layer)/np.sqrt(hiden_layer/2.0)
    out_Weight = np.random.randn(out_layer, hiden_layer)/np.sqrt(out_layer/2.0)
    # Held-out accuracy history.
    accuracys=[]
    for n in range(iterate):
        # Draw one training sample at random. Sampling only from the training
        # split keeps the held-out samples unseen during training.
        index = np.random.randint(0,len(trainSet))
        xinput = np.asarray(trainSet[index])
        yout = label_code[trainLabel[index]]
        # Forward pass: hidden activations, then output activations.
        hiden = sigmoid(np.dot(xinput,hiden_Weight.T))
        out = sigmoid(np.dot(hiden,out_Weight.T))
        # Backward pass for the squared error sum((out - yout)**2).
        # g_j is the output-layer gradient term, e_h the hidden-layer one;
        # both are computed before either weight matrix is modified.
        g_j = out*(1-out)*(yout-out)
        e_h = hiden*(1-hiden)*(g_j.dot(out_Weight))
        hiden_Weight += learning_rate*np.outer(e_h,xinput)
        out_Weight += learning_rate*np.outer(g_j,hiden)
        # Evaluate on the held-out part once every 1000 steps.
        if n%1000==0:
            pre_hide = sigmoid(np.dot(testSet,hiden_Weight.T))
            pr_out = sigmoid(np.dot(pre_hide,out_Weight.T))
            predictions = np.argmax(pr_out,axis=1)
            acc = np.mean(np.equal(predictions,testLabel))
            accuracys.append(acc)
            print('steps:',n,'accuracy:',acc)
    # With iterate == 0 nothing was recorded; skip the summary instead of
    # failing on max() of an empty list.
    if accuracys:
        print('max accuracy: ',max(accuracys))
        plt.plot(accuracys)
        plt.show()
    return accuracys

# Load the data set from the default local directory
dataSet,dataLabel = loadData()
# Preview a few randomly chosen images together with their labels
showDataSet(dataSet,dataLabel)
# Flatten each 28x28 image into a 784-element row and divide the pixel values by 255
dataSet = dataSet.reshape(dataSet.shape[0],28*28)/255.0
# Train the model; the seed is fixed here (after the preview has drawn its
# random samples) so weight initialisation and sample order are repeatable
np.random.seed(0)
BP_Mnist(dataSet,dataLabel,train_rate=0.8,iterate=50000,learning_rate=1,hiden_layer=100)