Machine Learning: Handwritten Digit Recognition with a Three-Layer BP Neural Network
The full code is below.
# Train a three-layer BP neural network for handwritten digit recognition using gradient descent
import scipy.io as sio
import numpy as np
from sklearn.metrics import classification_report
#####################################
# Compute the network outputs for the given weights theta #
def feedforward(theta1, theta2, X):
    X = np.insert(X, 0, 1, axis=1)    # add the bias unit to the input layer
    a1 = X
    z2 = X.dot(theta1.T)
    a2 = 1 / (1 + np.exp(-z2))        # sigmoid activation of the hidden layer
    a2 = np.insert(a2, 0, 1, axis=1)  # add the bias unit to the hidden layer
    z3 = a2.dot(theta2.T)
    a3 = 1 / (1 + np.exp(-z3))        # sigmoid activation of the output layer
    return a1, a2, a3, z2, z3
# Compute the network outputs for the given weights theta #
#####################################
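# Shape walk-through for this dataset (assuming X is (5000, 400), theta1 is
# (25, 401) and theta2 is (10, 26), as loaded below): a1 becomes (5000, 401)
# after the bias column, z2 is (5000, 25), a2 grows to (5000, 26) with its
# bias column, and a3 is (5000, 10), one row of class scores per example.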
#####################################
# Compute the gradients for one descent step via error back-propagation #
def Back_Propagation(a1, a2, a3, z2, Ymatrix, theta1_train, theta2_train):
    # zero the bias columns so the regularization term matches the cost function,
    # which also excludes the bias weights
    theta1_reg = theta1_train.copy()
    theta2_reg = theta2_train.copy()
    theta1_reg[..., 0] = 0
    theta2_reg[..., 0] = 0
    d3 = a3 - Ymatrix                                      # error term of each output neuron
    D2 = (1 / m) * d3.T.dot(a2) + (l / m) * theta2_reg     # gradient of the hidden-to-output weights
    sig_z2 = 1 / (1 + np.exp(-z2))
    d2 = d3.dot(theta2_train)[..., 1:] * sig_z2 * (1 - sig_z2)  # error term of the hidden layer
    D1 = (1 / m) * d2.T.dot(a1) + (l / m) * theta1_reg     # gradient of the input-to-hidden weights
    return D1, D2
# Compute the gradients for one descent step via error back-propagation #
#####################################
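#####################################
# Optional gradient check (a minimal sketch; eps and the number of sampled
# entries are assumed values). Not called by the training loop below; call it
# manually after computing D1 to compare a central-difference estimate of the
# cost gradient against the back-propagation gradient on a few theta1 entries.
def gradient_check(theta1_c, theta2_c, X_c, Y_c, D1_c, eps=1e-4, n_checks=5):
    for _ in range(n_checks):
        i = np.random.randint(theta1_c.shape[0])
        j = np.random.randint(theta1_c.shape[1])
        t_plus = theta1_c.copy()
        t_minus = theta1_c.copy()
        t_plus[i, j] += eps
        t_minus[i, j] -= eps
        _, _, a3_p, _, _ = feedforward(t_plus, theta2_c, X_c)
        _, _, a3_m, _, _ = feedforward(t_minus, theta2_c, X_c)
        num_grad = (regularized_cost(t_plus, theta2_c, a3_p, Y_c)
                    - regularized_cost(t_minus, theta2_c, a3_m, Y_c)) / (2 * eps)
        print('theta1[%d, %d]: numerical %.6f vs backprop %.6f'
              % (i, j, num_grad, D1_c[i, j]))
#####################################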
#####################################
# Compute the regularized cost function #
def regularized_cost(theta1, theta2, a3, Ymatrix):
    theta1_cost = theta1[..., 1:]  # exclude the bias column from regularization
    theta2_cost = theta2[..., 1:]
    np.seterr(divide='ignore', invalid='ignore')  # silence log(0) warnings when a3 saturates
    # cross-entropy term, averaged over the m training examples
    part1 = np.mean(np.sum((-Ymatrix) * np.log(a3) - (1 - Ymatrix) * np.log(1 - a3), axis=1))
    # L2 penalty on all non-bias weights
    part2 = (l / (2 * m)) * (np.sum(theta1_cost * theta1_cost) + np.sum(theta2_cost * theta2_cost))
    return part1 + part2
# Compute the regularized cost function #
#####################################
if __name__ == '__main__':
    data = sio.loadmat('ex3data1.mat')
    theta = sio.loadmat('ex3weights.mat')
    theta1 = theta['Theta1']  # input-to-hidden weight matrix, shape (25, 401)
    theta2 = theta['Theta2']  # hidden-to-output weight matrix, shape (10, 26)
    X = data['X']
    Y = data['y']
    m = X.shape[0]  # number of training examples
    l = 1           # regularization parameter lambda
    ## convert the label vector Y into a one-hot matrix Ymatrix ##
    i_temp = np.zeros((1, 10))
    Ymatrix = False
    for i in range(m):
        i_temp[..., Y[i] - 1] = 1
        if type(Ymatrix) == bool:
            Ymatrix = i_temp.copy()  # copy, so clearing i_temp below does not erase row 0
        else:
            Ymatrix = np.concatenate((Ymatrix, i_temp), axis=0)
        i_temp[..., Y[i] - 1] = 0
    ## ##
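    # An equivalent vectorized construction (a sketch; assumes the ex3data1.mat
    # convention that labels run 1..10, with digit 0 stored as label 10):
    # Ymatrix = np.eye(10)[Y.flatten() - 1]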
    ## initialize the weight matrices to train ##
    theta1_train = (np.random.rand(theta1.shape[0], theta1.shape[1]) - 0.5) * 2 * 0.12  # uniform in [-0.12, 0.12]
    theta2_train = (np.random.rand(theta2.shape[0], theta2.shape[1]) - 0.5) * 2 * 0.12
    ## ##
    ## run gradient descent ##
    for i in range(3000):
        a1, a2, a3, z2, z3 = feedforward(theta1_train, theta2_train, X)  # forward pass: compute activations a and pre-activations z
        cost = regularized_cost(theta1_train, theta2_train, a3, Ymatrix)
        D1, D2 = Back_Propagation(a1, a2, a3, z2, Ymatrix, theta1_train, theta2_train)  # backward pass: compute gradients D1 and D2
        theta1_train -= D1  # update with an implicit learning rate of 1
        theta2_train -= D2
        print(cost)
    ## ##
    ## print the results ##
    print('cost:', cost)
    print('theta1_train:', theta1_train)
    print('theta2_train:', theta2_train)
    p = np.zeros((1, 10))
    for i in a3:
        index = np.argmax(i)  # index of the largest value in each output vector
        temp = np.zeros((1, 10))
        temp[0][index] = 1    # set the position with the largest output to 1
        p = np.concatenate((p, temp), axis=0)  # each row of p is a 0/1 prediction vector
    p = p[1:]  # drop the all-zero seed row
    print(classification_report(Ymatrix, p))
    print('end')
    ## ##
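Beyond the per-class report, a single overall accuracy number is often handy. A minimal sketch that could be appended at the end of the main block above, assuming the ex3data1.mat label convention (labels 1 to 10, where 10 stands for the digit 0):

    pred = np.argmax(a3, axis=1) + 1  # map the column index back to a label in 1..10
    print('accuracy:', np.mean(pred == Y.flatten()))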