吴恩达机器学习python作业之神经网络的反向传播算法

abcd1233463457347

已于 2022-10-29 11:56:10 修改

阅读量663

点赞数

分类专栏：吴恩达机器学习作业文章标签：机器学习 python 神经网络

于 2022-10-29 11:55:24 首次发布

本文链接：https://blog.csdn.net/Ariya1234/article/details/127585158

版权

吴恩达机器学习作业专栏收录该内容

14 篇文章 0 订阅

订阅专栏

参考链接：吴恩达|机器学习作业4.0神经网络反向传播（BP算法）_学吧学吧终成学霸的博客-CSDN博客
进行了梯度检查与代价函数的检查，但由于电脑原因，最后的准确率部分一直没跑完，有时间再跑。

方法一：按照吴恩达上课视频中的ppt挨个实现

在这里插入图片描述
未正则化的反向传播函数与梯度检查函数之间的参数区别

正则化的反向传播函数与梯度检查函数之间的参数区别

# Regularized backpropagation: unregularized gradient plus the L2 penalty term.
def back_propagation_Regularzation(theta, x, y, lamda=1):
    """Return the flattened gradient of the regularized cost.

    The unregularized gradient comes from back_propagation; lamda / m times
    each weight matrix is then added, with the first column of the penalty
    zeroed so that column is not penalized.
    """
    grad1, grad2 = splitArray(back_propagation(theta, x, y))
    weights1, weights2 = splitArray(theta)

    # Multiplying by 1.0 yields fresh arrays, so zeroing the first column
    # below never writes through to the caller's theta.
    penalty1 = weights1 * 1.0
    penalty2 = weights2 * 1.0
    penalty1[:, 0] = 0
    penalty2[:, 0] = 0

    grad1 += lamda / m * penalty1
    grad2 += lamda / m * penalty2
    return rolling(grad1, grad2)

在这里插入图片描述

这里有一个坑：numpy 的 array 属于可变类型，而 splitArray 中的 np.reshape 返回的是原数组的视图（view）而不是副本。因此在下面这个函数中修改了 theta1_temp 的具体数值后，theta 本身也会随之变化，导致后面进行梯度检测时使用的 theta 已经被改动，结果出现问题。上面的版本先用 theta1_temp * 1.0 得到一份副本再修改，从而避免了这个问题。

# Regularized backpropagation -- the BUGGY variant discussed in the surrounding text.
def back_propagation_Regularzation(theta,x,y,lamda = 1):
    # Unregularized gradient, split into the two per-layer matrices.
    D_temp = back_propagation(theta,x,y)
    D1_temp,D2_temp = splitArray(D_temp)
    theta1_temp,theta2_temp = splitArray(theta)
    # BUG (kept on purpose as the demonstration): splitArray reshapes slices
    # of theta, which normally yields views, so these two assignments also
    # zero the corresponding entries of the caller's theta.
    theta1_temp[:,0] = 0
    theta2_temp[:,0] = 0
    # Add the penalty term lamda / m * theta to each gradient matrix.
    D1_temp += lamda/m * theta1_temp
    D2_temp += lamda / m * theta2_temp
    return rolling(D1_temp,D2_temp)

在这里插入图片描述

import numpy as np
from scipy import io
from scipy import optimize as op

# 1. Load the data set and the pre-trained weights.
# Raw strings keep the Windows backslashes literal; in a normal string the
# backslash sequences in these paths are invalid escapes, which newer Python
# versions warn about.
dt1 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4data1.mat")
x = dt1["X"]  # (5000, 400)
y = dt1["y"]  # (5000, 1)
m = y.size  # number of training samples


dt2 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4weights.mat")
# theta1: weights between the input layer and the hidden layer
theta1 = dt2["Theta1"]  # (25, 401), size 10025
# theta2: weights between the hidden layer and the output layer
theta2 = dt2["Theta2"]  # (10, 26), size 260



#-----------------------------------------------------------------------------------------
#2.定义输出层转换函数与代价函数
# Logistic (sigmoid) activation.
def sigmoid(z):
    """Return 1 / (1 + exp(-z)), applied element-wise to *z*."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator


# Map each row of class scores to its predicted (1-based) label.
# (5000,10)->(5000,)
def estimatedY(y):
    """Return the 1-based index of the largest entry in each row of *y*.

    Args:
        y: 2-D array with one row per sample and one column per class.

    Returns:
        1-D float array holding, for every row, the position of its first
        maximum plus one (labels start at 1).
    """
    # One vectorized argmax over the class axis replaces the per-row loop.
    return (np.argmax(y, axis=1) + 1).astype(float)



# Build one-hot rows from 1-based class labels.
# (5000,)->(5000,10)
def convertY(y, num_labels=10):
    """One-hot encode 1-based labels.

    Args:
        y: array of integer labels in 1..num_labels. Any shape is accepted
            and flattened, so both (n,) and (n, 1) inputs work.
        num_labels: number of classes, i.e. columns of the result.

    Returns:
        Float array of shape (y.size, num_labels) with a single 1 per row,
        placed in column ``label - 1``.

    Raises:
        ValueError: if a label lies outside 1..num_labels.
    """
    # Cast before subtracting so unsigned inputs (e.g. uint8) cannot wrap
    # around when computing label - 1.
    labels = np.asarray(y).ravel().astype(int)
    if labels.size and (labels.min() < 1 or labels.max() > num_labels):
        raise ValueError("labels must lie in 1..num_labels")
    one_hot = np.zeros((labels.size, num_labels))
    one_hot[np.arange(labels.size), labels - 1] = 1
    return one_hot


# Compute one layer's activations from its inputs and weight matrix.
def layers(x, theta):
    """Prepend a column of ones to *x* and return sigmoid(x @ theta.T)."""
    bias_column = np.ones((x.shape[0], 1))
    with_bias = np.hstack((bias_column, x))
    return sigmoid(with_bias @ theta.T)

# Run the network forward and return the output-layer activations (5000,10).
def calculate_prob(theta, x):
    """Split *theta* into the two weight matrices and push *x* through both layers."""
    weights1, weights2 = splitArray(theta)  # (25,401) (10,26)
    hidden = layers(x, weights1)  # (5000, 25)
    return layers(hidden, weights2)  # (5000, 10)


# Unregularized cost.
def costFunction(theta, x, y):
    """Return the cross-entropy cost summed over all outputs, scaled by -1/m.

    The network outputs come from calculate_prob and the labels are one-hot
    encoded with convertY; m is the module-level sample count.
    """
    probs = calculate_prob(theta, x)  # (5000, 10)
    targets = convertY(y)  # (5000, 10)
    positive_term = targets * np.log(probs)
    negative_term = (1 - targets) * np.log(1 - probs)
    return -1 / m * np.sum(positive_term + negative_term)


# Regularized cost.
def costFunctionRegularzation(theta, x, y, lamda=1):
    """Return costFunction plus lamda / (2m) times the sum of squared weights.

    The first column of each weight matrix is left out of the penalty.
    """
    weights1, weights2 = splitArray(theta)
    # Column 0 is skipped: those weights are not penalized.
    squares1 = np.sum(np.square(weights1[:, 1:]))
    squares2 = np.sum(np.square(weights2[:, 1:]))
    return costFunction(theta, x, y) + lamda / (2 * m) * (squares1 + squares2)


#-----------------------------------------------------------------------------------------------
#3.定义反向传播函数
# Prepend the constant 1 to a single sample's 1-D activation vector.
def addOne(x):
    """Return a new 1-D array consisting of 1.0 followed by the entries of *x*."""
    return np.hstack((np.ones(1), x))

# Activation of one layer for a single sample (one row of the data).
def calcualte_layer(x, theta):
    """Prepend the constant 1 to *x* and return sigmoid(x @ theta.T)."""
    with_bias = addOne(x)
    # e.g. (401,) @ (25, 401).T -> (25,)
    return sigmoid(with_bias @ theta.T)

# Turn a single 1-based label into a length-10 indicator vector.
def calculate_y(m):
    """Return a float vector of ten entries with a 1 at position ``m - 1``.

    A label outside 1..10 produces an all-zero vector.
    """
    class_ids = np.arange(1, 11)
    return (class_ids == m).astype(float)

# Split a flat parameter vector into two matrices, (25, 401) and (10, 26).
def splitArray(p):
    """Reshape the head and tail of *p* to the shapes of the global theta1 and theta2.

    The first theta1.size entries become the first matrix; the remaining
    entries become the second.
    """
    boundary = theta1.size
    first = np.reshape(p[:boundary], (theta1.shape[0], theta1.shape[1]))
    second = np.reshape(p[boundary:], (theta2.shape[0], theta2.shape[1]))
    return first, second

# Merge two matrices into a single 1-D vector.
def rolling(m1, m2):
    """Flatten *m1* and *m2* with ravel() and concatenate them, m1 first."""
    return np.concatenate([m1.ravel(), m2.ravel()])


# Unregularized backpropagation, processing one training sample at a time.
def back_propagation(theta, x, y):
    """Return the flattened gradient of the unregularized cost.

    Args:
        theta: flat parameter vector; split into the two weight matrices
            by splitArray.
        x: input matrix with one sample per row.
        y: labels; entry i is the 1-based class of sample i.

    Returns:
        1-D array holding both gradient matrices, scaled by 1/m (the
        module-level sample count) and flattened by rolling.
    """
    theta1_reshape, theta2_reshape = splitArray(theta)
    # (25, 401) and (10, 26) for the exercise data

    # Accumulators with the same shapes as the weight matrices.
    gradient1 = np.zeros(theta1_reshape.shape)
    gradient2 = np.zeros(theta2_reshape.shape)

    for i in range(m):
        # Step 1: forward pass and per-layer error terms.
        a1 = x[i, :]  # layer-1 output is the raw input, (400,)
        a2 = calcualte_layer(a1, theta1_reshape)  # hidden activations, (25,)
        a3 = calcualte_layer(a2, theta2_reshape)  # output activations, (10,)

        # Output-layer error is simply prediction minus one-hot target.
        delta3 = a3 - calculate_y(y[i])  # (10,)

        # Hidden-layer error; the first column of theta2 is skipped because
        # it multiplies the constant 1, not a hidden unit. `*` is
        # element-wise for arrays.
        delta2 = (delta3 @ theta2_reshape[:, 1:]) * (a2 * (1 - a2))  # (25,)

        # Step 2: accumulate the per-sample gradients. np.outer builds the
        # same (rows of delta) x (entries of activation) product the
        # original obtained through np.mat, an alias that NumPy 2.0 removed.
        gradient2 = gradient2 + np.outer(delta3, addOne(a2))  # (10, 26)
        gradient1 = gradient1 + np.outer(delta2, addOne(a1))  # (25, 401)

    gradient1 = 1 / m * gradient1
    gradient2 = 1 / m * gradient2
    return rolling(gradient1, gradient2)


# Numerical gradient of the unregularized cost.
def gradientCheck(theta, x, y):
    """Approximate the gradient of costFunction by central differences.

    Each component of *theta* is perturbed by +/- 1e-4 in turn and the cost
    difference is divided by twice the step.
    """
    step = 1e-4
    numeric_grad = np.zeros(theta.shape)
    perturbation = np.zeros(theta.shape)
    for idx in range(len(theta)):
        perturbation[idx] = step
        cost_plus = costFunction(theta + perturbation, x, y)
        cost_minus = costFunction(theta - perturbation, x, y)
        perturbation[idx] = 0  # restore before moving to the next component
        numeric_grad[idx] = (cost_plus - cost_minus) / (2 * step)
    return numeric_grad


# Gradient check (disabled)
# predict_gradient = back_propagation(theta_all, x, y) #(10285,)
# print(predict_gradient)
# num_gradient = gradientCheck(theta_all,x,y) #(10285,)
# np.savetxt("abc.txt",num_gradient,fmt="%d",delimiter=" ")
# print(num_gradient)
# # Ratio of the 2-norm of the difference to the 2-norm of the sum
# diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
# print(diff) # on the order of 1e-09; this run gave 2.1448374139731396e-09



# Regularized backpropagation: unregularized gradient plus the L2 penalty term.
def back_propagation_Regularzation(theta, x, y, lamda=1):
    """Return the flattened gradient of the regularized cost.

    The unregularized gradient comes from back_propagation; lamda / m times
    each weight matrix is then added, with the first column of the penalty
    zeroed so that column is not penalized.
    """
    grad1, grad2 = splitArray(back_propagation(theta, x, y))
    weights1, weights2 = splitArray(theta)

    # Multiplying by 1.0 yields fresh arrays, so zeroing the first column
    # below never writes through to the caller's theta.
    penalty1 = weights1 * 1.0
    penalty2 = weights2 * 1.0
    penalty1[:, 0] = 0
    penalty2[:, 0] = 0

    grad1 += lamda / m * penalty1
    grad2 += lamda / m * penalty2
    return rolling(grad1, grad2)


# Numerical gradient of the regularized cost.
def gradientCheckRegularzation(theta, x, y, lamda=1):
    """Approximate the gradient of costFunctionRegularzation by central differences.

    Each component of *theta* is perturbed by +/- 1e-4 in turn and the cost
    difference is divided by twice the step.
    """
    step = 1e-4
    numeric_grad = np.zeros(theta.shape)
    perturbation = np.zeros(theta.shape)
    for idx in range(len(theta)):
        perturbation[idx] = step
        cost_plus = costFunctionRegularzation(theta + perturbation, x, y, lamda)
        cost_minus = costFunctionRegularzation(theta - perturbation, x, y, lamda)
        perturbation[idx] = 0  # restore before moving to the next component
        numeric_grad[idx] = (cost_plus - cost_minus) / (2 * step)
    return numeric_grad



# Flatten the pre-trained weights into one vector (used by the gradient checks).
theta_all = rolling(theta1,theta2)
#
# # Gradient check with regularization (disabled)
# predict_gradient = back_propagation_Regularzation(theta_all, x, y) #(10285,)
# print(predict_gradient)
# num_gradient = gradientCheckRegularzation(theta_all,x,y) #(10285,)
# print(num_gradient)
# # Ratio of the 2-norm of the difference to the 2-norm of the sum
# diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
# print(diff) # still on the order of 1e-9


#----------------------------------------------------------------------------------
# The steps below had not finished running when this was written.
eplison = 0.1
# Size the start vector from the weight matrices instead of hard-coding 10285.
theta_init = np.random.uniform(-eplison,eplison,(theta1.size + theta2.size,))
# Optimize the parameters with Newton-CG, supplying the analytic gradient.
result = op.fmin_ncg(f = costFunctionRegularzation,fprime=back_propagation_Regularzation,x0 = theta_init,args=(x,y),maxiter = 400)

# Network outputs on the training set using the optimized parameters.
result_train = calculate_prob(result , x)
print(result_train)


def calculateAccuracy(estimated_y,y):
    """Print the fraction of samples whose predicted label equals the true label.

    Args:
        estimated_y: score matrix with one row per sample; passed to
            estimatedY to obtain the predicted labels.
        y: true labels, one per sample; flattened before the comparison.
    """
    predicted = estimatedY(estimated_y)
    # Flatten so an (n, 1) label column is compared element-wise instead of
    # broadcasting against the (n,) predictions.
    actual = np.ravel(y)
    total = y.size  # named `total` so the builtin sum() is not shadowed
    correct = int(np.count_nonzero(predicted == actual))
    print("accuracy is:",correct/total)

calculateAccuracy(result_train,y)

方法二：矩阵实现

未正则化的反向传播函数与梯度检查函数之间的参数区别
在这里插入图片描述
正则化的反向传播函数与梯度检查函数之间的参数区别

import numpy as np
from scipy import io
from scipy import optimize as op

# 1. Load the data set and the pre-trained weights.
# Raw strings keep the Windows backslashes literal; in a normal string the
# backslash sequences in these paths are invalid escapes, which newer Python
# versions warn about.
dt1 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4data1.mat")
x = dt1["X"]  # (5000, 400)
y = dt1["y"]  # (5000, 1)
m = y.size  # number of training samples


dt2 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4weights.mat")
# theta1: weights between the input layer and the hidden layer
theta1 = dt2["Theta1"]  # (25, 401), size 10025
# theta2: weights between the hidden layer and the output layer
theta2 = dt2["Theta2"]  # (10, 26), size 260




#-----------------------------------------------------------------------------------------
#2.定义输出层转换函数与代价函数
# Logistic (sigmoid) activation.
def sigmoid(z):
    """Return 1 / (1 + exp(-z)), applied element-wise to *z*."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator


# Map each row of class scores to its predicted (1-based) label.
# (5000,10)->(5000,)
def estimatedY(y):
    """Return the 1-based index of the largest entry in each row of *y*.

    Args:
        y: 2-D array with one row per sample and one column per class.

    Returns:
        1-D float array holding, for every row, the position of its first
        maximum plus one (labels start at 1).
    """
    # One vectorized argmax over the class axis replaces the per-row loop.
    return (np.argmax(y, axis=1) + 1).astype(float)



# Build one-hot rows from 1-based class labels.
# (5000,)->(5000,10)
def convertY(y, num_labels=10):
    """One-hot encode 1-based labels.

    Args:
        y: array of integer labels in 1..num_labels. Any shape is accepted
            and flattened, so both (n,) and (n, 1) inputs work.
        num_labels: number of classes, i.e. columns of the result.

    Returns:
        Float array of shape (y.size, num_labels) with a single 1 per row,
        placed in column ``label - 1``.

    Raises:
        ValueError: if a label lies outside 1..num_labels.
    """
    # Cast before subtracting so unsigned inputs (e.g. uint8) cannot wrap
    # around when computing label - 1.
    labels = np.asarray(y).ravel().astype(int)
    if labels.size and (labels.min() < 1 or labels.max() > num_labels):
        raise ValueError("labels must lie in 1..num_labels")
    one_hot = np.zeros((labels.size, num_labels))
    one_hot[np.arange(labels.size), labels - 1] = 1
    return one_hot


# Prepend a column of ones to a matrix.
def addOne(x):
    """Return a new matrix made of a leading column of ones followed by *x*."""
    ones_column = np.ones((x.shape[0], 1))
    return np.hstack((ones_column, x))


# Compute one layer's activations from its inputs and weight matrix.
def layers(x, theta):
    """Prepend a column of ones to *x* (via addOne) and return sigmoid(x @ theta.T)."""
    return sigmoid(addOne(x) @ theta.T)

# Run the network forward and return the output-layer activations (5000,10).
def calculate_prob(theta, x):
    """Split *theta* into the two weight matrices and push *x* through both layers."""
    weights1, weights2 = splitArray(theta)  # (25,401) (10,26)
    hidden = layers(x, weights1)  # (5000, 25)
    return layers(hidden, weights2)  # (5000, 10)


# Unregularized cost.
def costFunction(theta, x, y):
    """Return the cross-entropy cost summed over all outputs, scaled by -1/m.

    The network outputs come from calculate_prob and the labels are one-hot
    encoded with convertY; m is the module-level sample count.
    """
    probs = calculate_prob(theta, x)  # (5000, 10)
    targets = convertY(y)  # (5000, 10)
    positive_term = targets * np.log(probs)
    negative_term = (1 - targets) * np.log(1 - probs)
    return -1 / m * np.sum(positive_term + negative_term)


# Regularized cost.
def costFunctionRegularzation(theta, x, y, lamda=1):
    """Return costFunction plus lamda / (2m) times the sum of squared weights.

    The first column of each weight matrix is left out of the penalty.
    """
    weights1, weights2 = splitArray(theta)
    # Column 0 is skipped: those weights are not penalized.
    squares1 = np.sum(np.square(weights1[:, 1:]))
    squares2 = np.sum(np.square(weights2[:, 1:]))
    return costFunction(theta, x, y) + lamda / (2 * m) * (squares1 + squares2)


#-----------------------------------------------------------------------------------------------
#3.定义前向传播函数与反向传播函数
# Forward propagation through both layers.
def forward_propagation(theta, x):
    """Return (a1, a2, a3): the input itself, the hidden activations and the output activations."""
    weights1, weights2 = splitArray(theta)
    hidden = layers(x, weights1)
    output = layers(hidden, weights2)
    return x, hidden, output

# Split a flat parameter vector into two matrices, (25, 401) and (10, 26).
def splitArray(p):
    """Reshape the head and tail of *p* to the shapes of the global theta1 and theta2.

    The first theta1.size entries become the first matrix; the remaining
    entries become the second.
    """
    boundary = theta1.size
    first = np.reshape(p[:boundary], (theta1.shape[0], theta1.shape[1]))
    second = np.reshape(p[boundary:], (theta2.shape[0], theta2.shape[1]))
    return first, second

# Merge two matrices into a single 1-D vector.
def rolling(m1, m2):
    """Flatten *m1* and *m2* with ravel() and concatenate them, m1 first."""
    return np.concatenate([m1.ravel(), m2.ravel()])


# Unregularized backpropagation, computed for all samples at once.
def back_propagation(theta, x, y):
    """Return the flattened gradient of the unregularized cost.

    Args:
        theta: flat parameter vector; split into the two weight matrices
            by splitArray.
        x: input matrix with one sample per row.
        y: 1-based labels, one-hot encoded here with convertY.

    Returns:
        1-D array holding both gradient matrices, scaled by 1/m (the
        module-level sample count) and flattened by rolling.
    """
    # Only the second weight matrix is needed directly; forward_propagation
    # splits theta itself. The zero-filled gradient buffer the original
    # allocated was overwritten before use and has been dropped.
    _, theta2_reshape = splitArray(theta)  # (10, 26)

    a1, a2, a3 = forward_propagation(theta, x)
    # a1 (5000, 400), a2 (5000, 25), a3 (5000, 10) for the exercise data
    convert_y = convertY(y)  # (5000, 10)

    # Output-layer error is prediction minus one-hot target.
    delta3 = a3 - convert_y  # (5000, 10)

    # Hidden-layer error; the first column of theta2 is skipped because it
    # multiplies the constant 1, not a hidden unit. `*` is element-wise.
    delta2 = (delta3 @ theta2_reshape[:, 1:]) * (a2 * (1 - a2))  # (5000, 25)

    # Step 2: gradients of both weight matrices, with the column of ones
    # prepended to each layer's activations.
    a1 = addOne(a1)  # (5000, 401)
    a2 = addOne(a2)  # (5000, 26)

    gradient2 = 1 / m * (delta3.T @ a2)  # (10, 26)
    gradient1 = 1 / m * (delta2.T @ a1)  # (25, 401)
    return rolling(gradient1, gradient2)


# Numerical gradient of the unregularized cost.
def gradientCheck(theta, x, y):
    """Approximate the gradient of costFunction by central differences.

    Each component of *theta* is perturbed by +/- 1e-4 in turn and the cost
    difference is divided by twice the step.
    """
    step = 1e-4
    numeric_grad = np.zeros(theta.shape)
    perturbation = np.zeros(theta.shape)
    for idx in range(len(theta)):
        perturbation[idx] = step
        cost_plus = costFunction(theta + perturbation, x, y)
        cost_minus = costFunction(theta - perturbation, x, y)
        perturbation[idx] = 0  # restore before moving to the next component
        numeric_grad[idx] = (cost_plus - cost_minus) / (2 * step)
    return numeric_grad


# Disabled gradient-check script, kept as an unused string literal.
"""theta_all = rolling(theta1,theta2)
#梯度检查
predict_gradient = back_propagation(theta_all, x, y) #(10285,)
print(predict_gradient)
num_gradient = gradientCheck(theta_all,x,y) #(10285,)
np.savetxt("abc.txt",num_gradient,fmt="%d",delimiter=" ")
print(num_gradient)
#向量差的二范数与向量和的二范数的比值
diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
print(diff) #数量级为e-09，这里结果为2.1515337653638197e-09"""



# Regularized backpropagation: unregularized gradient plus the L2 penalty term.
def back_propagation_Regularzation(theta, x, y, lamda=1):
    """Return the flattened gradient of the regularized cost.

    The unregularized gradient comes from back_propagation; lamda / m times
    each weight matrix is then added, with the first column of the penalty
    zeroed so that column is not penalized.
    """
    grad1, grad2 = splitArray(back_propagation(theta, x, y))
    weights1, weights2 = splitArray(theta)

    # Multiplying by 1.0 yields fresh arrays, so zeroing the first column
    # below never writes through to the caller's theta.
    penalty1 = weights1 * 1.0
    penalty2 = weights2 * 1.0
    penalty1[:, 0] = 0
    penalty2[:, 0] = 0

    grad1 += lamda / m * penalty1
    grad2 += lamda / m * penalty2
    return rolling(grad1, grad2)


# Numerical gradient of the regularized cost.
def gradientCheckRegularzation(theta, x, y, lamda=1):
    """Approximate the gradient of costFunctionRegularzation by central differences.

    Each component of *theta* is perturbed by +/- 1e-4 in turn and the cost
    difference is divided by twice the step.
    """
    step = 1e-4
    numeric_grad = np.zeros(theta.shape)
    perturbation = np.zeros(theta.shape)
    for idx in range(len(theta)):
        perturbation[idx] = step
        cost_plus = costFunctionRegularzation(theta + perturbation, x, y, lamda)
        cost_minus = costFunctionRegularzation(theta - perturbation, x, y, lamda)
        perturbation[idx] = 0  # restore before moving to the next component
        numeric_grad[idx] = (cost_plus - cost_minus) / (2 * step)
    return numeric_grad


# Disabled regularized gradient-check script, kept as an unused string literal.
"""
theta_all = rolling(theta1,theta2)

#正则化后的梯度检查
predict_gradient = back_propagation_Regularzation(theta_all, x, y) #(10285,)
print(predict_gradient)
num_gradient = gradientCheckRegularzation(theta_all,x,y) #(10285,)
print(num_gradient)
#向量差的二范数与向量和的二范数的比值
diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
print(diff) #还是e-9"""


#————————————————————————————————————————————————————————----------
#还没跑的出来
def randIniWeights(size, epsilon):
    """Draw an array of the given *size* uniformly from [-epsilon, epsilon)."""
    lower, upper = -epsilon, epsilon
    return np.random.uniform(low=lower, high=upper, size=size)

# Size the start vector from the weight matrices instead of hard-coding 10285.
theta_init = randIniWeights((theta1.size + theta2.size,), 0.12)
# Optimize the parameters with Newton-CG, supplying the analytic gradient.
result = op.fmin_ncg(f = costFunctionRegularzation,fprime=back_propagation_Regularzation,x0 = theta_init,args=(x,y),maxiter = 400)

# Network outputs on the training set using the optimized parameters.
result_train = calculate_prob(result , x)
print(result_train)


def calculateAccuracy(estimated_y,y):
    """Print the fraction of samples whose predicted label equals the true label.

    Args:
        estimated_y: score matrix with one row per sample; passed to
            estimatedY to obtain the predicted labels.
        y: true labels, one per sample; flattened before the comparison.
    """
    predicted = estimatedY(estimated_y)
    # Flatten so an (n, 1) label column is compared element-wise instead of
    # broadcasting against the (n,) predictions.
    actual = np.ravel(y)
    total = y.size  # named `total` so the builtin sum() is not shadowed
    correct = int(np.count_nonzero(predicted == actual))
    print("accuracy is:",correct/total)

calculateAccuracy(result_train,y)