Andrew Ng Machine Learning Python Assignments: Backpropagation for Neural Networks

Reference: Andrew Ng | Machine Learning Assignment 4.0, Neural Network Backpropagation (BP algorithm), from the CSDN blog 学吧学吧终成学霸.

I implemented gradient checking and verified the cost function, but because of my computer the final accuracy evaluation never finished running; I will rerun it when I get the time.

Method 1: implement each step by following the slides in Andrew Ng's lecture videos

[Figure: argument differences between the unregularized backpropagation function and the gradient-checking function]

[Figure: argument differences between the regularized backpropagation function and the gradient-checking function]

#Regularized backpropagation (correct version)
def back_propagation_Regularzation(theta,x,y,lamda = 1):
    D_temp = back_propagation(theta,x,y)
    D1_temp,D2_temp = splitArray(D_temp)
    theta1_temp,theta2_temp = splitArray(theta)
    theta1_ = theta1_temp * 1.0 #multiplying by 1.0 creates a copy, so theta itself stays intact
    theta2_ = theta2_temp * 1.0
    theta1_[:,0] = 0 #the bias column is not regularized
    theta2_[:,0] = 0
    D1_temp += lamda/m * theta1_
    D2_temp += lamda / m * theta2_
    return rolling(D1_temp,D2_temp)


A pitfall here: splitArray reshapes theta with np.reshape, which returns views that share memory with theta rather than copies. In the version below, zeroing the first column of theta1_temp and theta2_temp therefore also modifies theta itself, so the gradient check that runs afterwards uses a corrupted theta and produces wrong results.
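A minimal sketch (with a small made-up array, not the actual parameter vector) showing the view behaviour and how multiplying by 1.0 breaks the link:

import numpy as np

theta = np.arange(6.0)
view = np.reshape(theta, (2, 3)) #reshape returns a view that shares memory with theta
view[:, 0] = 0                   #this also zeroes theta[0] and theta[3]
print(theta)                     #[0. 1. 2. 0. 4. 5.]

theta = np.arange(6.0)
copy = np.reshape(theta, (2, 3)) * 1.0 #arithmetic produces a new array
copy[:, 0] = 0                         #theta is left untouched
print(theta)                           #[0. 1. 2. 3. 4. 5.]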

#Buggy version of the regularized backpropagation function:
#theta1_temp and theta2_temp are views of theta, so zeroing their first column also mutates theta
def back_propagation_Regularzation(theta,x,y,lamda = 1):
    D_temp = back_propagation(theta,x,y)
    D1_temp,D2_temp = splitArray(D_temp)
    theta1_temp,theta2_temp = splitArray(theta)
    theta1_temp[:,0] = 0 #modifies theta through the view
    theta2_temp[:,0] = 0 #modifies theta through the view
    D1_temp += lamda/m * theta1_temp
    D2_temp += lamda / m * theta2_temp
    return rolling(D1_temp,D2_temp)


import numpy as np
from scipy import io
from scipy import optimize as op

#1. Load and preprocess the data set
dt1 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4data1.mat")
x = dt1["X"] #(5000, 400)
y = dt1["y"] #(5000, 1)
m = y.size #number of samples


dt2 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4weights.mat")
#theta1 is the parameter matrix between the input layer and the hidden layer (layers 1 and 2)
theta1 = dt2["Theta1"] #(25, 401) size: 10025
#theta2 is the parameter matrix between the hidden layer and the output layer (layers 2 and 3)
theta2 = dt2["Theta2"] #(10, 26) size: 260



#-----------------------------------------------------------------------------------------
#2. Define the output-conversion helpers and the cost function
#sigmoid function
def sigmoid(z):
    return 1/(1+np.exp(-z))


#for each sample, pick the digit whose output probability (out of 10) is largest
#(5000,10)->(5000,)
def estimatedY(y):
    length = y.shape[0]
    estimated_y = np.zeros(length)
    for i in range(length):
        estimated_y[i] = np.unravel_index(np.argmax(y[i,:]), y[i,:].shape)[0]+1
    return estimated_y



#one-hot encode the labels
#(5000,1)->(5000,10)
def convertY(y):
    length = y.size
    convert_y = np.zeros((length,10))
    for i in range(length):
        convert_y[i,y[i]-1] = 1
    return convert_y


#compute a layer's output from its input and parameters (a bias column of ones is prepended first)
def layers(x,theta):
    length = x.shape[0]
    temp_x = np.ones((length,1))
    x = np.concatenate((temp_x,x),1)
    result = sigmoid(x@theta.T)
    return result

#forward pass with the given parameters; the output has shape (5000,10)
def calculate_prob(theta,x):
    theta1_temp,theta2_temp = splitArray(theta) #(25,401) (10,26)
    h1 = layers(x,theta1_temp) #(5000, 25)
    h2 = layers(h1,theta2_temp) #(5000, 10)
    return h2


#unregularized cost function
def costFunction(theta,x,y):
    estimated_y = calculate_prob(theta,x) #(5000, 10)
    convert_y = convertY(y) #(5000, 10)
    temp = np.multiply(convert_y,np.log(estimated_y)) + np.multiply((1 - convert_y) , np.log(1 - estimated_y))
    return -1/m*np.sum(temp)


#regularized cost function
def costFunctionRegularzation(theta,x,y,lamda=1):
    theta1_temp,theta2_temp = splitArray(theta)
    theta1_cut = theta1_temp[:,1:]
    theta2_cut = theta2_temp[:,1:]
    #the bias parameters theta_i0 are not penalized
    result1_temp = np.sum(theta1_cut * theta1_cut)
    result2_temp = np.sum(theta2_cut * theta2_cut)
    return costFunction(theta,x,y) + lamda / (2*m) * (result1_temp+result2_temp)
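

#Quick sanity check against the values listed in the ex4 handout (assuming I remember them
#correctly: roughly 0.287629 unregularized and 0.383770 with lambda = 1 for the loaded weights):
# print(costFunction(rolling(theta1, theta2), x, y))
# print(costFunctionRegularzation(rolling(theta1, theta2), x, y))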


#-----------------------------------------------------------------------------------------------
#3. Define the backpropagation functions
#prepend a bias unit (1) to a single sample vector
def addOne(x):
    temp = np.ones(1)
    x = np.concatenate((temp, x))
    return x

#compute one layer's output for a single sample (one row)
def calcualte_layer(x,theta):
    x = addOne(x)
    result0 = x @ theta.T #(401,)*(25,401).T=(25,)
    result = sigmoid(result0) #(25,)
    return result

#convert a single label into a length-10 one-hot vector
def calculate_y(m):
    cal_y =np.zeros(10)
    for i in range(10):
        if m == i+1:
            cal_y[i] = 1
    return cal_y

#split the flat parameter vector into two matrices: (25, 401) and (10, 26)
def splitArray(p):
    return np.reshape(p[:theta1.size],(theta1.shape[0], theta1.shape[1])),np.reshape(p[theta1.size:],(theta2.shape[0], theta2.shape[1]))

#flatten two matrices and concatenate them into one 1-D vector
def rolling(m1,m2):
    m1_roll = m1.ravel()
    m2_roll = m2.ravel()
    m_all = np.concatenate((m1_roll, m2_roll))
    return m_all


#unregularized backpropagation (loops over the samples one by one)
def back_propagation(theta,x,y):
    theta1_reshape,theta2_reshape = splitArray(theta)
    #(25, 401) and (10, 26)

    gradient = np.zeros(theta1.size+theta2.size)
    gradient1,gradient2 = splitArray(gradient)
    #(25, 401) and (10, 26)

    for i in range(m):
        # Step 1: compute the delta of each layer; for layer 3 it is simply a3 - y

        # a1 is the output of layer 1, i.e. the input x
        a1 = x[i,:] #(400,)


        # a2 is the sigmoid output of layer 2
        a2 = calcualte_layer(a1,theta1_reshape) #(25,)


        # a3 is the sigmoid output of layer 3
        a3 = calcualte_layer(a2,theta2_reshape) #(10,)

        convert_y = calculate_y(y[i])


        # delta3 is the error term of the output layer (layer 3)
        delta3 = a3 - convert_y #(10,)

        #delta2 is the error term of the hidden layer (layer 2)
        delta2 = (delta3 @ theta2_reshape[:,1:]) * (a2*(1-a2)) #(25,)
        #for numpy arrays, * is element-wise multiplication

        #---------------------------------------------------------------------------------
        #Step 2: accumulate the gradient of each layer's parameters
        a1 = addOne(a1)
        a2 = addOne(a2)

        #gradient matrix of theta_2 (hidden layer -> output layer), shape (10,26)
        delta3 = np.mat(delta3).T #turn delta3 into a (10,1) column so broadcasting with a2 gives (10,26)
        delta3 = np.array(delta3)
        gradient2 = gradient2 + delta3 * a2

        # gradient matrix of theta_1 (input layer -> hidden layer), shape (25,401)
        delta2 = np.mat(delta2).T #turn delta2 into a (25,1) column so broadcasting with a1 gives (25,401)
        delta2 = np.array(delta2)
        gradient1 = gradient1 + delta2 * a1

    gradient1 = 1/m * gradient1
    gradient2 = 1/m * gradient2
    return rolling(gradient1,gradient2)


#numerical gradient check for the unregularized cost
def gradientCheck(theta,x,y):
    gradCheck = np.zeros(theta.shape)
    iterCheck = np.zeros(theta.shape)
    eplison = 1e-4
    for i in range(len(theta)):
        iterCheck[i] = eplison
        cost2 = costFunction(theta + iterCheck , x,y)
        cost1 = costFunction(theta - iterCheck , x,y)
        gradCheck[i] = (cost2 - cost1)/(2*eplison)
        iterCheck[i] = 0
    return gradCheck


#gradient check (theta_all = rolling(theta1, theta2) is defined further down and must exist before running this block)
# predict_gradient = back_propagation(theta_all, x, y) #(10285,)
# print(predict_gradient)
# num_gradient = gradientCheck(theta_all,x,y) #(10285,)
# np.savetxt("abc.txt",num_gradient,fmt="%d",delimiter=" ")
# print(num_gradient)
# #ratio of the 2-norm of the difference to the 2-norm of the sum of the two gradient vectors
# diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
# print(diff) #on the order of 1e-09; here it came out as 2.1448374139731396e-09
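

#Checking all 10285 parameters numerically is slow; a hedged sketch of spot-checking only a
#handful of random coordinates instead (spotCheckGradient is my own helper name, not from the exercise):
def spotCheckGradient(theta, x, y, n_checks=20, eplison=1e-4):
    analytic = back_propagation(theta, x, y)
    idx = np.random.choice(theta.size, n_checks, replace=False)
    for i in idx:
        shift = np.zeros(theta.shape)
        shift[i] = eplison
        numeric = (costFunction(theta + shift, x, y) - costFunction(theta - shift, x, y)) / (2 * eplison)
        print(i, analytic[i], numeric)

# spotCheckGradient(rolling(theta1, theta2), x, y)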



#regularized backpropagation (works on copies of theta1/theta2 so theta itself is not modified)
def back_propagation_Regularzation(theta,x,y,lamda = 1):
    D_temp = back_propagation(theta,x,y)
    D1_temp,D2_temp = splitArray(D_temp)
    theta1_temp,theta2_temp = splitArray(theta)
    theta1_ = theta1_temp * 1.0
    theta2_ = theta2_temp * 1.0
    theta1_[:,0] = 0 #the bias column is not regularized
    theta2_[:,0] = 0
    D1_temp += lamda/m * theta1_
    D2_temp += lamda / m * theta2_
    return rolling(D1_temp,D2_temp)


#numerical gradient check for the regularized cost
def gradientCheckRegularzation(theta,x,y,lamda=1):
    gradCheck = np.zeros(theta.shape)
    iterCheck = np.zeros(theta.shape)
    eplison = 1e-4
    for i in range(len(theta)):
        iterCheck[i] = eplison
        cost2 = costFunctionRegularzation(theta + iterCheck , x,y,lamda)
        cost1 = costFunctionRegularzation(theta - iterCheck , x,y,lamda)
        gradCheck[i] = (cost2 - cost1)/(2*eplison)
        iterCheck[i] = 0
    return gradCheck



theta_all = rolling(theta1,theta2)
#
# #gradient check with regularization
# predict_gradient = back_propagation_Regularzation(theta_all, x, y) #(10285,)
# print(predict_gradient)
# num_gradient = gradientCheckRegularzation(theta_all,x,y) #(10285,)
# print(num_gradient)
# #ratio of the 2-norm of the difference to the 2-norm of the sum of the two gradient vectors
# diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
# print(diff) #still on the order of 1e-9


#----------------------------------------------------------------------------------
#the part below has not finished running yet
eplison = 0.1
theta_init = np.random.uniform(-eplison,eplison,(10285,))
#optimize the parameters
result = op.fmin_ncg(f = costFunctionRegularzation,fprime=back_propagation_Regularzation,x0 = theta_init,args=(x,y),maxiter = 400)

result_train = calculate_prob(result , x)
print(result_train)


def calculateAccuracy(estimated_y,y):
    extend_y = estimatedY(estimated_y)
    sum = y.size
    count = 0
    for i in range(sum):
        if extend_y[i] == y[i]:
            count +=1
    print("accuracy is:",count/sum)

calculateAccuracy(result_train,y)
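
A side note of my own (not part of the original exercise): if fmin_ncg is too slow to finish, scipy.optimize.minimize with method='TNC' is a commonly used alternative on this assignment and accepts the same cost and gradient functions; a sketch:

# res = op.minimize(fun=costFunctionRegularzation, x0=theta_init, args=(x, y),
#                   method='TNC', jac=back_propagation_Regularzation,
#                   options={'maxiter': 400})
# result_train = calculate_prob(res.x, x) #res.x holds the optimized parameter vector
# calculateAccuracy(result_train, y)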

Method 2: vectorized (matrix) implementation

[Figure: argument differences between the unregularized backpropagation function and the gradient-checking function]

[Figure: argument differences between the regularized backpropagation function and the gradient-checking function]

import numpy as np
from scipy import io
from scipy import optimize as op

#1. Load and preprocess the data set
dt1 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4data1.mat")
x = dt1["X"] #(5000, 400)
y = dt1["y"] #(5000, 1)
m = y.size #number of samples


dt2 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4weights.mat")
#theta1 is the parameter matrix between the input layer and the hidden layer (layers 1 and 2)
theta1 = dt2["Theta1"] #(25, 401) size: 10025
#theta2 is the parameter matrix between the hidden layer and the output layer (layers 2 and 3)
theta2 = dt2["Theta2"] #(10, 26) size: 260




#-----------------------------------------------------------------------------------------
#2. Define the output-conversion helpers and the cost function
#sigmoid function
def sigmoid(z):
    return 1/(1+np.exp(-z))


#for each sample, pick the digit whose output probability (out of 10) is largest
#(5000,10)->(5000,)
def estimatedY(y):
    length = y.shape[0]
    estimated_y = np.zeros(length)
    for i in range(length):
        estimated_y[i] = np.unravel_index(np.argmax(y[i,:]), y[i,:].shape)[0]+1
    return estimated_y



#one-hot encode the labels
#(5000,1)->(5000,10)
def convertY(y):
    length = y.size
    convert_y = np.zeros((length,10))
    for i in range(length):
        convert_y[i,y[i]-1] = 1
    return convert_y


#prepend a column of ones (the bias terms) to a matrix
def addOne(x):
    length = x.shape[0]
    temp_x = np.ones((length, 1))
    x = np.concatenate((temp_x, x), 1)
    return x


#compute a layer's output from its input and parameters (a bias column of ones is prepended first)
def layers(x,theta):
    x = addOne(x)
    result = sigmoid(x@theta.T)
    return result

#forward pass with the given parameters; the output has shape (5000,10)
def calculate_prob(theta,x):
    theta1_temp,theta2_temp = splitArray(theta) #(25,401) (10,26)
    h1 = layers(x,theta1_temp) #(5000, 25)
    h2 = layers(h1,theta2_temp) #(5000, 10)
    return h2


#unregularized cost function
def costFunction(theta,x,y):
    estimated_y = calculate_prob(theta,x) #(5000, 10)
    convert_y = convertY(y) #(5000, 10)
    temp = np.multiply(convert_y,np.log(estimated_y)) + np.multiply((1 - convert_y) , np.log(1 - estimated_y))
    return -1/m*np.sum(temp)


#regularized cost function
def costFunctionRegularzation(theta,x,y,lamda=1):
    theta1_temp,theta2_temp = splitArray(theta)
    theta1_cut = theta1_temp[:,1:]
    theta2_cut = theta2_temp[:,1:]
    #the bias parameters theta_i0 are not penalized
    result1_temp = np.sum(theta1_cut * theta1_cut)
    result2_temp = np.sum(theta2_cut * theta2_cut)
    return costFunction(theta,x,y) + lamda / (2*m) * (result1_temp+result2_temp)


#-----------------------------------------------------------------------------------------------
#3. Define the forward-propagation and backpropagation functions
#forward propagation
def forward_propagation(theta,x):
    theta1_temp , theta2_temp = splitArray(theta)
    a1 = x
    a2 = layers(a1,theta1_temp)
    a3 = layers(a2,theta2_temp)
    return a1,a2,a3

#split the flat parameter vector into two matrices: (25, 401) and (10, 26)
def splitArray(p):
    return np.reshape(p[:theta1.size],(theta1.shape[0], theta1.shape[1])),np.reshape(p[theta1.size:],(theta2.shape[0], theta2.shape[1]))

#flatten two matrices and concatenate them into one 1-D vector
def rolling(m1,m2):
    m1_roll = m1.ravel()
    m2_roll = m2.ravel()
    m_all = np.concatenate((m1_roll, m2_roll))
    return m_all


#unregularized backpropagation (fully vectorized over all samples)
def back_propagation(theta,x,y):
    theta1_reshape,theta2_reshape = splitArray(theta)
    #(25, 401) and (10, 26)

    gradient = np.zeros(theta1.size+theta2.size)
    gradient1,gradient2 = splitArray(gradient)
    #(25, 401) and (10, 26)

    a1,a2,a3 = forward_propagation(theta,x)
    #a1 (5000, 400)
    #a2 (5000, 25)
    #a3 (5000, 10)
    convert_y = convertY(y) #(5000,10)

    # delta3 is the error term of the output layer (layer 3)
    delta3 = a3 - convert_y #(5000,10)

    #delta2 is the error term of the hidden layer (layer 2)
    delta2 = (delta3 @ theta2_reshape[:,1:]) * (a2*(1-a2)) #(5000, 25)
    #for numpy arrays, * is element-wise multiplication

    #---------------------------------------------------------------------------------
    #Step 2: compute the gradient of each layer's parameters
    a1 = addOne(a1) #(5000, 401)
    a2 = addOne(a2) #(5000, 26)


    #gradient matrix of theta_2 (hidden layer -> output layer), shape (10,26)
    gradient2 = delta3.T @ a2 #(10, 26)

    # gradient matrix of theta_1 (input layer -> hidden layer), shape (25,401)
    gradient1 = delta2.T @ a1

    gradient1 = 1/m * gradient1
    gradient2 = 1/m * gradient2
    return rolling(gradient1,gradient2)


#numerical gradient check for the unregularized cost
def gradientCheck(theta,x,y):
    gradCheck = np.zeros(theta.shape)
    iterCheck = np.zeros(theta.shape)
    eplison = 1e-4
    for i in range(len(theta)):
        iterCheck[i] = eplison
        cost2 = costFunction(theta + iterCheck , x,y)
        cost1 = costFunction(theta - iterCheck , x,y)
        gradCheck[i] = (cost2 - cost1)/(2*eplison)
        iterCheck[i] = 0
    return gradCheck


"""theta_all = rolling(theta1,theta2)
#梯度检查
predict_gradient = back_propagation(theta_all, x, y) #(10285,)
print(predict_gradient)
num_gradient = gradientCheck(theta_all,x,y) #(10285,)
np.savetxt("abc.txt",num_gradient,fmt="%d",delimiter=" ")
print(num_gradient)
#向量差的二范数与向量和的二范数的比值
diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
print(diff) #数量级为e-09,这里结果为2.1515337653638197e-09"""



#regularized backpropagation (works on copies of theta1/theta2 so theta itself is not modified)
def back_propagation_Regularzation(theta,x,y,lamda = 1):
    D_temp = back_propagation(theta,x,y)
    D1_temp,D2_temp = splitArray(D_temp)
    theta1_temp,theta2_temp = splitArray(theta)
    theta1_ = theta1_temp * 1.0
    theta2_ = theta2_temp * 1.0
    theta1_[:,0] = 0 #the bias column is not regularized
    theta2_[:,0] = 0
    D1_temp += lamda/m * theta1_
    D2_temp += lamda / m * theta2_
    return rolling(D1_temp,D2_temp)


#numerical gradient check for the regularized cost
def gradientCheckRegularzation(theta,x,y,lamda=1):
    gradCheck = np.zeros(theta.shape)
    iterCheck = np.zeros(theta.shape)
    eplison = 1e-4
    for i in range(len(theta)):
        iterCheck[i] = eplison
        cost2 = costFunctionRegularzation(theta + iterCheck , x,y,lamda)
        cost1 = costFunctionRegularzation(theta - iterCheck , x,y,lamda)
        gradCheck[i] = (cost2 - cost1)/(2*eplison)
        iterCheck[i] = 0
    return gradCheck


"""
theta_all = rolling(theta1,theta2)

#正则化后的梯度检查
predict_gradient = back_propagation_Regularzation(theta_all, x, y) #(10285,)
print(predict_gradient)
num_gradient = gradientCheckRegularzation(theta_all,x,y) #(10285,)
print(num_gradient)
#向量差的二范数与向量和的二范数的比值
diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
print(diff) #还是e-9"""


#————————————————————————————————————————————————————————----------
#has not finished running yet
def randIniWeights(size, epsilon):
    return np.random.uniform(-epsilon, epsilon, size)

theta_init = randIniWeights((10285,), 0.12)
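#The value 0.12 matches the initialization heuristic I believe the ex4 handout suggests,
#epsilon_init = sqrt(6) / sqrt(L_in + L_out); a sketch assuming a 400-unit input layer and
#a 25-unit hidden layer:
# epsilon_init = np.sqrt(6) / np.sqrt(400 + 25) #about 0.12
# theta_init = randIniWeights((10285,), epsilon_init)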
#optimize the parameters
result = op.fmin_ncg(f = costFunctionRegularzation,fprime=back_propagation_Regularzation,x0 = theta_init,args=(x,y),maxiter = 400)

result_train = calculate_prob(result , x)
print(result_train)


def calculateAccuracy(estimated_y,y):
    extend_y = estimatedY(estimated_y)
    sum = y.size
    count = 0
    for i in range(sum):
        if extend_y[i] == y[i]:
            count +=1
    print("accuracy is:",count/sum)

calculateAccuracy(result_train,y)