参考链接:吴恩达|机器学习作业4.0神经网络反向传播(BP算法)_学吧学吧终成学霸的博客-CSDN博客
进行了梯度检查与代价函数的检查,但由于电脑原因,最后的准确率部分一直没跑完,有时间再跑。
方法一:按照吴恩达上课视频中的ppt挨个实现
未正则化的反向传播函数与梯度检查函数之间的参数区别
正则化的反向传播函数与梯度检查函数之间的参数区别
# Regularized back-propagation gradient.
def back_propagation_Regularzation(theta, x, y, lamda=1):
    """Return the unrolled gradient of the regularized cost.

    Parameters
    ----------
    theta : 1-D array
        Both weight matrices unrolled into one vector, in the layout that
        ``splitArray`` understands.
    x : 2-D array
        Sample matrix, forwarded to ``back_propagation``; its row count is
        used as the number of samples.
    y : array
        Labels, forwarded to ``back_propagation``.
    lamda : float, optional
        Regularization strength.

    Returns
    -------
    1-D array
        Gradient for both weight matrices, unrolled by ``rolling``.
    """
    # Use the data that was actually passed in, not the module-level ``m``.
    sample_count = x.shape[0]
    grad1, grad2 = splitArray(back_propagation(theta, x, y))
    weights1, weights2 = splitArray(theta)
    # The bias column (index 0) is not penalized.  Only columns 1.. are read,
    # and nothing is assigned into the weight arrays, so the caller's
    # ``theta`` is never modified.
    grad1[:, 1:] += lamda / sample_count * weights1[:, 1:]
    grad2[:, 1:] += lamda / sample_count * weights2[:, 1:]
    return rolling(grad1, grad2)
这里有一个坑:numpy 的 array 属于可变类型,而且 splitArray 里的 np.reshape 返回的通常是原数组的视图(view)而不是拷贝。因此在下面这个函数中直接修改 theta1_temp 的数值后,传入的 theta 也会跟着变化,导致后面进行梯度检测(或继续优化)时用到的 theta 已经被改动,结果出错。上面的版本不会原地修改 theta 的视图,所以没有这个问题。
# Regularized back-propagation gradient -- the FAULTY variant that the note
# above warns about.  It is kept as an illustration of the pitfall.
def back_propagation_Regularzation(theta,x,y,lamda = 1):
    # Unregularized gradient, split back into one matrix per layer.
    D_temp = back_propagation(theta,x,y)
    D1_temp,D2_temp = splitArray(D_temp)
    theta1_temp,theta2_temp = splitArray(theta)
    # PITFALL: splitArray builds its results with np.reshape on slices of its
    # argument, which presumably yields views rather than copies.  These two
    # in-place assignments therefore write zeros into the caller's `theta`,
    # as the note above describes.
    theta1_temp[:,0] = 0
    theta2_temp[:,0] = 0
    # Add the penalty term; `m` is a module-level name, not a parameter.
    D1_temp += lamda/m * theta1_temp
    D2_temp += lamda / m * theta2_temp
    return rolling(D1_temp,D2_temp)
import numpy as np
from scipy import io
from scipy import optimize as op
# 1. Load the data set and the stored weights.
# Raw strings keep the Windows backslashes literal; sequences such as "\d" and
# "\e" are invalid escapes in a normal string literal and trigger a warning.
dt1 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4data1.mat")
x = dt1["X"]  # (5000, 400)
y = dt1["y"]  # (5000, 1)
m = y.size  # number of samples
dt2 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4weights.mat")
# theta1: weights between the input layer and the hidden layer (layers 1 and 2)
theta1 = dt2["Theta1"]  # (25, 401), size 10025
# theta2: weights between the hidden layer and the output layer (layers 2 and 3)
theta2 = dt2["Theta2"]  # (10, 26), size 260
#-----------------------------------------------------------------------------------------
#2.定义输出层转换函数与代价函数
# Logistic (sigmoid) activation.
def sigmoid(z):
    """Return ``1 / (1 + exp(-z))``, evaluated element-wise for array input."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
# Pick, for every row of class scores, the label with the highest score.
# (n_samples, n_classes) -> (n_samples,)
def estimatedY(y):
    """Return the 1-based index of the largest entry in each row of ``y``.

    Parameters
    ----------
    y : 2-D array
        One row of scores per sample.

    Returns
    -------
    1-D float array
        ``argmax + 1`` for every row; on ties the first maximum wins.
    """
    # A single argmax along axis 1 replaces the per-row Python loop; the
    # result is cast to float to match the float output buffer used before.
    return (np.argmax(y, axis=1) + 1).astype(float)
# Turn integer labels into one-hot rows.
# (n_samples,) or (n_samples, 1) -> (n_samples, num_labels)
def convertY(y, num_labels=10):
    """Return a one-hot matrix for the 1-based labels in ``y``.

    Parameters
    ----------
    y : array of integer labels
        Label ``k`` sets column ``k - 1`` of the corresponding row.
    num_labels : int, optional
        Number of columns of the result (10 by default).

    Returns
    -------
    2-D float array of shape ``(y.size, num_labels)``.
    """
    labels = np.asarray(y).ravel()
    one_hot = np.zeros((labels.size, num_labels))
    if labels.size:
        # One fancy-indexed assignment replaces the per-sample loop.
        one_hot[np.arange(labels.size), labels - 1] = 1
    return one_hot
# Compute one layer's output from its input and its weight matrix.
def layers(x, theta):
    """Prepend a column of ones to ``x`` and return ``sigmoid(x @ theta.T)``."""
    bias_column = np.ones((x.shape[0], 1))
    with_bias = np.concatenate((bias_column, x), axis=1)
    return sigmoid(with_bias @ theta.T)
# Run the network forward with the given parameters.
def calculate_prob(theta, x):
    """Return the output-layer activations for every row of ``x``.

    ``theta`` is the unrolled parameter vector; ``splitArray`` turns it back
    into the two weight matrices, which are applied one after the other.
    """
    weights1, weights2 = splitArray(theta)  # (25, 401) and (10, 26)
    hidden = layers(x, weights1)            # one row of hidden activations per sample
    return layers(hidden, weights2)         # one row of output activations per sample
# Unregularized cost.
def costFunction(theta, x, y):
    """Return the cross-entropy cost of the network on ``(x, y)``.

    Parameters
    ----------
    theta : 1-D array
        Unrolled parameter vector, passed to ``calculate_prob``.
    x : 2-D array
        Sample matrix; its row count is used as the number of samples.
    y : array
        Labels, converted to one-hot rows by ``convertY``.

    Returns
    -------
    float
        ``-1/n * sum(t * log(h) + (1 - t) * log(1 - h))`` over all samples
        and output units.
    """
    predicted = calculate_prob(theta, x)
    targets = convertY(y)
    log_terms = targets * np.log(predicted) + (1 - targets) * np.log(1 - predicted)
    # Normalize by the rows actually passed in rather than the global ``m``.
    return -1 / x.shape[0] * np.sum(log_terms)
# Regularized cost.
def costFunctionRegularzation(theta, x, y, lamda=1):
    """Return ``costFunction`` plus an L2 penalty on the non-bias weights.

    Parameters
    ----------
    theta : 1-D array
        Unrolled parameter vector.
    x, y :
        Data forwarded to ``costFunction``; the row count of ``x`` is used as
        the number of samples.
    lamda : float, optional
        Regularization strength.
    """
    weights1, weights2 = splitArray(theta)
    # Column 0 holds the bias weights, which are not penalized.
    penalty = np.sum(np.square(weights1[:, 1:])) + np.sum(np.square(weights2[:, 1:]))
    # Scale by the rows actually passed in rather than the global ``m``.
    return costFunction(theta, x, y) + lamda / (2 * x.shape[0]) * penalty
#-----------------------------------------------------------------------------------------------
#3.定义反向传播函数
# Prepend the bias unit to the activations of a single sample.
def addOne(x):
    """Return the 1-D array ``x`` with a leading ``1.0`` element."""
    return np.concatenate(([1.0], x))
# Output of one layer for a single sample (one row of the data).
def calcualte_layer(x, theta):
    """Return ``sigmoid(addOne(x) @ theta.T)`` for a 1-D input ``x``."""
    # e.g. (401,) @ (25, 401).T -> (25,)
    return sigmoid(addOne(x) @ theta.T)
# Convert one label into a length-10 indicator vector.
def calculate_y(m):
    """Return a float vector of length 10 with a 1 where ``m == position + 1``.

    Positions whose digit does not compare equal to ``m`` stay 0, so a label
    outside 1..10 yields an all-zero vector.
    """
    return np.array([1.0 if m == digit else 0.0 for digit in range(1, 11)])
# Split the unrolled vector back into the two weight matrices,
# shaped like the module-level theta1 and theta2 ((25, 401) and (10, 26)).
def splitArray(p):
    """Return ``p`` reshaped into a (theta1-shaped, theta2-shaped) pair.

    The first ``theta1.size`` entries form the first matrix and the remaining
    entries form the second.
    """
    boundary = theta1.size
    first = np.reshape(p[:boundary], theta1.shape)
    second = np.reshape(p[boundary:], theta2.shape)
    return first, second
# Unroll two matrices into a single 1-D vector.
def rolling(m1, m2):
    """Return the flattened ``m1`` followed by the flattened ``m2``."""
    return np.concatenate([part.ravel() for part in (m1, m2)])
# Unregularized back-propagation, one sample at a time.
def back_propagation(theta, x, y):
    """Return the unrolled gradient of the unregularized cost.

    Parameters
    ----------
    theta : 1-D array
        Unrolled parameter vector, split by ``splitArray``.
    x : 2-D array
        Sample matrix, one row per sample.
    y : array
        Labels; ``y[i]`` is turned into an indicator vector by
        ``calculate_y``.

    Returns
    -------
    1-D array
        Gradients of both weight matrices, averaged over the samples and
        unrolled by ``rolling``.
    """
    weights1, weights2 = splitArray(theta)  # (25, 401) and (10, 26)
    # Iterate over the rows that were passed in, not the module-level ``m``.
    sample_count = x.shape[0]
    grad1 = np.zeros(weights1.shape)
    grad2 = np.zeros(weights2.shape)
    for i in range(sample_count):
        # Step 1: forward pass and the delta of every layer.
        a1 = x[i, :]                             # input activations, (400,)
        a2 = calcualte_layer(a1, weights1)       # hidden activations, (25,)
        a3 = calcualte_layer(a2, weights2)       # output activations, (10,)
        # Output-layer delta is simply prediction minus target.
        delta3 = a3 - calculate_y(y[i])          # (10,)
        # Hidden-layer delta; the bias column of the weights is skipped and
        # ``*`` is the element-wise product.
        delta2 = (delta3 @ weights2[:, 1:]) * (a2 * (1 - a2))  # (25,)
        # Step 2: accumulate the gradients.  np.outer gives the same
        # (rows, cols) product as the former column-vector broadcast without
        # relying on the ``np.mat`` alias, which NumPy 2.0 removed.
        grad2 += np.outer(delta3, addOne(a2))    # (10, 26)
        grad1 += np.outer(delta2, addOne(a1))    # (25, 401)
    grad1 = 1 / sample_count * grad1
    grad2 = 1 / sample_count * grad2
    return rolling(grad1, grad2)
# Numerical gradient of the unregularized cost (for gradient checking).
def gradientCheck(theta, x, y):
    """Approximate the gradient of ``costFunction`` by central differences.

    Each component is ``(J(theta + e_i) - J(theta - e_i)) / (2 * step)`` with
    ``step = 1e-4``, where ``e_i`` perturbs only entry ``i``.
    """
    step = 1e-4
    numeric_grad = np.zeros(theta.shape)
    offset = np.zeros(theta.shape)
    for idx in range(len(theta)):
        offset[idx] = step
        upper = costFunction(theta + offset, x, y)
        lower = costFunction(theta - offset, x, y)
        numeric_grad[idx] = (upper - lower) / (2 * step)
        # Reset so the next iteration perturbs a single entry again.
        offset[idx] = 0
    return numeric_grad
#梯度检查
# predict_gradient = back_propagation(theta_all, x, y) #(10285,)
# print(predict_gradient)
# num_gradient = gradientCheck(theta_all,x,y) #(10285,)
# np.savetxt("abc.txt",num_gradient,fmt="%d",delimiter=" ")
# print(num_gradient)
# #向量差的二范数与向量和的二范数的比值
# diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
# print(diff) #数量级为e-09,这里结果为2.1448374139731396e-09
# Regularized back-propagation gradient.
def back_propagation_Regularzation(theta, x, y, lamda=1):
    """Return the unrolled gradient of the regularized cost.

    Parameters
    ----------
    theta : 1-D array
        Both weight matrices unrolled into one vector, in the layout that
        ``splitArray`` understands.
    x : 2-D array
        Sample matrix, forwarded to ``back_propagation``; its row count is
        used as the number of samples.
    y : array
        Labels, forwarded to ``back_propagation``.
    lamda : float, optional
        Regularization strength.

    Returns
    -------
    1-D array
        Gradient for both weight matrices, unrolled by ``rolling``.
    """
    # Use the data that was actually passed in, not the module-level ``m``.
    sample_count = x.shape[0]
    grad1, grad2 = splitArray(back_propagation(theta, x, y))
    weights1, weights2 = splitArray(theta)
    # The bias column (index 0) is not penalized.  Only columns 1.. are read,
    # and nothing is assigned into the weight arrays, so the caller's
    # ``theta`` is never modified.
    grad1[:, 1:] += lamda / sample_count * weights1[:, 1:]
    grad2[:, 1:] += lamda / sample_count * weights2[:, 1:]
    return rolling(grad1, grad2)
# Numerical gradient of the regularized cost (for gradient checking).
def gradientCheckRegularzation(theta, x, y, lamda=1):
    """Approximate the gradient of ``costFunctionRegularzation``.

    Uses central differences with ``step = 1e-4``, perturbing one entry of
    ``theta`` at a time; ``lamda`` is passed through to the cost function.
    """
    step = 1e-4
    numeric_grad = np.zeros(theta.shape)
    offset = np.zeros(theta.shape)
    for idx in range(len(theta)):
        offset[idx] = step
        upper = costFunctionRegularzation(theta + offset, x, y, lamda)
        lower = costFunctionRegularzation(theta - offset, x, y, lamda)
        numeric_grad[idx] = (upper - lower) / (2 * step)
        # Reset so the next iteration perturbs a single entry again.
        offset[idx] = 0
    return numeric_grad
# Unroll the two weight matrices read from ex4weights.mat into one 1-D vector.
theta_all = rolling(theta1,theta2)
#
# #正则化后的梯度检查
# predict_gradient = back_propagation_Regularzation(theta_all, x, y) #(10285,)
# print(predict_gradient)
# num_gradient = gradientCheckRegularzation(theta_all,x,y) #(10285,)
# print(num_gradient)
# #向量差的二范数与向量和的二范数的比值
# diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
# print(diff) #还是e-9
#----------------------------------------------------------------------------------
# The steps below had not finished running when these notes were written.
eplison = 0.1
# Size the random start vector from the weight matrices instead of the
# hard-coded 10285, so it always matches what splitArray expects.
theta_init = np.random.uniform(-eplison, eplison, (theta1.size + theta2.size,))
# Optimize the parameters with Newton-CG, using the analytic gradient.
result = op.fmin_ncg(
    f=costFunctionRegularzation,
    fprime=back_propagation_Regularzation,
    x0=theta_init,
    args=(x, y),
    maxiter=400,
)
result_train = calculate_prob(result, x)
print(result_train)
def calculateAccuracy(estimated_y, y):
    """Print the fraction of samples whose predicted label equals ``y``.

    Parameters
    ----------
    estimated_y : 2-D array
        Per-class scores, converted to labels by ``estimatedY``.
    y : array
        True labels, one per sample; it is flattened before comparison.
    """
    predicted = estimatedY(estimated_y)
    total = y.size
    # Vectorized comparison; also avoids shadowing the builtin ``sum``.
    correct = int(np.count_nonzero(predicted == np.ravel(y)))
    print("accuracy is:", correct / total)
# Report the accuracy of the optimized parameters on the training data.
calculateAccuracy(result_train,y)
方法二:矩阵实现
未正则化的反向传播函数与梯度检查函数之间的参数区别
正则化的反向传播函数与梯度检查函数之间的参数区别
import numpy as np
from scipy import io
from scipy import optimize as op
# 1. Load the data set and the stored weights.
# Raw strings keep the Windows backslashes literal; sequences such as "\d" and
# "\e" are invalid escapes in a normal string literal and trigger a warning.
dt1 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4data1.mat")
x = dt1["X"]  # (5000, 400)
y = dt1["y"]  # (5000, 1)
m = y.size  # number of samples
dt2 = io.loadmat(r"E:\机器学习\吴恩达\data_sets\ex4weights.mat")
# theta1: weights between the input layer and the hidden layer (layers 1 and 2)
theta1 = dt2["Theta1"]  # (25, 401), size 10025
# theta2: weights between the hidden layer and the output layer (layers 2 and 3)
theta2 = dt2["Theta2"]  # (10, 26), size 260
#-----------------------------------------------------------------------------------------
#2.定义输出层转换函数与代价函数
# Logistic (sigmoid) activation.
def sigmoid(z):
    """Return ``1 / (1 + exp(-z))``, evaluated element-wise for array input."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
# Pick, for every row of class scores, the label with the highest score.
# (n_samples, n_classes) -> (n_samples,)
def estimatedY(y):
    """Return the 1-based index of the largest entry in each row of ``y``.

    Parameters
    ----------
    y : 2-D array
        One row of scores per sample.

    Returns
    -------
    1-D float array
        ``argmax + 1`` for every row; on ties the first maximum wins.
    """
    # A single argmax along axis 1 replaces the per-row Python loop; the
    # result is cast to float to match the float output buffer used before.
    return (np.argmax(y, axis=1) + 1).astype(float)
# Turn integer labels into one-hot rows.
# (n_samples,) or (n_samples, 1) -> (n_samples, num_labels)
def convertY(y, num_labels=10):
    """Return a one-hot matrix for the 1-based labels in ``y``.

    Parameters
    ----------
    y : array of integer labels
        Label ``k`` sets column ``k - 1`` of the corresponding row.
    num_labels : int, optional
        Number of columns of the result (10 by default).

    Returns
    -------
    2-D float array of shape ``(y.size, num_labels)``.
    """
    labels = np.asarray(y).ravel()
    one_hot = np.zeros((labels.size, num_labels))
    if labels.size:
        # One fancy-indexed assignment replaces the per-sample loop.
        one_hot[np.arange(labels.size), labels - 1] = 1
    return one_hot
# Prepend a column of ones to a matrix.
def addOne(x):
    """Return the 2-D array ``x`` with an extra leading column filled with 1."""
    bias_column = np.ones((x.shape[0], 1))
    return np.concatenate((bias_column, x), axis=1)
# Compute one layer's output from its input and its weight matrix.
def layers(x, theta):
    """Return ``sigmoid(addOne(x) @ theta.T)``."""
    return sigmoid(addOne(x) @ theta.T)
# Run the network forward with the given parameters.
def calculate_prob(theta, x):
    """Return the output-layer activations for every row of ``x``.

    ``theta`` is the unrolled parameter vector; ``splitArray`` turns it back
    into the two weight matrices, which are applied one after the other.
    """
    weights1, weights2 = splitArray(theta)  # (25, 401) and (10, 26)
    hidden = layers(x, weights1)            # one row of hidden activations per sample
    return layers(hidden, weights2)         # one row of output activations per sample
# Unregularized cost.
def costFunction(theta, x, y):
    """Return the cross-entropy cost of the network on ``(x, y)``.

    Parameters
    ----------
    theta : 1-D array
        Unrolled parameter vector, passed to ``calculate_prob``.
    x : 2-D array
        Sample matrix; its row count is used as the number of samples.
    y : array
        Labels, converted to one-hot rows by ``convertY``.

    Returns
    -------
    float
        ``-1/n * sum(t * log(h) + (1 - t) * log(1 - h))`` over all samples
        and output units.
    """
    predicted = calculate_prob(theta, x)
    targets = convertY(y)
    log_terms = targets * np.log(predicted) + (1 - targets) * np.log(1 - predicted)
    # Normalize by the rows actually passed in rather than the global ``m``.
    return -1 / x.shape[0] * np.sum(log_terms)
# Regularized cost.
def costFunctionRegularzation(theta, x, y, lamda=1):
    """Return ``costFunction`` plus an L2 penalty on the non-bias weights.

    Parameters
    ----------
    theta : 1-D array
        Unrolled parameter vector.
    x, y :
        Data forwarded to ``costFunction``; the row count of ``x`` is used as
        the number of samples.
    lamda : float, optional
        Regularization strength.
    """
    weights1, weights2 = splitArray(theta)
    # Column 0 holds the bias weights, which are not penalized.
    penalty = np.sum(np.square(weights1[:, 1:])) + np.sum(np.square(weights2[:, 1:]))
    # Scale by the rows actually passed in rather than the global ``m``.
    return costFunction(theta, x, y) + lamda / (2 * x.shape[0]) * penalty
#-----------------------------------------------------------------------------------------------
#3.定义前向传播函数与反向传播函数
# Forward propagation through both layers.
def forward_propagation(theta, x):
    """Return ``(a1, a2, a3)``: the input, hidden and output activations.

    ``a1`` is ``x`` itself (no bias column added); ``a2`` and ``a3`` come from
    applying ``layers`` with the two weight matrices split out of ``theta``.
    """
    weights1, weights2 = splitArray(theta)
    hidden = layers(x, weights1)
    output = layers(hidden, weights2)
    return x, hidden, output
# Split the unrolled vector back into the two weight matrices,
# shaped like the module-level theta1 and theta2 ((25, 401) and (10, 26)).
def splitArray(p):
    """Return ``p`` reshaped into a (theta1-shaped, theta2-shaped) pair.

    The first ``theta1.size`` entries form the first matrix and the remaining
    entries form the second.
    """
    boundary = theta1.size
    first = np.reshape(p[:boundary], theta1.shape)
    second = np.reshape(p[boundary:], theta2.shape)
    return first, second
# Unroll two matrices into a single 1-D vector.
def rolling(m1, m2):
    """Return the flattened ``m1`` followed by the flattened ``m2``."""
    return np.concatenate([part.ravel() for part in (m1, m2)])
# Unregularized back-propagation, vectorized over all samples.
def back_propagation(theta, x, y):
    """Return the unrolled gradient of the unregularized cost.

    Parameters
    ----------
    theta : 1-D array
        Unrolled parameter vector, split by ``splitArray``.
    x : 2-D array
        Sample matrix, one row per sample.
    y : array
        Labels, converted to one-hot rows by ``convertY``.

    Returns
    -------
    1-D array
        Gradients of both weight matrices, averaged over the samples and
        unrolled by ``rolling``.
    """
    _, weights2 = splitArray(theta)  # only the (10, 26) matrix is needed here
    # Average over the rows that were passed in, not the module-level ``m``.
    sample_count = x.shape[0]
    # a1: (n, 400) inputs, a2: (n, 25) hidden, a3: (n, 10) outputs.
    a1, a2, a3 = forward_propagation(theta, x)
    # Output-layer delta is simply prediction minus target.
    delta3 = a3 - convertY(y)                                  # (n, 10)
    # Hidden-layer delta; the bias column of the weights is skipped and ``*``
    # is the element-wise product.
    delta2 = (delta3 @ weights2[:, 1:]) * (a2 * (1 - a2))      # (n, 25)
    # Gradients: delta (transposed) times the bias-extended activations.
    grad2 = 1 / sample_count * (delta3.T @ addOne(a2))         # (10, 26)
    grad1 = 1 / sample_count * (delta2.T @ addOne(a1))         # (25, 401)
    return rolling(grad1, grad2)
# Numerical gradient of the unregularized cost (for gradient checking).
def gradientCheck(theta, x, y):
    """Approximate the gradient of ``costFunction`` by central differences.

    Each component is ``(J(theta + e_i) - J(theta - e_i)) / (2 * step)`` with
    ``step = 1e-4``, where ``e_i`` perturbs only entry ``i``.
    """
    step = 1e-4
    numeric_grad = np.zeros(theta.shape)
    offset = np.zeros(theta.shape)
    for idx in range(len(theta)):
        offset[idx] = step
        upper = costFunction(theta + offset, x, y)
        lower = costFunction(theta - offset, x, y)
        numeric_grad[idx] = (upper - lower) / (2 * step)
        # Reset so the next iteration perturbs a single entry again.
        offset[idx] = 0
    return numeric_grad
"""theta_all = rolling(theta1,theta2)
#梯度检查
predict_gradient = back_propagation(theta_all, x, y) #(10285,)
print(predict_gradient)
num_gradient = gradientCheck(theta_all,x,y) #(10285,)
np.savetxt("abc.txt",num_gradient,fmt="%d",delimiter=" ")
print(num_gradient)
#向量差的二范数与向量和的二范数的比值
diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
print(diff) #数量级为e-09,这里结果为2.1515337653638197e-09"""
# Regularized back-propagation gradient.
def back_propagation_Regularzation(theta, x, y, lamda=1):
    """Return the unrolled gradient of the regularized cost.

    Parameters
    ----------
    theta : 1-D array
        Both weight matrices unrolled into one vector, in the layout that
        ``splitArray`` understands.
    x : 2-D array
        Sample matrix, forwarded to ``back_propagation``; its row count is
        used as the number of samples.
    y : array
        Labels, forwarded to ``back_propagation``.
    lamda : float, optional
        Regularization strength.

    Returns
    -------
    1-D array
        Gradient for both weight matrices, unrolled by ``rolling``.
    """
    # Use the data that was actually passed in, not the module-level ``m``.
    sample_count = x.shape[0]
    grad1, grad2 = splitArray(back_propagation(theta, x, y))
    weights1, weights2 = splitArray(theta)
    # The bias column (index 0) is not penalized.  Only columns 1.. are read,
    # and nothing is assigned into the weight arrays, so the caller's
    # ``theta`` is never modified.
    grad1[:, 1:] += lamda / sample_count * weights1[:, 1:]
    grad2[:, 1:] += lamda / sample_count * weights2[:, 1:]
    return rolling(grad1, grad2)
# Numerical gradient of the regularized cost (for gradient checking).
def gradientCheckRegularzation(theta, x, y, lamda=1):
    """Approximate the gradient of ``costFunctionRegularzation``.

    Uses central differences with ``step = 1e-4``, perturbing one entry of
    ``theta`` at a time; ``lamda`` is passed through to the cost function.
    """
    step = 1e-4
    numeric_grad = np.zeros(theta.shape)
    offset = np.zeros(theta.shape)
    for idx in range(len(theta)):
        offset[idx] = step
        upper = costFunctionRegularzation(theta + offset, x, y, lamda)
        lower = costFunctionRegularzation(theta - offset, x, y, lamda)
        numeric_grad[idx] = (upper - lower) / (2 * step)
        # Reset so the next iteration perturbs a single entry again.
        offset[idx] = 0
    return numeric_grad
"""
theta_all = rolling(theta1,theta2)
#正则化后的梯度检查
predict_gradient = back_propagation_Regularzation(theta_all, x, y) #(10285,)
print(predict_gradient)
num_gradient = gradientCheckRegularzation(theta_all,x,y) #(10285,)
print(num_gradient)
#向量差的二范数与向量和的二范数的比值
diff = np.linalg.norm(num_gradient-predict_gradient)/np.linalg.norm(num_gradient+predict_gradient)
print(diff) #还是e-9"""
#————————————————————————————————————————————————————————----------
#还没跑的出来
def randIniWeights(size, epsilon):
    """Return an array of shape ``size`` drawn uniformly from [-epsilon, epsilon)."""
    return np.random.uniform(low=-epsilon, high=epsilon, size=size)
# Size the random start vector from the weight matrices instead of the
# hard-coded 10285, so it always matches what splitArray expects.
theta_init = randIniWeights((theta1.size + theta2.size,), 0.12)
# Optimize the parameters with Newton-CG, using the analytic gradient.
result = op.fmin_ncg(
    f=costFunctionRegularzation,
    fprime=back_propagation_Regularzation,
    x0=theta_init,
    args=(x, y),
    maxiter=400,
)
result_train = calculate_prob(result, x)
print(result_train)
def calculateAccuracy(estimated_y, y):
    """Print the fraction of samples whose predicted label equals ``y``.

    Parameters
    ----------
    estimated_y : 2-D array
        Per-class scores, converted to labels by ``estimatedY``.
    y : array
        True labels, one per sample; it is flattened before comparison.
    """
    predicted = estimatedY(estimated_y)
    total = y.size
    # Vectorized comparison; also avoids shadowing the builtin ``sum``.
    correct = int(np.count_nonzero(predicted == np.ravel(y)))
    print("accuracy is:", correct / total)
# Report the accuracy of the optimized parameters on the training data.
calculateAccuracy(result_train,y)