import os
os.chdir('E:/ML/machine-learning-ex4')
print('现在工作目录是 '+str(os.getcwd()))
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
# Load the training set: X (one example per row) and the label column y.
data=sio.loadmat('ex4data1.mat')
X=data['X'];y=data['y']
# Visualize a random sample of rows of X as square grayscale images.
fig,axs=plt.subplots(3,3)
axs=axs[:,:].flatten()
import random
for i in range(len(axs)):
    j=random.randint(0,len(X)-1)
    # each row has n pixels -> a sqrt(n) x sqrt(n) image; transposed because
    # the pixel data is stored column-major
    num=int(np.sqrt(len(X[0,:])))
    axs[i].imshow(X[j,:].reshape(num,-1).T)
    axs[i].set_ylabel(str(y[j]))
plt.show()
# Load the provided pre-trained weights and set up basic network constants.
theta_dict=sio.loadmat('ex4weights.mat')
Theta1=theta_dict['Theta1'];Theta2=theta_dict['Theta2']
# column vector of the class labels used in y
labels=np.array([1,2,3,4,5,6,7,8,9,10]).reshape(10,1)
# layer sizes: 400 inputs, 25 hidden units, 10 outputs
num_units=[400,25,10]
#定义方法
def sigmoid(z):
    """Elementwise logistic function 1/(1+exp(-z))."""
    exp_neg=np.exp(-z)
    return 1.0/(1.0+exp_neg)
def unrolling(input_list):
    """Concatenate a list of arrays into one long column vector.

    Each array is flattened in row-major (C) order and the pieces are
    stacked in list order.  Returns None for an empty list (original
    behavior preserved).
    """
    if not input_list:
        return None
    # one concatenate over all pieces instead of growing the vector inside
    # a loop (the original re-copied the accumulator every iteration: O(n^2))
    return np.concatenate([item.reshape(-1,1) for item in input_list],axis=0)
def rolling(input_vec,num_units):
    """Inverse of unrolling: cut the long vector into per-layer theta matrices.

    num_units lists the unit count of each layer; an L-layer network has
    L-1 theta matrices, Theta(l) of shape (num_units[l+1], num_units[l]+1)
    (the +1 is the bias column).
    """
    input_vec=np.reshape(input_vec,(-1,1))
    num_layers=len(num_units)
    output_list=[]
    offset=0
    for layer_index in range(num_layers-1):# L layers -> L-1 theta matrices
        rows=int(num_units[layer_index+1])
        cols=int(num_units[layer_index])+1
        output_list.append(input_vec[offset:offset+rows*cols,:].reshape(rows,cols))
        # BUG FIX: the offset must ACCUMULATE across layers; the original
        # overwrote it with only the current block's size, which happened to
        # work for 3-layer networks (the first offset is 0) but returns
        # wrong slices for any network with 4 or more layers.
        offset+=rows*cols
    return output_list
def y_to_Y(input_y,labels):
    """One-hot encode: turn y (m*1 label vector) into Y (K*m indicator matrix)."""
    input_y=np.reshape(input_y,(1,-1))
    labels=np.reshape(labels,(-1,1))
    # broadcasting (1,m)==(K,1) gives a K*m boolean matrix; np.int0 was an
    # alias removed in NumPy 2.0, so cast explicitly with astype(int)
    return (input_y==labels).astype(int)
def Y_to_y(input_Y,labels):
    """Decode a K*m score/indicator matrix back into an m*1 label vector."""
    flat_labels=labels.reshape(-1,1)
    winners=np.argmax(input_Y,axis=0)# index of the best-scoring label per column
    return flat_labels[winners]
#特征标准化
def normal(input_X):
    """Column-wise feature scaling: (x - mean) / (max - min).

    Column 0 is treated as the constant/bias column and left unchanged
    (its mean is forced to 0 and its range to 1, as in the original).
    """
    # BUG FIX: the original called np.mean(input_X) with no axis, which
    # returns a SCALAR for an ndarray, and then used .iloc (pandas-only)
    # on it -> AttributeError.  Compute per-column statistics explicitly
    # and index with plain [0]; this works for both ndarray and DataFrame.
    mean_X=np.asarray(np.mean(input_X,axis=0),dtype=float)
    mean_X[0]=0
    range_X=np.asarray(np.max(input_X,axis=0)-np.min(input_X,axis=0),dtype=float)
    range_X[0]=1
    return (input_X-mean_X)/range_X
def randTheta(num_units):
    """Random symmetry-breaking initialization; returns an unrolled vector.

    Each Theta(l) of shape (S_out, S_in+1) is drawn uniformly from [-q, q]
    with q = sqrt(6)/sqrt(S_in+S_out).
    """
    theta_list=[]
    for fan_in,fan_out in zip(num_units[:-1],num_units[1:]):
        # uniform in [-q, q]
        q=np.sqrt(6)/np.sqrt(fan_in+fan_out)
        theta_list.append(np.random.random((fan_out,1+fan_in))*2*q-q)
    return unrolling(theta_list)
#定义前向传播和反向传播算法
def forward_Propagation(input_theta_list,input_X):
    """Forward propagation: return the activation matrix of every layer.

    input_X is m*n (one example per row).  Every returned activation
    matrix carries a leading row of ones (the bias unit) — including the
    output layer — so callers strip row 0 of the last entry to get the
    hypothesis h.  a(j+1) = g(Theta(j) * a(j)).
    """
    m=len(input_X)
    bias_row=np.ones((1,m))
    # layer 0: transpose X to n*m and prepend the bias row
    activations=[np.concatenate((bias_row,input_X.T),axis=0)]
    for theta in input_theta_list:
        a_next=sigmoid(np.dot(theta,activations[-1]))
        activations.append(np.concatenate((bias_row,a_next),axis=0))
    return activations
def back_Propagation(input_theta_list,input_X,input_Y,input_A_list,reg_L=0):
    """Back propagation: compute per-layer errors and the cost gradient.

    input_theta_list : list of theta matrices, Theta(l) shape (S_{l+1}, S_l+1)
    input_X          : m*n design matrix (only its length m is used here)
    input_Y          : K*m one-hot label matrix
    input_A_list     : activations from forward_Propagation (each with a
                       leading bias row, including the output layer)
    reg_L            : regularization strength lambda

    Returns the list of gradient matrices, one per theta, same shapes.
    """
    num_X=len(input_X);
    num_layers=len(input_theta_list)+1 # p = number of layers
    out_delta_list=[]
    input_A_p=input_A_list[-1][1:,:]# output-layer activations, bias row removed
    out_delta_p=input_A_p-input_Y# error of the output layer
    out_delta_list.insert(0,out_delta_p)
    out_grad_list=[]
    # Walk backwards from the output layer.  Deltas/grads are inserted at the
    # FRONT of their lists so the negative indices below keep addressing the
    # same layers as the lists grow.  The last iteration also computes a
    # delta for the input layer, which is discarded by callers.
    for layer_index in range(num_layers-1):
        #Delta(j-1)= (Θ(j-1))T *Delta(j) .* A(j-1).*( 1-A(j-1));
        out_delta_item_1=np.dot(input_theta_list[-layer_index-1].T,out_delta_list[-layer_index-1])#theta[-1] is Theta_(p-1); delta[-1] is Delta_(p)
        #np.multiply takes exactly two arrays, so the product is built in steps
        out_delta_item_2=np.multiply(input_A_list[-layer_index-2],(1-input_A_list[-layer_index-2]))
        out_delta_item=np.multiply(out_delta_item_1,out_delta_item_2)#A[-2] is A_(p-1)
        out_delta_item=out_delta_item[1:,:] # drop the bias row
        out_delta_list.insert(0,out_delta_item)
        #Grad(j)=1/m*Delta(j+1)*( a(j))T + λ/m*reg_Θ(j), where reg_Θ(j) is Θ(j) with column 0 zeroed
        Grad_reg=np.copy(input_theta_list[-layer_index-1])# copy first: zeroing column 0 in place would corrupt the caller's theta for later iterations
        Grad_reg[:,0]=0
        out_grad_item=1/num_X*np.dot(out_delta_list[-layer_index-1],input_A_list[-layer_index-2].T)+reg_L/num_X*Grad_reg
        out_grad_list.insert(0,out_grad_item)
    return out_grad_list
def nnCost(theta_vector,input_X,input_Y,num_units,reg_L=0):
    """Regularized neural-network cost and gradient.

    theta_vector : unrolled parameter column vector (scipy.optimize.fmin_tnc
                   requires a flat parameter vector)
    input_X      : m*(n) design matrix, one example per row
    input_Y      : K*m one-hot label matrix
    num_units    : list of layer sizes, e.g. [400, 25, 10]
    reg_L        : regularization strength lambda (default 0)

    Returns (cost, grad) where grad is the unrolled gradient vector.
    """
    num_X=len(input_X)
    # reshape the flat parameter vector back into per-layer theta matrices
    theta_list=rolling(theta_vector,num_units)
    # forward propagation gives every layer's activations; the last layer
    # with its bias row removed is the hypothesis h
    out_A_list=forward_Propagation(theta_list,input_X)
    out_A_p=out_A_list[-1][1:,:]
    # cross-entropy cost: J = -1/m * sum( Y.*log(h) + (1-Y).*log(1-h) )
    cost_J=np.multiply(input_Y,np.log(out_A_p))+np.multiply((1-input_Y),np.log(1-out_A_p))
    cost_J=-1/num_X*np.sum(cost_J)
    # regularization term: lambda/(2m) * sum of squared weights,
    # bias columns excluded
    cost_J_reg=0
    for theta_j in theta_list:
        theta_j_reg=theta_j[:,1:]
        cost_J_reg=cost_J_reg+np.sum(theta_j_reg*theta_j_reg)
    cost_J=cost_J+reg_L/2/num_X*cost_J_reg
    # BUG FIX: the regularization strength passed to back_Propagation was
    # hard-coded to 1, so the returned gradient ignored the caller's reg_L
    # (wrong gradient whenever reg_L != 1); pass reg_L through instead.
    out_grad_list=back_Propagation(theta_list,input_X,input_Y,out_A_list,reg_L)
    return cost_J,unrolling(out_grad_list)
# Sanity-check the cost/forward propagation against the known expected values
# using the provided pre-trained weights.
Y=y_to_Y(y,labels)
Theta=[Theta1,Theta2]
Theta=unrolling(Theta)
print('计算得到的J是:'+str(nnCost(Theta,X,Y,num_units)[0] ) )
print('\n(this value should be about 0.287629)\n')
print('计算得到的带正则项的J是:'+str(nnCost(Theta,X,Y,num_units,1)[0] ) )
print('\n(this value should be about 0.383770)\n')
#用梯度检查函数测试反向传播函数
def gradientCheck(theta_vector,X,Y,num_units,L=0):
    """Compare the back-prop gradient with a two-sided numerical gradient.

    Only the first 10 components are perturbed, to keep the run time
    reasonable while still giving a good indication of correctness.
    Returns norm(analytic-numeric)/norm(analytic+numeric) over those
    components (should be tiny if back propagation is correct).
    """
    theta_vector=np.reshape(theta_vector,(-1,1))
    grad_bp=nnCost(theta_vector,X,Y,num_units,L)[1]
    eps=0.0001
    grad_num=np.zeros(grad_bp.shape)
    for i in range(10):# only the first 10 components, for speed
        plus=np.copy(theta_vector)
        minus=np.copy(theta_vector)
        plus[i]=plus[i,0]+eps
        minus[i]=minus[i,0]-eps
        J_plus=nnCost(plus,X,Y,num_units,L)[0]
        J_minus=nnCost(minus,X,Y,num_units,L)[0]
        grad_num[i]=(J_plus-J_minus)/2/eps
    #diff = norm(numgrad-grad)/norm(numgrad+grad);
    diff=np.linalg.norm((grad_bp-grad_num)[:10])/np.linalg.norm((grad_bp+grad_num)[:10])
    return diff
# Verify back propagation with the numerical gradient check.
print('梯度检查运行完毕,数值梯度与反向传播计算梯度之间的差是:'+str(gradientCheck(Theta,X,Y,num_units,1)))
print('\n(EPSILON = 0.0001,this value should be less than 1e-9)\n')
# Only the first 10 components are checked, to keep the run time small.
# Random initial theta (symmetry breaking).
init_theta=randTheta(num_units)
# Training method 1: advanced optimizer (truncated Newton via fmin_tnc;
# nnCost returns (cost, grad) as fmin_tnc expects).
import scipy.optimize as opt
#result=opt.fmin_tnc(func=computeCost,x0=theta,args=(X,y))
result=opt.fmin_tnc(func=nnCost,x0=init_theta,args=(X,Y,num_units,1) )
print('计算得到的带正则项的J是:'+str(nnCost(result[0],X,Y,num_units,1)[0] ) )
def nnScore(best_theta,X,Y,labels):
    """Classification accuracy of the network on the set (X, Y).

    best_theta is the unrolled parameter vector; uses the module-level
    num_units for the network shape.  Returns the fraction of examples
    whose predicted label matches the true label.
    """
    theta_list=rolling(best_theta,num_units)
    A=forward_Propagation(theta_list,X)
    H=A[-1][1:,:]# output layer, bias row removed -> K*m hypothesis
    h=Y_to_y(H,labels)# predicted labels, m*1
    y=Y_to_y(Y,labels)# true labels, m*1
    # np.int0 was an alias removed in NumPy 2.0; cast explicitly instead
    score=np.sum((h==y).astype(int))/np.size(h)
    return score
print('用高级梯度算法的正确率是:'+str(nnScore(result[0],X,Y,labels) ))
print('给定theta值的正确率是:'+str(nnScore(Theta,X,Y,labels) ))
# Training method 2: plain batch gradient descent.
def gradientDescent(theta,X,Y,num_units,alpha,iterations,L=0):
    """Plain batch gradient descent on the network cost.

    L is the regularization strength.  Returns (theta, J) where J is the
    iterations*1 history of cost values, useful for checking whether the
    optimization behaved as expected.
    """
    J=np.zeros((iterations,1))
    for step in range(iterations):
        cost,grad=nnCost(theta,X,Y,num_units,L)
        J[step]=cost
        theta=theta-alpha*grad
    return theta,J
alpha=1           # learning rate
iterations=1000   # number of gradient-descent steps
best_theta,J_list=gradientDescent(init_theta,X,Y,num_units,alpha,iterations,L=0)
# plot the cost history to confirm convergence
plt.plot(J_list)
plt.show()
print('用原始梯度算法的正确率是:'+str(nnScore(best_theta,X,Y,labels) ))
print('给定theta值的正确率是:'+str(nnScore(Theta,X,Y,labels) ))
# 吴恩达机器学习编程作业 (Andrew Ng Machine Learning programming assignment)
# 链接: https://pan.baidu.com/s/1cpMM0xWZ1Dxs8HhVAmeUkA 提取码: a36e