import os
# Work from the exercise folder so the relative data-file names read below resolve.
os.chdir('E:/ML/machine-learning-ex2')
print('现在工作目录是 ' + os.getcwd())
# Visualize the data in ex2data1.txt
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Read the three unnamed CSV columns as exam1, exam2 and the admitted label.
ex2data1 = pd.read_csv('ex2data1.txt', names=['exam1', 'exam2', 'admitted'])
# Prepend a column of ones to act as the intercept (constant) term.
ex2data1.insert(0, 'intercept', np.ones(ex2data1.shape[0]))
X = ex2data1.iloc[:, 0:3]
# Slice with 3: rather than 3 so y stays two-dimensional; a 1-D y causes
# dimension problems further down.
y = ex2data1.iloc[:, 3:]
# Boolean row masks that split the data set by label value.
pos = (ex2data1.iloc[:, -1] == 1)
neg = (ex2data1.iloc[:, -1] == 0)
# np.asmatrix is used instead of np.mat, which was removed in NumPy 2.0.
X_pos = np.asmatrix(X[pos])
X_neg = np.asmatrix(X[neg])
fig, ax = plt.subplots()
ax.scatter(X_pos[:, 1].flat, X_pos[:, 2].flat, marker='x', label='Admitted')
ax.scatter(X_neg[:, 1].flat, X_neg[:, 2].flat, marker='o', label='Not Admitted')
ax.legend(loc='upper right')
ax.set_xlabel('exam1')
ax.set_ylabel('exam2')
plt.show()
# Define the logistic (sigmoid) function and the cost function.
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), evaluated by NumPy on z."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def computeCost(theta,X,y,L=0):
    """Return the regularized logistic-regression cost and its gradient.

    Parameters:
        theta: parameter vector with one entry per column of X; any shape that
            can be reshaped to (n, 1), so the flat vector an optimizer passes
            in is accepted.
        X: design matrix with m rows and n columns; column 0 is treated as the
            intercept and is excluded from regularization.
        y: m labels, reshaped to (m, 1).
        L: regularization strength (0 disables regularization).

    Returns:
        (cost, grad): cost is a 1x1 matrix, grad is an (n, 1) matrix.
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    X = np.asmatrix(X)
    # Take the sample count from X so it does not depend on how y is laid out.
    m = X.shape[0]
    y = np.asmatrix(y).reshape(m, 1)
    theta = np.asmatrix(theta).reshape(X.shape[1], 1)
    # Mask that zeroes the intercept entry so it is not regularized.
    vec1 = np.ones((X.shape[1], 1))
    vec1[0, 0] = 0
    reg_theta = np.multiply(theta, vec1)
    z = X * theta
    # log(h) = -log(1 + exp(-z)) and log(1 - h) = -log(1 + exp(z)).
    # Evaluating them with logaddexp avoids log(0) when the sigmoid saturates,
    # which is what produced "divide by zero encountered in log" and nan/inf costs.
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)
    h = np.exp(log_h)
    cost = -1/m*(y.T*log_h + (1-y).T*log_one_minus_h) + L/2/m*reg_theta.T*reg_theta
    grad = 1/m*X.T*(h-y) + L/m*reg_theta
    return cost, grad
# Define the gradient-descent function (most basic version).
def gradientDescent(theta,X,y,alpha,iterations):
    """Run a fixed number of plain gradient-descent steps.

    Each step calls computeCost(theta, X, y) and moves theta by
    -alpha * gradient.

    Returns:
        (theta, J): the final theta and an (iterations, 1) array holding the
        cost computed at every step, which can be inspected to judge whether
        the run behaved well.

    Note from the original author: running this reported
    "RuntimeWarning: divide by zero encountered in log".
    """
    cost_history = np.zeros((iterations, 1))
    for step in range(iterations):
        cost, grad = computeCost(theta, X, y)
        theta = theta - alpha * grad
        cost_history[step] = cost
    return theta, cost_history
# Run gradient descent on the raw features, starting from an all-zero theta.
theta = np.zeros((X.shape[1], 1))
theta, J = gradientDescent(theta, X, y, alpha=0.1, iterations=5000)
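# Fix 1: mean-normalize the features in X, which also reduces the amount of computation.
# Fix 2: use an advanced optimizer: scipy.optimize.fmin_tnc
# --- Fix 1: mean-normalize the features in X, which also reduces the amount of computation ---
# Feature normalization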
def normal(X):
    """Mean-normalize the columns of a DataFrame: (X - column mean) / column range.

    Parameters:
        X: pandas DataFrame whose first column is the constant (intercept) term.

    Returns:
        A new DataFrame of the same shape. The first column is returned
        unchanged (mean treated as 0, range treated as 1). A column whose
        values are all equal has range 0 and is mapped to zeros instead of
        being divided by zero.
    """
    # Reduce column-wise through the DataFrame methods. np.mean/np.max/np.min
    # on a DataFrame collapse to a single scalar under pandas 2.x, which breaks
    # the .iloc assignments below.
    mean_X = X.mean(axis=0)
    range_X = X.max(axis=0) - X.min(axis=0)
    # Keep the constant term as-is: mean fixed to 0, range fixed to 1.
    mean_X.iloc[0] = 0
    range_X.iloc[0] = 1
    # Guard any other constant column against a 0/0 division.
    range_X[range_X == 0] = 1
    return (X - mean_X) / range_X
# Normalized copy of the feature frame, used for the gradient-descent run below.
normal_X = normal(X)
# Define the gradient-descent function again, this time with a regularization parameter.
def gradientDescent(theta,X,y,alpha,iterations,L=0):  # L is the regularization parameter
    """Run `iterations` gradient-descent steps with regularization strength L.

    Every step evaluates computeCost(theta, X, y, L), records the cost, and
    updates theta by subtracting alpha times the gradient.

    Returns:
        (theta, J): the final theta and an (iterations, 1) array of the cost
        at each step, useful for checking whether the descent behaved well.
    """
    costs = np.zeros((iterations, 1))
    for k in range(iterations):
        # Store the cost straight into its slot while unpacking the gradient.
        costs[k], gradient = computeCost(theta, X, y, L)
        theta = theta - alpha * gradient
    return theta, costs
# Run gradient descent on the normalized features, starting from an all-zero theta.
theta = np.zeros((X.shape[1], 1))
# The theta found here is fitted to the normalized data normal_X, not to the raw X.
theta, J = gradientDescent(theta, normal_X, y, 0.1, 5000)
print('best of theta is:' + str(theta))
fig, axs = plt.subplots(2, 1)
# Top panel: cost per iteration.
axs[0].plot(J)
axs[0].set_ylabel('J')
# Bottom panel: normalized data with the decision boundary.
# np.asmatrix is used instead of np.mat, which was removed in NumPy 2.0.
normal_X_pos = np.asmatrix(normal_X[pos])
normal_X_neg = np.asmatrix(normal_X[neg])
axs[1].scatter(normal_X_pos[:, 1].flat, normal_X_pos[:, 2].flat, marker='x', label='Admitted')
axs[1].scatter(normal_X_neg[:, 1].flat, normal_X_neg[:, 2].flat, marker='o', label='Not Admitted')
axs[1].legend(loc='upper right')
axs[1].set_xlabel('exam1')
axs[1].set_ylabel('exam2')
# Boundary line: solve theta0 + theta1*x1 + theta2*x2 = 0 for x2.
X1 = np.linspace(-0.5, 0.5, 30)
intercept = np.ones((30))
X2 = (-intercept * theta[0, 0] - X1 * theta[1, 0]) / theta[2, 0]
axs[1].plot(X1, X2)
plt.show()
# Fix 2: use an advanced optimizer: scipy.optimize.fmin_tnc
import scipy.optimize as opt
# np.asmatrix is used instead of np.mat, which was removed in NumPy 2.0.
X = np.asmatrix(X)
y = np.asmatrix(y)
theta = np.asmatrix(np.zeros((X.shape[1], 1)))
# computeCost returns (cost, gradient), so no separate gradient function is passed.
result = opt.fmin_tnc(func=computeCost, x0=theta, args=(X, y))
# The theta found here is fitted to the raw (unnormalized) data X.
theta = result[0].reshape(theta.shape)
# Draw the decision boundary over the raw data.
plt.scatter(X_pos[:, 1].flat, X_pos[:, 2].flat, marker='x', label='pos')
plt.scatter(X_neg[:, 1].flat, X_neg[:, 2].flat, marker='o', label='neg')
plt.legend(loc='upper right')
# Boundary line: solve theta0 + theta1*x1 + theta2*x2 = 0 for x2.
X1 = np.linspace(25, 100, 30)
intercept = np.ones((30))
X2 = (-intercept * theta[0, 0] - X1 * theta[1, 0]) / theta[2, 0]
plt.plot(X1, X2)
plt.show()
# Visualize the data in ex2data2.txt
ex2data2 = pd.read_csv('ex2data2.txt', names=['exam1', 'exam2', 'admitted'])
# Prepend a column of ones to act as the intercept (constant) term.
ex2data2.insert(0, 'intercept', np.ones(ex2data2.shape[0]))
X = ex2data2.iloc[:, 0:3]
# Slice with 3: rather than 3 so y stays two-dimensional; a 1-D y causes
# dimension problems further down.
y = ex2data2.iloc[:, 3:]
# Boolean row masks that split the data set by label value.
pos = (ex2data2.iloc[:, -1] == 1)
neg = (ex2data2.iloc[:, -1] == 0)
# np.asmatrix is used instead of np.mat, which was removed in NumPy 2.0.
X_pos = np.asmatrix(X[pos])
X_neg = np.asmatrix(X[neg])
fig, ax = plt.subplots()
ax.scatter(X_pos[:, 1].flat, X_pos[:, 2].flat, marker='x', label='Admitted')
ax.scatter(X_neg[:, 1].flat, X_neg[:, 2].flat, marker='o', label='Not Admitted')
ax.legend(loc='upper right')
ax.set_xlabel('exam1')
ax.set_ylabel('exam2')
plt.show()
# Keep a reference to the unexpanded feature frame; X is rebound after feature mapping.
XX = X
# Expand the features with polynomial terms.
def mapFeature(X,degree):
    """Append polynomial terms of columns 1 and 2 of X up to the given degree.

    Parameters:
        X: 2-D data with at least three columns; it is converted to a NumPy
            matrix. Columns 1 and 2 are the two base features.
        degree: highest total degree to generate. Values below 2 add nothing.

    Returns:
        A matrix holding the original columns followed, for each total degree
        i = 2..degree and each j = 0..i, by the column X[:, 1]**j * X[:, 2]**(i-j).
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    X = np.asmatrix(X)
    first = X[:, 1]
    second = X[:, 2]
    # Collect the new columns and join once, instead of re-copying the growing
    # matrix on every iteration.
    columns = [X]
    for i in range(2, degree + 1):
        for j in range(i + 1):
            columns.append(np.multiply(np.power(first, j), np.power(second, i - j)))
    return np.concatenate(columns, axis=1)
# Fit with an advanced optimizer: scipy.optimize.fmin_tnc
X = XX
# Expand the two base features into polynomial terms up to degree 8.
X = mapFeature(XX, 8)
import scipy.optimize as opt
# np.asmatrix is used instead of np.mat, which was removed in NumPy 2.0.
X = np.asmatrix(X)
y = np.asmatrix(y)
theta = np.asmatrix(np.zeros((X.shape[1], 1)))
result1 = opt.fmin_tnc(func=computeCost, x0=theta, args=(X, y, 0))  # L=0: overfitting appears
result2 = opt.fmin_tnc(func=computeCost, x0=theta, args=(X, y, 2))  # L=2: overfitting is corrected
# Both thetas are fitted to the degree-8 expanded features held in X.
theta1 = result1[0].reshape(theta.shape)
theta2 = result2[0].reshape(theta.shape)
# Draw the decision boundaries: top panel without regularization, bottom panel with it.
fig, axs = plt.subplots(2, 1)
axs[0].scatter(X_pos[:, 1].flat, X_pos[:, 2].flat, marker='x', label='pos')
axs[0].scatter(X_neg[:, 1].flat, X_neg[:, 2].flat, marker='o', label='neg')
axs[0].legend(loc='upper right')
axs[1].scatter(X_pos[:, 1].flat, X_pos[:, 2].flat, marker='x', label='pos')
axs[1].scatter(X_neg[:, 1].flat, X_neg[:, 2].flat, marker='o', label='neg')
axs[1].legend(loc='upper right')
# Evaluate theta^T x on a 30x30 grid over [-1, 1] x [-1, 1].
X1 = np.linspace(-1, 1, 30)
X2 = np.linspace(-1, 1, 30)
XX1, XX2 = np.meshgrid(X1, X2)
XX_ravel = np.concatenate(
    (np.ones((XX1.size, 1)), XX1.ravel().reshape(-1, 1), XX2.ravel().reshape(-1, 1)),
    axis=1,
)
XX_ravel = mapFeature(XX_ravel, 8)
z1 = XX_ravel * theta1
z2 = XX_ravel * theta2
z1 = z1.reshape(XX1.shape)
z2 = z2.reshape(XX1.shape)
# The decision boundary is the set where theta^T x = 0, so request exactly that
# level. A bare integer 0 asks matplotlib to pick the levels automatically.
axs[0].contour(XX1, XX2, z1, [0])
axs[1].contour(XX1, XX2, z2, [0])
plt.show()
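# The upper plot shows the boundary fitted without the regularization term; the overfitting is visible.
# The lower plot shows the boundary fitted with the regularization term; regularization effectively reduces the overfitting.
# This example also illustrates another point: underfitting can be addressed by expanding the features, and it is more sound to add a regularization term whenever the features are expanded.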
# Andrew Ng Machine Learning programming assignment
# Link: https://pan.baidu.com/s/1cpMM0xWZ1Dxs8HhVAmeUkA  extraction code: a36e