ex5.py
import scipy.optimize as op
import numpy as np
from scipy.io import loadmat
from ex5modules import *
# Part 1: Load and visualize the data
data=loadmat('ex5data1.mat')
X=data['X']
y=data['y']
Xtest=data['Xtest']
ytest=data['ytest']
Xval=data['Xval']
yval=data['yval']
plotData(X,y)
# Part 2: Regularized cost and gradient
Xone=np.column_stack((np.ones((X.shape[0],1)),X))
theta_t=np.array([1,1]).flatten()
cost=lrCostFunc(theta_t,Xone,y,1)
grad=lrgradient(theta_t,Xone,y,1)
print("Cost at theta_t=[1,1] with lambda=1: %f (should be about 303.993)"%cost)
print("Gradient at theta_t=[1,1] with lambda=1: [%f,%f] (should be about [-15.30,598.250])"%(grad[0],grad[1]))
# Part 3: Train linear regression
init_theta=np.zeros(Xone.shape[1])
lamda=0
theta = op.fmin_cg(f=lrCostFunc, x0=init_theta, args=(Xone, y, lamda),
                   fprime=lrgradient, disp=False).reshape((Xone.shape[1], 1))
plotLinearRegression(X,y,theta)
# Part 4: Learning curve for linear regression
train_error,val_error=learningCurve(X,y,Xval,yval,init_theta,lamda)
plotLearningCurve(train_error,val_error)
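# With lamda=0, expect both curves to level off at a fairly high error as the
# training set grows: the high-bias pattern discussed in the notes below.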
# Part 5: Feature mapping for polynomial regression
p=8
X_poly=polyFeatures(X,p)
X_poly,mu,sigma=featureNormalize(X_poly)
X_poly_one=np.column_stack((np.ones((X_poly.shape[0],1)),X_poly))
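# The test and validation sets below are normalized with the training set's mu
# and sigma (not their own statistics), so all sets share the same feature scaling.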
X_poly_test=polyFeatures(Xtest,p)
X_poly_test=(X_poly_test-mu)/sigma
X_poly_test_one=np.column_stack((np.ones((X_poly_test.shape[0],1)),X_poly_test))
X_poly_val=polyFeatures(Xval,p)
X_poly_val=(X_poly_val-mu)/sigma
X_poly_val_one=np.column_stack((np.ones((X_poly_val.shape[0],1)),X_poly_val))
lamda=3
poly_init_theta=np.zeros(X_poly_one.shape[1])
theta = op.fmin_cg(f=lrCostFunc, x0=poly_init_theta, args=(X_poly_one, y, lamda),
                   fprime=lrgradient, disp=False, maxiter=200).reshape((X_poly_one.shape[1], 1))
plotPolyRegression(X,y,mu,sigma,theta,p)
# Part 6: Learning curve for polynomial regression
train_error,val_error=learningCurve(X_poly,y,X_poly_val,yval,poly_init_theta,lamda)
plotLearningCurve(train_error,val_error)
# Part 7: Validation for selecting lambda
lamda_vec,train_error,val_error=validationCurve(X_poly_one, y, X_poly_val_one, yval,poly_init_theta)
plotLambdaError(lamda_vec,train_error,val_error)
print("\nlambda\t\tTrain Error\tValidation Error")
for i in range(lamda_vec.shape[0]):
    print(' %f\t%f\t%f' % (lamda_vec[i], train_error[i], val_error[i]))
# Part 8: Test error with the best lambda (lambda=3)
lamda=3
theta = op.fmin_cg(f=lrCostFunc, x0=poly_init_theta, args=(X_poly_one, y, lamda),
                   fprime=lrgradient, disp=False, maxiter=200)
test_error = lrCostFunc(theta, X_poly_test_one, ytest, 0)  # reported without regularization
print("Test error with lambda=3: %f (should be about 3.8599)" % test_error)
ex5modules.py
import scipy.optimize as op
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(x):
    # Not used in this exercise
    return 1 / (1 + np.exp(-x))

def lrCostFunc(theta, X, y, lamda):
    # Regularized linear regression cost; the bias term theta[0] is not penalized
    m = X.shape[0]
    theta = theta.reshape((theta.shape[0], 1))
    J = np.sum(np.square(np.dot(X, theta) - y)) / (2 * m) + \
        lamda * np.sum(np.square(theta[1:])) / (2 * m)
    return J

def lrgradient(theta, X, y, lamda):
    m = X.shape[0]
    theta = theta.reshape((theta.shape[0], 1))
    grad = np.dot(X.T, X.dot(theta) - y) / m + theta * lamda / m
    grad[0] = grad[0] - theta[0] * lamda / m  # undo regularization of the bias term
    return grad.flatten()

def learningCurve(X, y, Xval, yval, init_theta, lamda):
    m = X.shape[0]
    train_error = np.zeros(m)
    val_error = np.zeros(m)
    X = np.column_stack((np.ones((m, 1)), X))
    Xval_one = np.column_stack((np.ones((Xval.shape[0], 1)), Xval))
    for i in range(m):
        # Fit on the first i+1 training examples only
        results = op.fmin_cg(f=lrCostFunc, x0=init_theta, disp=False,
                             args=(X[:i+1, :], y[:i+1, :], lamda),
                             fprime=lrgradient, maxiter=400)
        theta = results.reshape((X.shape[1], 1))
        # Errors are measured without the regularization term
        train_error[i] = lrCostFunc(theta, X[:i+1, :], y[:i+1, :], 0)
        val_error[i] = lrCostFunc(theta, Xval_one, yval, 0)
    return train_error, val_error

def validationCurve(X, y, Xval, yval, init_theta):
    lamda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10])
    train_error = np.zeros(lamda_vec.shape[0])
    val_error = np.zeros(lamda_vec.shape[0])
    for i, lamda in enumerate(lamda_vec):
        theta = op.fmin_cg(f=lrCostFunc, x0=init_theta, disp=False,
                           args=(X, y, lamda), fprime=lrgradient, maxiter=200)
        # Errors are measured without the regularization term
        train_error[i] = lrCostFunc(theta, X, y, 0)
        val_error[i] = lrCostFunc(theta, Xval, yval, 0)
    return lamda_vec, train_error, val_error

def polyFeatures(X, p):
    # Map the single feature X onto powers 1..p, one power per column
    X_p = np.zeros((X.shape[0], p))
    for i in range(p):
        X_p[:, i] = np.power(X.flatten(), i + 1)
    return X_p

def featureNormalize(X):
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0, ddof=1)  # sample standard deviation (ddof=1)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma

def plotData(X, y):
    plt.plot(X, y, 'rx')
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.show()

def plotLinearRegression(X, y, theta):
    Xone = np.column_stack((np.ones((X.shape[0], 1)), X))
    plt.plot(X, y, 'rx')
    plt.plot(X, Xone.dot(theta), 'b')
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.show()

def plotLearningCurve(train_error, val_error):
    m = train_error.shape[0]
    l1, = plt.plot(np.arange(1, m + 1), train_error, 'b', label='train')
    l2, = plt.plot(np.arange(1, m + 1), val_error, 'g', label='cv')
    plt.ylim(0, 150)
    plt.legend(handles=[l1, l2], loc=1)
    plt.title("Learning curve")
    plt.xlabel("Number of training examples")
    plt.ylabel("Error")
    plt.show()

def plotPolyRegression(X, y, mu, sigma, theta, p):
    plt.plot(X, y, 'rx')
    # Plot the fit over a range extending slightly beyond the training data
    xmin = np.min(X) - 15
    xmax = np.max(X) + 25
    x = np.arange(xmin, xmax, .05)
    x_poly = polyFeatures(x, p)
    x_poly = (x_poly - mu) / sigma
    x_poly = np.column_stack((np.ones((x_poly.shape[0], 1)), x_poly))
    plt.plot(x, x_poly.dot(theta), '--b')
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.xlim(-80, 80)
    plt.ylim(np.min(x_poly.dot(theta)) - 10, np.max(x_poly.dot(theta)) + 20)
    plt.show()

def plotLambdaError(lamda_vec, train_error, val_error):
    l1, = plt.plot(lamda_vec, train_error, 'b', label='Train')
    l2, = plt.plot(lamda_vec, val_error, 'g', label='Cross Validation')
    plt.legend(handles=[l1, l2], loc=1)
    plt.xlabel("Lambda")
    plt.ylabel("Error")
    plt.xlim(0, 10)
    plt.ylim(0, 25)
    plt.show()
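As a quick sanity check of polyFeatures (a minimal sketch, not part of the scripts above), a single-column input is mapped onto its first p powers:

import numpy as np
from ex5modules import polyFeatures

X = np.array([[2.0], [3.0]])
print(polyFeatures(X, 3))
# [[ 2.  4.  8.]
#  [ 3.  9. 27.]]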
Part 2 tests whether the cost and gradient functions produce the expected values.

Linear regression:
Learning curve: with lambda=0 (no regularization), the train error increases as the training set grows, and neither it nor the cv error ends up small: the model has a high-bias problem.

Polynomial regression:
With lambda=0 there is a severe variance (overfitting) problem.
With lambda=1 both the train error and the cv error are small and close to each other; this is a reasonable lambda, balancing bias against variance.
With lambda=100 the model is heavily over-regularized and fits the data poorly.
Using the cv set, we find that with lambda at 1 or 3 the train error and validation error are closest and the model performs best.
After selecting lambda with the cv set, we finally check on the test set how well the model actually performs.
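The last step could also be automated instead of hard-coding lambda=3. A minimal sketch, assuming the arrays already built in ex5.py (X_poly_one, X_poly_val_one, X_poly_test_one, y, yval, ytest, poly_init_theta) are in scope:

import numpy as np
import scipy.optimize as op
from ex5modules import lrCostFunc, lrgradient, validationCurve

# Pick the lambda with the lowest cross-validation error, refit on the full
# training set, and report the unregularized test error for that model.
lamda_vec, train_error, val_error = validationCurve(X_poly_one, y, X_poly_val_one, yval, poly_init_theta)
best_lamda = lamda_vec[np.argmin(val_error)]
theta = op.fmin_cg(f=lrCostFunc, x0=poly_init_theta, args=(X_poly_one, y, best_lamda),
                   fprime=lrgradient, disp=False, maxiter=200)
print("best lambda: %f, test error: %f" % (best_lamda, lrCostFunc(theta, X_poly_test_one, ytest, 0)))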