Coursera NG Machine Learning Week 5: Regularization, Bias vs. Variance (Python Implementation)

ex5.py

import scipy.optimize as op
import numpy as np
from scipy.io import loadmat
from ex5modules import *

#Part 1: Load and visualize the data
data=loadmat('ex5data1.mat')
X=data['X']
y=data['y']
Xtest=data['Xtest']
ytest=data['ytest']
Xval=data['Xval']
yval=data['yval']

plotData(X,y)

#Part 2: Costs and gradients
Xone=np.column_stack((np.ones((X.shape[0],1)),X))
theta_t=np.array([1.0,1.0])

cost=lrCostFunc(theta_t,Xone,y,1)
grad=lrgradient(theta_t,Xone,y,1)
print("Cost at theta_t=[1,1] with lambda=1: %f  (should be about 303.993)"%cost)
print("Gradient at theta_t=[1,1] with lambda=1: [%f,%f]  (should be about [-15.30,598.250])"%(grad[0],grad[1]))

#Part 3: Train Linear Regression
init_theta=np.zeros(Xone.shape[1])
lamda=0

theta=op.fmin_cg(f=lrCostFunc,x0=init_theta,args=(Xone,y,lamda),\
                 fprime=lrgradient,disp=False).reshape((Xone.shape[1],1))
plotLinearRegression(X,y,theta)

#Part 4: Learning curve for Linear Regression
train_error,val_error=learningCurve(X,y,Xval,yval,init_theta,lamda)
plotLearningCurve(train_error,val_error)

#Part 5: Feature Mapping for Polynomial Regression
p=8

X_poly=polyFeatures(X,p)
X_poly,mu,sigma=featureNormalize(X_poly)
X_poly_one=np.column_stack((np.ones((X_poly.shape[0],1)),X_poly))

X_poly_test=polyFeatures(Xtest,p)
X_poly_test=(X_poly_test-mu)/sigma
X_poly_test_one=np.column_stack((np.ones((X_poly_test.shape[0],1)),X_poly_test))

X_poly_val=polyFeatures(Xval,p)
X_poly_val=(X_poly_val-mu)/sigma
X_poly_val_one=np.column_stack((np.ones((X_poly_val.shape[0],1)),X_poly_val))

lamda=3
poly_init_theta=np.zeros(X_poly_one.shape[1])

theta=op.fmin_cg(f=lrCostFunc,x0=poly_init_theta,args=(X_poly_one,y,lamda),\
                 fprime=lrgradient,disp=False,maxiter=200).reshape((X_poly_one.shape[1],1))
plotPolyRegression(X,y,mu,sigma,theta,p)

#Part 6: Learning curve for Polynomial Regression
train_error,val_error=learningCurve(X_poly,y,X_poly_val,yval,poly_init_theta,lamda)
plotLearningCurve(train_error,val_error,title="Learning curve for polynomial regression")

#Part 7: Validation for Selecting Lambda

lamda_vec,train_error,val_error=validationCurve(X_poly_one, y, X_poly_val_one, yval,poly_init_theta)
plotLambdaError(lamda_vec,train_error,val_error)

print("\nlambda\t\tTrain Error\tValidation Error")
for i in range(lamda_vec.shape[0]):
    print(' %f\t%f\t%f'%(lamda_vec[i], train_error[i], val_error[i]))

#Part 8: Testing error with best lambda=3
lamda=3
theta=op.fmin_cg(f=lrCostFunc,x0=poly_init_theta,args=(X_poly_one,y,lamda),fprime=lrgradient,disp=False,maxiter=200)
test_error=lrCostFunc(theta,X_poly_test_one,ytest,0)
print("Test error with lambda=3:  %f  (should be about 3.8599)" % test_error)

ex5modules.py

import scipy.optimize as op
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(x):
    # Note: not used in this exercise (linear regression)
    return 1/(1+np.exp(-x))

def lrCostFunc(theta,X,y,lamda):
    # Regularized linear regression cost; the bias term theta[0] is not regularized
    m = X.shape[0]
    theta = theta.reshape((theta.shape[0], 1))
    J=np.sum(np.square(np.dot(X,theta)-y))/(2*m)+ \
      lamda*np.sum(np.square(theta[1:]))/(2*m)
    return J

def lrgradient(theta,X,y,lamda):
    # Gradient of the regularized cost; the correction on grad[0]
    # removes the regularization term from the bias component
    m = X.shape[0]
    theta = theta.reshape((theta.shape[0], 1))
    grad=np.dot(X.T, X.dot(theta) - y) / m + theta * lamda / m
    grad[0] = grad[0] - theta[0] * lamda / m
    return grad.flatten()

def learningCurve(X,y,Xval,yval,init_theta,lamda):
    m=X.shape[0]
    train_error=np.zeros(m)
    val_error=np.zeros(m)
    X=np.column_stack((np.ones((m,1)),X))
    Xval_one=np.column_stack((np.ones((Xval.shape[0],1)),Xval))
    for i in range(m):
        # Train on the first i+1 examples (with regularization) ...
        results = op.fmin_cg(f=lrCostFunc, x0=init_theta, disp=False,\
                             args=(X[:i+1,:], y[:i+1,:],lamda), fprime=lrgradient,maxiter=400)
        theta = results.reshape((X.shape[1], 1))
        # ... but measure train and CV error without regularization (lambda=0)
        train_error[i]=lrCostFunc(theta,X[:i+1,:],y[:i+1,:],0)
        val_error[i]=lrCostFunc(theta,Xval_one,yval,0)
    return train_error,val_error

def validationCurve(X, y, Xval, yval,init_theta):
    lamda_vec=np.array([0,0.001,0.003,0.01,0.03,0.1,0.3,1,3,10])
    train_error=np.zeros(lamda_vec.shape[0])
    val_error=np.zeros(lamda_vec.shape[0])
    for i,lamda in enumerate(lamda_vec):
        # Train with the current lambda, then measure both errors
        # without regularization (lambda=0)
        theta=op.fmin_cg(f=lrCostFunc,x0=init_theta,disp=False,\
                         args=(X,y,lamda),fprime=lrgradient,maxiter=200)
        train_error[i]=lrCostFunc(theta,X,y,0)
        val_error[i]=lrCostFunc(theta,Xval,yval,0)
    return lamda_vec,train_error,val_error

def polyFeatures(X,p):
    X_p = np.zeros((X.shape[0], p))
    for i in range(p):
        X_p[:, i] = np.power(X.flatten(), i + 1)
    return X_p

def featureNormalize(X):
    mu=np.mean(X,axis=0)
    sigma=np.std(X,axis=0,ddof=1)
    X_norm = (X - mu) / sigma
    return X_norm,mu,sigma

def plotData(X,y):
    plt.plot(X, y, 'rx')
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.show()

def plotLinearRegression(X,y,theta):
    Xone=np.column_stack((np.ones((X.shape[0],1)),X))
    plt.plot(X, y, 'rx')
    plt.plot(X, Xone.dot(theta), 'b')
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.show()

def plotLearningCurve(train_error,val_error,title='Learning curve for linear regression'):
    m=train_error.shape[0]
    l1, = plt.plot(np.arange(1, m+1), train_error, 'b', label='train')
    l2, = plt.plot(np.arange(1, m+1), val_error, 'g', label='cv')
    plt.ylim(0, 150)
    plt.legend(handles=[l1, l2], loc=1)
    plt.title(title)
    plt.xlabel("Number of training examples")
    plt.ylabel("Error")
    plt.show()

def plotPolyRegression(X,y,mu,sigma,theta,p):
    plt.plot(X, y, 'rx')
    xmin = np.min(X) - 15
    xmax = np.max(X) + 25
    x = np.arange(xmin, xmax, .05)
    x_poly = polyFeatures(x, p)
    x_poly = (x_poly - mu) / sigma
    x_poly = np.column_stack((np.ones((x_poly.shape[0], 1)), x_poly))
    plt.plot(x, x_poly.dot(theta), '--b')
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.xlim(-80, 80)
    plt.ylim(np.min(x_poly.dot(theta))-10,np.max(x_poly.dot(theta))+20)
    plt.show()

def plotLambdaError(lamda_vec,train_error,val_error):
    l1,=plt.plot(lamda_vec,train_error,'b',label='Train')
    l2,=plt.plot(lamda_vec,val_error,'g',label='Cross Validation')
    plt.legend(handles=[l1,l2],loc=1)
    plt.xlabel("Lambda")
    plt.ylabel("Error")
    plt.xlim(0,10)
    plt.ylim(0,25)
    plt.show()

First, check that the cost and gradient functions are correct.
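As an extra sanity check (not part of the original ex5 script), the analytic gradient from lrgradient can be compared against a central-difference approximation of lrCostFunc on a tiny random problem; a minimal sketch:

import numpy as np
from ex5modules import lrCostFunc, lrgradient

# Tiny random problem: 5 examples, intercept column plus 2 features
np.random.seed(0)
m, n = 5, 3
X = np.column_stack((np.ones((m, 1)), np.random.randn(m, n - 1)))
y = np.random.randn(m, 1)
theta = np.random.randn(n)
lamda = 1.0

# Central finite differences of the cost, one parameter at a time
eps = 1e-4
num_grad = np.zeros(n)
for j in range(n):
    e = np.zeros(n)
    e[j] = eps
    num_grad[j] = (lrCostFunc(theta + e, X, y, lamda) -
                   lrCostFunc(theta - e, X, y, lamda)) / (2 * eps)

# The analytic gradient should agree to roughly 1e-9 or better
print(np.max(np.abs(num_grad - lrgradient(theta, X, y, lamda))))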

Linear regression:

Learning curve: with lambda=0 (no regularization), the training error grows as more training examples are added and settles at a fairly large value, and the CV error is also large, so the model suffers from high bias.
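To put numbers on this, one can look at the tail of the curves just computed in Part 4 (a small sketch, assuming train_error and val_error from ex5.py are still in scope):

# High bias: both errors level off at a similarly large value (small gap);
# a large gap between them would instead suggest high variance.
gap = val_error[-1] - train_error[-1]
print("final train error: %.2f, final cv error: %.2f, gap: %.2f"
      % (train_error[-1], val_error[-1], gap))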

                                               

Polynomial regression:

With lambda=0, the polynomial model has a severe variance (overfitting) problem.

           

With lambda=1, both the train error and the CV error are small and close to each other, so this is a reasonable lambda that balances bias and variance.

With lambda=100, the regularization is far too strong: the model underfits badly and both the train error and the CV error are large (a high-bias fit).
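These comparisons can be reproduced by retraining the polynomial model for each lambda and re-plotting the fit and learning curve (a sketch, assuming X, y, X_poly, X_poly_one, X_poly_val, yval, mu, sigma, p and poly_init_theta from ex5.py are in scope):

import scipy.optimize as op
from ex5modules import lrCostFunc, lrgradient, plotPolyRegression, \
    learningCurve, plotLearningCurve

for lamda in (0, 1, 100):
    # Retrain the polynomial model with the current lambda
    theta = op.fmin_cg(f=lrCostFunc, x0=poly_init_theta,
                       args=(X_poly_one, y, lamda), fprime=lrgradient,
                       disp=False, maxiter=200).reshape((-1, 1))
    # Plot the fitted curve and the corresponding learning curve
    plotPolyRegression(X, y, mu, sigma, theta, p)
    train_error, val_error = learningCurve(X_poly, y, X_poly_val, yval,
                                           poly_init_theta, lamda)
    plotLearningCurve(train_error, val_error,
                      title="Learning curve for polynomial regression (lambda=%g)" % lamda)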

Using the CV set, we find that with lambda = 1 or 3 the gap between train error and validation error is smallest and the model performs best.

After selecting lambda with the CV set, we finally evaluate the chosen model on the test set to see how well it really does.
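The selection and final evaluation can also be done programmatically (a sketch, assuming lamda_vec and val_error from Part 7 and X_poly_one, y, X_poly_test_one, ytest, poly_init_theta from ex5.py are in scope):

import numpy as np
import scipy.optimize as op
from ex5modules import lrCostFunc, lrgradient

# Pick the lambda with the lowest cross-validation error
best_lamda = lamda_vec[np.argmin(val_error)]

# Retrain on the full training set with that lambda
theta = op.fmin_cg(f=lrCostFunc, x0=poly_init_theta,
                   args=(X_poly_one, y, best_lamda), fprime=lrgradient,
                   disp=False, maxiter=200)

# Report the unregularized error on the held-out test set
test_error = lrCostFunc(theta, X_poly_test_one, ytest, 0)
print("best lambda: %g, test error: %f" % (best_lamda, test_error))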

 
