文章目录
一、数据
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import scipy.optimize as opt
1.加载数据
data=loadmat('G:\Coursera-ML-AndrewNg-Notes\code\ex5-bias vs variance\ex5data1.mat')
X=data['X']
Xval=data['Xval']
Xtest=data['Xtest']
y=data['y']
yval=data['yval']
ytest=data['ytest']
#X.shape(12, 1),Xval.shape(21, 1),Xtest.shape(21, 1)
#y.shape(12, 1),yval.shape(21, 1),ytest.shape(21, 1)
plt.scatter(X,y,color='b')
plt.xlabel('water level')
plt.ylabel('flow')
plt.show()
2.处理数据
X=np.insert(X,0,np.ones(X.shape[0]),axis=1)
Xval=np.insert(Xval,0,np.ones(Xval.shape[0]),axis=1)
Xtest=np.insert(Xtest,0,np.ones(Xtest.shape[0]),axis=1)
y=y.flatten()
yval=yval.flatten()
ytest=ytest.flatten()
theta=np.ones(X.shape[1])
二、定义代价函数
def cost(theta,X,y):
return np.sum(np.power(X@theta-y,2))/(2*len(X))
def regularized_cost(theta,X,y,l):
return cost(theta,X,y)+l/(2*len(X))*np.sum(np.power(theta[1:],2))
三、定义梯度
def gradient(theta,X,y):
return (X.T@(X@theta-y))/len(X)
def regularized_gradient(theta,X,y,l):
reg=(l/len(X))*theta
reg[0]=0
return gradient(theta,X,y)+reg
四、拟合数据
def training(theta,X,y,l):
result=opt.minimize(fun=regularized_cost,x0=theta,args=(X,y,l),method='TNC',jac=regularized_gradient,options={'disp': True})
return result.x
1.线性回归
final_theta=training(theta,X,y,1)
a=final_theta[1]
b=final_theta[0]
(1)拟合曲线
plt.scatter(X[:,1],y,color='b',label='training data')
plt.plot(X[:,1],a*X[:,1]+b,color='r',label='prediction')
plt.xlabel('water level')
plt.ylabel('flow')
plt.legend()
plt.show()
(2)学习曲线
代价函数误差:
J
t
r
a
i
n
(
θ
)
=
1
2
m
∑
i
=
1
m
(
h
θ
x
(
i
)
)
−
y
(
i
)
)
2
J_{train}(\theta)=\frac{1}{2m}\sum_{i=1}^{m}(h_{\theta}x^{(i)})-y^{(i)})^2
Jtrain(θ)=2m1i=1∑m(hθx(i))−y(i))2
J
c
v
(
θ
)
=
1
2
m
c
v
∑
i
=
1
m
c
v
(
h
θ
x
c
v
(
i
)
)
−
y
c
v
(
i
)
)
2
J_{cv}(\theta)=\frac{1}{2m_{cv}}\sum_{i=1}^{m_{cv}}(h_{\theta}x_{cv}^{(i)})-y_{cv}^{(i)})^2
Jcv(θ)=2mcv1i=1∑mcv(hθxcv(i))−ycv(i))2
training_cost=[]
cv_cost=[]
for i in range(1,len(X)+1):
t=training(theta,X[:i,:],y[:i],l=0)
tc=cost(t,X[:i,:],y[:i])
cv=cost(t,Xval,yval)#所有的验证集来检验
training_cost.append(tc)
cv_cost.append(cv)
plt.plot(range(1,len(X)+1),training_cost,color='b',label='training cost')
plt.plot(range(1,len(X)+1),cv_cost,color='r',label='cv cost')
plt.xlabel('m(training set size)')
plt.ylabel('error')
plt.legend()
plt.show()
随着样本数量的增加,训练误差和交叉验证误差都很高,这属于高偏差,欠拟合。
2.多项式回归
归一化:所有数据集应该都用训练集的均值和样本标准差处理。所以要将训练集的均值和样本标准差存储起来,对后面的数据进行处理。
def add_polynomial_feature(X,power):
X_poly=X.copy()
for i in range(2,power+1):
X_poly=np.insert(X_poly,i,np.power(X_poly[:,1],i),axis=1)
return X_poly
def means_stds(X):
means=np.mean(X,axis=0)#每行相加
stds=np.std(X,axis=0,ddof=1)#样本标准差而不是总体标准差,使用np.std()时,将ddof=1则是样本标准差,默认=0是总体标准差。而pandas默认计算样本标准差。
return means,stds
def feature_normalize(X,means,stds):
X[:,1:]=(X[:,1:]-means[1:])/stds[1:]
return X
(1)代价函数误差与多项式次数
def plot_degree_error(degree):
training_cost=[]
cv_cost=[]
for d in range(1,degree+1):
X_poly=add_polynomial_feature(X,d)
X_poly_means,X_poly_stds=means_stds(X_poly)
X_poly=feature_normalize(X_poly,X_poly_means,X_poly_stds)
Xval_poly=add_polynomial_feature(Xval,d)
Xval_poly=feature_normalize(Xval_poly,X_poly_means,X_poly_stds)
theta=np.ones(d+1)
final_theta=training(theta,X_poly,y,0)
tc=cost(final_theta,X_poly,y)
cv=cost(final_theta,Xval_poly,yval)
training_cost.append(tc)
cv_cost.append(cv)
plt.plot(range(1,degree+1),training_cost,color='b',label='training cost')
plt.plot(range(1,degree+1),cv_cost,color='r',label='cv cost')
plt.xlabel('degree of polynomial d')
plt.ylabel('error')
plt.legend()
plt.show()
plot_degree_error(10)
从图中可以看出三次多项式和八次多项式比较好。
(2)不同 λ \lambda λ的学习曲线
power=8
X_poly=add_polynomial_feature(X,power)
X_poly_means,X_poly_stds=means_stds(X_poly)
X_poly=feature_normalize(X_poly,X_poly_means,X_poly_stds)
Xval_poly=add_polynomial_feature(Xval,power)
Xval_poly=feature_normalize(Xval_poly,X_poly_means,X_poly_stds)
Xtest_poly=add_polynomial_feature(Xtest,power)
Xtest_poly=feature_normalize(Xtest_poly,X_poly_means,X_poly_stds)
theta2=np.ones(power+1)
def plot_fitting_figure(l):
final_theta=training(theta2,X_poly,y,l)
x=np.linspace(X[:,1].min(),X[:,1].max(),100)
xx=x.reshape(-1,1)
xx=np.insert(xx,0,1,axis=1)
xx=add_polynomial_feature(xx,power)
xx=feature_normalize(xx,X_poly_means,X_poly_stds)
yy=xx@final_theta
plt.plot(x,yy,color='r',label='prediction')
plt.scatter(X[:,1],y,color='b',label='training data')
plt.xlabel('water level')
plt.ylabel('flow')
plt.legend()
plt.show()
def plot_learning_curve(l):
training_cost=[]
cv_cost=[]
for i in range(1,len(X)+1):
t=training(theta2,X_poly[:i,:],y[:i],l)
tc=regularized_cost(t,X_poly[:i,:],y[:i],0)
cv=regularized_cost(t,Xval_poly,yval,0)#所有的验证集来检验
training_cost.append(tc)
cv_cost.append(cv)
plt.plot(range(1,len(X)+1),training_cost,color='b',label='training cost')
plt.plot(range(1,len(X)+1),cv_cost,color='r',label='cv cost')
plt.xlabel('m(training set size)')
plt.ylabel('error')
plt.legend()
plt.show()
return cv_cost[len(X)-1]
λ = 0 \lambda=0 λ=0
plot_fitting_figure(l=0)
plot_learning_curve(l=0)
训练的代价太低了,不真实. 这是 过拟合了
λ = 1 \lambda=1 λ=1
plot_fitting_figure(l=1)
plot_learning_curve(l=1)
训练代价增加了些,不再是0了。
也就是说我们减轻过拟合
λ = 100 \lambda=100 λ=100
plot_fitting_figure(l=100)
plot_learning_curve(l=100)
太多正则化了.
变成 欠拟合状态
(3)选择合适的 λ \lambda λ
l_candidate = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]
training_cost=[]
cv_cost=[]
for l in l_candidate:
final_theta2=training(theta2,X_poly,y,l)
tc=cost(final_theta2,X_poly,y)
cv=cost(final_theta2,Xval_poly,yval)
training_cost.append(tc)
cv_cost.append(cv)
plt.plot(l_candidate,training_cost,color='b',label='training cost')
plt.plot(l_candidate,cv_cost,color='r',label='cv cost')
plt.xlabel('m(training set size)')
plt.ylabel('error')
plt.legend()
plt.show()
l_candidate[np.argmin(cv_cost)]
# l=3的时候cv_cost最低
for l in l_candidate:
final_theta=training(theta2,X_poly,y,l)
print('test cost(l={}) = {}'.format(l, cost(final_theta, Xval_poly, yval)))
final_theta=training(theta2,X_poly,y,l=3)
cost(final_theta, Xtest_poly, ytest)
用测试集计算代价函数误差为:3.8598760845199265