import numpy as np
import scipy.io as sio
import scipy.optimize as opt
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def load_data(data_file):
    """Load the ex5 .mat file and flatten each array to 1-D."""
    d = sio.loadmat(data_file)
    return map(np.ravel, [d['X'], d['y'], d['Xval'], d['yval'], d['Xtest'], d['ytest']])
X, y, Xval, yval, Xtest, ytest = load_data('ex5data1.mat')
df = pd.DataFrame({'water_level': X, 'flow': y})
sns.lmplot(x='water_level', y='flow', data=df, fit_reg=False, height=8)
plt.show()
X.shape
(12,)
X, Xval, Xtest = [np.insert(x.reshape(x.shape[0], 1), 0, np.ones(x.shape[0]), axis=1) for x in (X, Xval, Xtest)]  # reshape to column vectors and prepend a bias column of ones
$$J(\theta)=\frac{1}{2m}\sum_{i=1}^{m}\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right)^{2}$$
def cost(theta, X, y):
    """Unregularized squared-error cost."""
    m = X.shape[0]
    inner = X @ theta - y  # residuals h_theta(x) - y
    return np.sum(inner ** 2) / (2 * m)
theta = np.ones(X.shape[1])
cost(theta, X, y)
303.9515255535976
theta
array([1., 1.])
def gradient(theta, X, y):
    """Unregularized gradient of the squared-error cost."""
    m = X.shape[0]
    return (X.T @ (X @ theta - y)) / m
gradient(theta,X,y)
array([-15.30301567, 598.16741084])
def regularized_gradient(theta, X, y, l=1):
    """Gradient with L2 penalty; the intercept theta[0] is not penalized."""
    m = X.shape[0]
    regularized_term = theta.copy()
    regularized_term[0] = 0  # do not regularize the intercept
    regularized_term = (l / m) * regularized_term
    return gradient(theta, X, y) + regularized_term
regularized_gradient(theta,X,y)
array([-15.30301567, 598.25074417])
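As a quick check (not in the original notebook): only the slope component changes, and it grows by exactly l/m * theta[1] = 1/12 ≈ 0.0833, since theta[0] is not penalized.
# Difference between regularized and plain gradient: [0, l/m * theta[1]] = [0, 1/12]
print(regularized_gradient(theta, X, y) - gradient(theta, X, y))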
def linear_regression(X, y, l=1):
    """Fit regularized linear regression with scipy's TNC optimizer."""
    theta = np.ones(X.shape[1])
    res = opt.minimize(fun=regularized_cost,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient,
                       options={'disp': True})
    return res
$$\frac{\partial J(\theta)}{\partial \theta_{0}}=\frac{1}{m}\sum_{i=1}^{m}\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right)x_{j}^{(i)} \quad \text{for } j=0$$
$$\frac{\partial J(\theta)}{\partial \theta_{j}}=\left(\frac{1}{m}\sum_{i=1}^{m}\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right)x_{j}^{(i)}\right)+\frac{\lambda}{m}\theta_{j} \quad \text{for } j \geq 1$$
def regularized_cost(theta, X, y, l=1):
    """Cost with L2 penalty on all parameters except the intercept."""
    m = X.shape[0]
    regularized_term = (l / (2 * m)) * np.sum(np.power(theta[1:], 2))
    return cost(theta, X, y) + regularized_term
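Before optimizing, the analytic gradient can be verified against a centered finite-difference approximation of regularized_cost; the two should agree to many decimal places. This checker is not part of the original exercise code, just a minimal sketch using the functions defined above.
def numerical_gradient(f, theta, X, y, l=1, eps=1e-6):
    """Centered finite differences: (f(theta+e) - f(theta-e)) / (2*eps) per coordinate."""
    grad = np.zeros_like(theta)
    for j in range(theta.size):
        e = np.zeros_like(theta)
        e[j] = eps
        grad[j] = (f(theta + e, X, y, l) - f(theta - e, X, y, l)) / (2 * eps)
    return grad

theta = np.ones(X.shape[1])
print(regularized_gradient(theta, X, y))                  # analytic gradient
print(numerical_gradient(regularized_cost, theta, X, y))  # numeric, should match closely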
theta = np.ones(X.shape[1])
final_theta = linear_regression(X, y, l=0).get('x')
b = final_theta[0] # intercept
m = final_theta[1] # slope
plt.scatter(X[:,1], y, label="Training data")
plt.plot(X[:, 1], X[:, 1]*m + b, label="Prediction")
plt.legend(loc=2)
plt.show()
def plot_learning_curve(X, y, Xval, yval, l=0):
    """Train on growing subsets of the data and plot training/CV cost."""
    training_cost, cv_cost = [], []
    m = X.shape[0]
    for i in range(1, m + 1):
        res = linear_regression(X[:i, :], y[:i], l=l)
        # Evaluate without the penalty term, per the exercise convention
        tc = cost(res.x, X[:i, :], y[:i])
        cv = cost(res.x, Xval, yval)
        training_cost.append(tc)
        cv_cost.append(cv)
    plt.plot(np.arange(1, m + 1), training_cost, label='training cost')
    plt.plot(np.arange(1, m + 1), cv_cost, label='cv cost')
    plt.legend(loc=1)
plot_learning_curve(X, y, Xval, yval, l=0)
plt.show()
Both curves level off at a high cost, a sign of high bias: the straight line underfits, which motivates the polynomial features below.
def normalize_feature(df):
    """Normalize each column to zero mean and unit variance."""
    return (df - df.mean()) / df.std()
def map_features(x, power):
    """Expand x into polynomial features x, x^2, ..., x^power."""
    data = {'f{}'.format(i): np.power(x, i) for i in range(1, power + 1)}
    return pd.DataFrame(data)
def prepare_poly(*args, power):
    def prepare(x):
        # Polynomial expansion, column-wise normalization, then a bias column
        df = map_features(x, power)
        ndarr = normalize_feature(df).values  # .as_matrix() is deprecated
        return np.insert(ndarr, 0, np.ones(ndarr.shape[0]), axis=1)
    return [prepare(x) for x in args]
X, y, Xval, yval, Xtest, ytest = load_data('ex5data1.mat')
map_features(X,power=3)
|    | f1         | f2          | f3             |
|----|------------|-------------|----------------|
| 0  | -15.936758 | 253.980260  | -4047.621971   |
| 1  | -29.152979 | 849.896197  | -24777.006175  |
| 2  | 36.189549  | 1309.683430 | 47396.852168   |
| 3  | 37.492187  | 1405.664111 | 52701.422173   |
| 4  | -48.058829 | 2309.651088 | -110999.127750 |
| 5  | -8.941458  | 79.949670   | -714.866612    |
| 6  | 15.307793  | 234.328523  | 3587.052500    |
| 7  | -34.706266 | 1204.524887 | -41804.560890  |
| 8  | 1.389154   | 1.929750    | 2.680720       |
| 9  | -44.383760 | 1969.918139 | -87432.373590  |
| 10 | 7.013502   | 49.189211   | 344.988637     |
| 11 | 22.762749  | 518.142738  | 11794.353058   |
X_poly, Xval_poly, Xtest_poly = prepare_poly(X, Xval, Xtest, power=8)
X_poly[:3, :]
array([[ 1.00000000e+00, -3.62140776e-01, -7.55086688e-01,
1.82225876e-01, -7.06189908e-01, 3.06617917e-01,
-5.90877673e-01, 3.44515797e-01, -5.08481165e-01],
[ 1.00000000e+00, -8.03204845e-01, 1.25825266e-03,
-2.47936991e-01, -3.27023420e-01, 9.33963187e-02,
-4.35817606e-01, 2.55416116e-01, -4.48912493e-01],
[ 1.00000000e+00, 1.37746700e+00, 5.84826715e-01,
1.24976856e+00, 2.45311974e-01, 9.78359696e-01,
-1.21556976e-02, 7.56568484e-01, -1.70352114e-01]])
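As a quick sanity check (a minimal sketch, not part of the original notebook), we can confirm that prepare_poly produced a bias column of ones and feature columns with roughly zero mean and unit sample standard deviation:
print(X_poly.shape)                                 # (12, 9): bias + 8 polynomial features
print(np.allclose(X_poly[:, 0], 1))                 # True: first column is the bias
print(X_poly[:, 1:].mean(axis=0).round(6))          # ~0 for every feature column
print(X_poly[:, 1:].std(axis=0, ddof=1).round(6))   # ~1 (pandas .std() uses ddof=1)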
plot_learning_curve(X_poly, y, Xval_poly, yval, l=0)  # learning curve on polynomial-expanded features
plt.show()
This model overfits so badly that it is meaningless: the training cost is essentially zero while the cross-validation cost stays high.
plot_learning_curve(X_poly, y, Xval_poly, yval, l=1)
plt.show()
plot_learning_curve(X_poly, y, Xval_poly, yval, l=100)
plt.show()
Here the bias is too large; the model underfits and has no predictive value.
l_candidate = [0,0.001,0.003,0.01,0.03,0.1,0.3,1,3,10]
training_cost, cv_cost = [], []
for l in l_candidate:
    res = linear_regression(X_poly, y, l)  # lambda is the only quantity varied
    print(res.x)
    tc = cost(res.x, X_poly, y)
    cv = cost(res.x, Xval_poly, yval)
    training_cost.append(tc)
    cv_cost.append(cv)
plt.plot(l_candidate, training_cost, label='training')
plt.plot(l_candidate, cv_cost, label='cross validation')
plt.legend(loc=2)
plt.xlabel('lambda')
plt.ylabel('cost')
plt.show()
[ 11.21706585 10.75070817 20.80739961 17.63150557 -40.16874767
-41.61078419 37.59076615 32.47932848 -5.17533895]
[ 11.21812521 11.45664545 15.77635131 11.25833727 -17.48707932
-18.51691323 12.98302732 8.8513085 -4.50041395]
[ 11.21770942 11.79060213 13.84306719 8.65250473 -9.26554607
-10.57333409 3.86781969 1.39067306 -3.4921462 ]
[11.21759634 12.2173356 12.39327894 5.76270318 -4.62502486 -4.95446703
-0.74749478 -2.07326146 -2.40161243]
[11.21758946 12.44970325 11.02104777 3.61872705 -2.27816769 -2.35528792
-1.7550052 -1.98066726 -1.62691143]
[11.21758813 12.0329889 9.02456212 2.46415079 -0.03071123 -0.75251458
-1.11387524 -0.71830965 -1.24986872]
[11.21759766 10.68432308 7.06179414 2.72156582 1.48304646 0.53447264
0.02826291 0.37208722 -0.5756052 ]
[11.21759084 8.58517818 5.21269487 3.59111691 2.2829443 1.76030041
1.06813152 1.10762807 0.39399214]
[11.21758902 6.65682319 3.8942189 3.7589937 2.25316196 2.20639857
1.33199233 1.3501427 0.76188997]
[11.21758894 4.36656761 2.39947367 2.90176749 1.50155106 1.86591874
0.89813759 1.19375382 0.50923143]
l_candidate[np.argmin(cv_cost)]
1
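Cross-validation therefore selects l = 1. As an extra illustration (a minimal sketch, not part of the original exercise; it reuses the raw 1-D X reloaded above and the X_poly features), we can refit at that lambda and overlay the prediction on the training data:
theta_best = linear_regression(X_poly, y, l=1).x
order = np.argsort(X)                      # X is the raw 1-D water_level array here
plt.scatter(X, y, label='Training data')
plt.plot(X[order], (X_poly @ theta_best)[order], label='Polynomial fit (l=1)')
plt.legend(loc=2)
plt.show()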
for l in l_candidate:
    theta = linear_regression(X_poly, y, l).x
    print('test cost(l={}) = {}'.format(l, cost(theta, Xtest_poly, ytest)))
test cost(l=0) = 10.188828746834819
test cost(l=0.001) = 11.042208340677332
test cost(l=0.003) = 11.258299662911943
test cost(l=0.01) = 10.881320921011255
test cost(l=0.03) = 10.022423316373619
test cost(l=0.1) = 8.631996528952413
test cost(l=0.3) = 7.336508859793177
test cost(l=1) = 7.466283147250186
test cost(l=3) = 11.643934061423678
test cost(l=10) = 27.71508028852076
On the test set, the minimum cost occurs when the regularization parameter is 0.3.