This project covers bias and variance, and the split into training, validation, and test sets.
1 Regularized Linear Regression
1.1 Data Visualization
import numpy as np
import scipy.io as sio
import scipy.optimize as opt
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
Load the data (np.ravel flattens the (m, 1) column vectors stored in the .mat file into 1-D arrays):
data=sio.loadmat(r'C:\Users\xxx\Desktop\机器学习\machine-learning-ex5\machine-learning-ex5\ex5\ex5data1.mat')
X,y,Xval,yval,Xtest,ytest=map(np.ravel,[data['X'],data['y'],data['Xval'],data['yval'],data['Xtest'],data['ytest']])
X.shape, y.shape, Xval.shape, yval.shape, Xtest.shape, ytest.shape
((12,), (12,), (21,), (21,), (21,), (21,))
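As a quick sanity check (a minimal sketch, assuming the file loaded as above), the raw dict returned by sio.loadmat can be inspected before flattening:

# loadmat returns a dict; metadata keys like '__header__' sit alongside the data arrays
print([k for k in data.keys() if not k.startswith('__')])
# should list the six arrays: 'X', 'y', 'Xval', 'yval', 'Xtest', 'ytest'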
Visualize the training data:
fig,ax=plt.subplots(figsize=(12,8))
ax.scatter(X,y)
ax.set_xlabel('water_level')
ax.set_ylabel('flow')
plt.show()
1.2 Regularized Linear Regression
Insert the intercept term x0 (a column of ones):
X,Xval,Xtest=[np.insert(x.reshape(x.shape[0],1),0,np.ones(x.shape[0]),axis=1) for x in(X,Xval,Xtest)]
X
array([[ 1. , -15.93675813],
[ 1. , -29.15297922],
[ 1. , 36.18954863],
[ 1. , 37.49218733],
[ 1. , -48.05882945],
[ 1. , -8.94145794],
[ 1. , 15.30779289],
[ 1. , -34.70626581],
[ 1. , 1.38915437],
[ 1. , -44.38375985],
[ 1. , 7.01350208],
[ 1. , 22.76274892]])
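A quick shape check (the expected values follow from the original (12,), (21,), (21,) vectors plus the added bias column):

X.shape, Xval.shape, Xtest.shape
# ((12, 2), (21, 2), (21, 2))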
Define the two cost functions (unregularized and regularized):
def cost(theta, X, y):
    # unregularized squared-error cost
    m = X.shape[0]
    inner = X @ theta - y          # residuals h(x) - y
    square_sum = inner.T @ inner   # sum of squared residuals
    cost = square_sum / (2 * m)
    return cost

def costReg(theta, X, y, reg=1):
    # regularized cost; the bias term theta[0] is not penalized
    m = X.shape[0]
    regularized_term = ((reg / (2 * m)) * np.power(theta[1:], 2)).sum()
    return cost(theta, X, y) + regularized_term
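For reference, the cost implemented above corresponds to the formula below, where $\lambda$ is the reg argument and the bias term $\theta_0$ (theta[0] in the code) is excluded from the penalty:

J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(\theta^{T}x^{(i)} - y^{(i)}\right)^{2} + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_{j}^{2}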
theta=np.ones(X.shape[1])
costReg(theta,X,y,1)
303.9931922202643
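As a sanity check, calling the unregularized cost with the same theta should give a slightly smaller value, since with theta = [1, 1] the penalty is only reg/(2m)·θ₁² = 1/24 ≈ 0.042:

cost(theta, X, y)
# roughly 303.95 (the regularized value minus the ≈0.042 penalty)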