局部加权线性回归可以解决欠拟合问题,给待测点附近的每个点赋予一定的权重
通过矩阵运算的方法解出回归系数:$\hat{w} = (X^T W X)^{-1} X^T W y$。通过使用“核”来对附近的点赋予权重,最常用的高斯核权重如下:$w(i,i) = \exp\left(\dfrac{\lVert x^{(i)} - x \rVert^2}{-2k^2}\right)$。k取不同的值,得到的模型也不同:k越小,可能会出现过拟合;k越大,可能会出现欠拟合。这里将给出不同k值对应的函数图像。这种方法加重了计算量,因为对每个待测点都要用全部训练样本重新求解一次回归系数。
k=1.0时出现了欠拟合,k=0.01时效果最佳,k=0.003时出现了过拟合
from numpy import *
def loadDataSet(filename):
    """Load a tab-separated numeric text file into feature rows and labels.

    The number of feature columns is the field count of the first non-blank
    line minus one. On every data line the first ``numFeat`` fields become
    the feature row and the last field becomes the label. Blank lines
    (for example a trailing empty line) are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)``. ``dataMat`` is a list with one list
        of floats per data line; ``labelMat`` is a list of floats. Both are
        empty when the file contains no data lines.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a line has fewer fields than the first data line.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # A single pass inside ``with`` guarantees the handle is closed; the
    # previous version opened the file twice and never closed either handle.
    with open(filename) as fr:
        for line in fr:
            if not line.strip():
                continue  # blank line: nothing to parse
            if numFeat is None:
                # Counted on the raw line so the column count matches what
                # the first line of the file has always determined.
                numFeat = len(line.split('\t')) - 1
            curLine = line.strip().split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def standRegress(xArr, yArr):
    """Fit ordinary least squares with the normal equations.

    Computes ``ws = (X^T X)^-1 X^T y``.

    Args:
        xArr: 2-D array-like with one sample per row.
        yArr: 1-D array-like of target values, one per row of ``xArr``.

    Returns:
        The regression coefficients as a column ``numpy.matrix``, or ``None``
        (after printing ``'error'``) when the determinant of ``X^T X`` is
        exactly zero and the matrix cannot be inverted.
    """
    # ``asmatrix`` is the function the old ``mat`` alias referred to; the
    # alias itself was removed in NumPy 2.0.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T  # targets as a column vector
    xTx = xMat.T * xMat
    if linalg.det(xTx) == 0.0:
        print('error')
        return None
    return xTx.I * (xMat.T * yMat)
def lwlr(testPoint, xArr, yArr, k=1.0):
    """Predict one point with locally weighted linear regression.

    Each training row ``x_j`` receives the Gaussian-kernel weight
    ``exp(||testPoint - x_j||^2 / (-2 * k**2))``, and the weighted normal
    equations ``ws = (X^T W X)^-1 X^T W y`` are solved for this test point.

    Args:
        testPoint: Feature vector of the query point (same length as a row
            of ``xArr``).
        xArr: 2-D array-like of training samples, one per row.
        yArr: 1-D array-like of training targets, one per row of ``xArr``.
        k: Kernel bandwidth; smaller values concentrate the weight on the
            training rows closest to ``testPoint``.

    Returns:
        ``testPoint * ws`` (the prediction), or ``None`` (after printing
        ``"error"``) when the determinant of ``X^T W X`` is exactly zero.
    """
    # ``asmatrix`` is the function the old ``mat`` alias referred to; the
    # alias itself was removed in NumPy 2.0.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T  # targets as a column vector

    # Squared Euclidean distance from the test point to every training row,
    # computed for all rows at once (m x 1 column).
    diffMat = asmatrix(testPoint) - xMat
    sqDist = multiply(diffMat, diffMat).sum(axis=1)
    weights = exp(sqDist / (-2.0 * k ** 2))

    # Scaling row j by weights[j] equals multiplying by the diagonal weight
    # matrix W, without materialising an m x m matrix.
    xTx = xMat.T * multiply(weights, xMat)
    if linalg.det(xTx) == 0.0:
        print("error")
        return None
    ws = xTx.I * (xMat.T * multiply(weights, yMat))
    return testPoint * ws
def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run ``lwlr`` on every row of ``testArr`` and collect the predictions.

    Args:
        testArr: 2-D array-like of query points, one per row.
        xArr: 2-D array-like of training samples, one per row.
        yArr: 1-D array-like of training targets.
        k: Kernel bandwidth forwarded to ``lwlr``.

    Returns:
        A 1-D float array with one prediction per row of ``testArr``. An
        entry is NaN when ``lwlr`` returned ``None`` for that point (it
        prints ``"error"`` in that case).
    """
    m = shape(testArr)[0]
    yHat = zeros(m)
    for i in range(m):
        prediction = lwlr(testArr[i], xArr, yArr, k)
        if prediction is None:
            # No solution for this point; mark it instead of failing on the
            # float conversion of None.
            yHat[i] = float('nan')
        else:
            # Pull out the single value explicitly: assigning a size-1
            # array/matrix to a scalar slot is deprecated since NumPy 1.25.
            yHat[i] = asarray(prediction).item()
    return yHat
def rssError(yArr, yHatArr):
    """Return the residual sum of squares between targets and predictions.

    The operands must support elementwise ``-`` and ``** 2`` and the result
    must offer ``.sum()`` (for example NumPy arrays of equal shape).
    """
    residuals = yArr - yHatArr
    squared = residuals ** 2
    return squared.sum()
# Demo: fit locally weighted linear regression on 'ex0.txt' with three kernel
# bandwidths and draw one figure per bandwidth (fitted curve over the data).
import matplotlib.pyplot as plt

xArr, yArr = loadDataSet('ex0.txt')
# ``asmatrix`` is the function the old ``mat`` alias referred to; the alias
# itself was removed in NumPy 2.0.
xMat = asmatrix(xArr)
yMat = asmatrix(yArr)

# Column 1 of the data is the feature drawn on the horizontal axis. Flatten
# to plain 1-D arrays so matplotlib never receives numpy.matrix objects.
xVals = xMat[:, 1].A1
yVals = yMat.A1

# The sort order depends only on the data, so compute it once, outside the
# loop; the curve must be drawn with x in ascending order.
srtInd = xVals.argsort()
xSort = xVals[srtInd]

k = [1.0, 0.01, 0.003]
for i, bandwidth in enumerate(k):
    yHat = lwlrTest(xArr, xArr, yArr, bandwidth)
    fig = plt.figure(i + 1)
    ax = fig.add_subplot(111)
    ax.plot(xSort, yHat[srtInd])
    ax.scatter(xVals, yVals, s=2, c='red')
    plt.title('k=%g' % bandwidth)
# NOTE(review): each bandwidth gets its own figure number, so a single show()
# after the loop displays all of them together -- confirm this is the intent.
plt.show()