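1.预测结果: $\hat{y} = Xw$
为了寻找W,使用最小误差法(即最小二乘法,使平方误差最小)
平方误差: $\sum_{i=1}^{m} (y_i - x_i^{T} w)^2$
用矩阵表示: $(y - Xw)^{T}(y - Xw)$
对W求导: $-2X^{T}(y - Xw)$
一般线性回归:
令其等于0求解: $\hat{w} = (X^{T}X)^{-1}X^{T}y$
局部加权线性回归: $\hat{w} = (X^{T}WX)^{-1}X^{T}Wy$,其中 $W_{jj} = \exp\left(\frac{\lVert x^{(j)} - x \rVert^{2}}{-2k^{2}}\right)$
岭回归: $\hat{w} = (X^{T}X + \lambda I)^{-1}X^{T}y$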
'''
Created on Jan 8, 2011
@author: Peter
'''
from numpy import *
# Build the data set from a tab-delimited text file.
def loadDataSet(fileName):
    """Parse a tab-delimited file of floats into feature rows and labels.

    Each non-blank line is stripped and split on tabs. The last field is the
    label; the leading fields are the features. The number of feature
    columns is taken from the first line of the file (field count minus one).

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple where ``dataMat`` is a list of rows
        (each a list of floats) and ``labelMat`` is the list of float labels.
        Both are empty for an empty file.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a line has fewer fields than the first line.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # One ``with`` block: the file is read once and always closed.
    with open(fileName) as fr:
        for line in fr:
            if numFeat is None:
                # Column count comes from the first line, as before.
                numFeat = len(line.split('\t')) - 1
            stripped = line.strip()
            if not stripped:
                # Blank (e.g. trailing) lines carry no sample; skip them
                # instead of failing on float('').
                continue
            curLine = stripped.split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
# Standard (ordinary least-squares) regression.
def standRegres(xArr, yArr):
    """Solve ordinary least squares through the normal equations.

    Computes ``ws = (X^T X)^-1 X^T y``.

    Args:
        xArr: Array-like of samples; rows are samples, columns are features.
        yArr: Flat sequence holding one target per row of ``xArr``.

    Returns:
        A column matrix of regression coefficients, or ``None`` (after
        printing a message) when the determinant of ``X^T X`` is exactly 0.
    """
    # asmatrix rather than the ``mat`` alias, which NumPy 2.0 removed.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T
    xTx = xMat.T * xMat
    # A zero determinant means X^T X cannot be inverted.
    if linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return None
    return xTx.I * (xMat.T * yMat)
# Locally weighted linear regression, used to counter under-fitting.
def lwlr(testPoint, xArr, yArr, k=1.0):
    """Predict the target at one query point with locally weighted regression.

    Every training sample ``x_j`` receives the Gaussian-kernel weight
    ``exp(||x_j - testPoint||^2 / (-2 * k**2))``; the coefficients are then
    ``ws = (X^T W X)^-1 X^T W y`` with ``W`` the diagonal matrix of weights.

    Args:
        testPoint: The query sample (one row of features).
        xArr: Array-like of training samples; rows are samples.
        yArr: Flat sequence holding one target per row of ``xArr``.
        k: Kernel width; smaller values weight nearby samples more heavily.

    Returns:
        A 1x1 matrix holding the prediction, or ``None`` (after printing a
        message) when the determinant of ``X^T W X`` is exactly 0.
    """
    # asmatrix rather than the ``mat`` alias, which NumPy 2.0 removed.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T
    queryRow = asmatrix(testPoint)
    # Squared distance from the query to every sample, as an (m, 1) column.
    diffMat = xMat - queryRow
    sqDist = multiply(diffMat, diffMat).sum(axis=1)
    # Only the diagonal of W is ever non-zero, so keep the weights as a
    # column and scale rows directly instead of building an m x m matrix.
    weights = exp(sqDist / (-2.0 * k ** 2))
    xTx = xMat.T * multiply(weights, xMat)
    if linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return None
    ws = xTx.I * (xMat.T * multiply(weights, yMat))
    return queryRow * ws
#
def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Apply ``lwlr`` to every row of ``testArr``.

    Args:
        testArr: Sequence of query samples; ``testArr[i]`` is one sample.
        xArr: Training samples passed through to ``lwlr``.
        yArr: Training targets passed through to ``lwlr``.
        k: Kernel width passed through to ``lwlr``.

    Returns:
        A 1-D float array with one prediction per query sample. An entry is
        NaN when ``lwlr`` returned ``None`` (singular system) for that sample.
    """
    m = shape(testArr)[0]
    yHat = zeros(m)
    for i in range(m):
        pred = lwlr(testArr[i], xArr, yArr, k)
        # lwlr yields a 1x1 matrix; pull out the scalar explicitly because
        # NumPy >= 1.25 deprecates storing a size-1 array in a scalar slot.
        yHat[i] = nan if pred is None else pred[0, 0]
    return yHat
def lwlrTestPlot(xArr, yArr, k=1.0):
    """Run ``lwlr`` over a sorted copy of the samples (convenient for plots).

    Note: ``sort(0)`` orders each column independently along the row axis,
    so original rows stay intact only when at most one column varies.

    Args:
        xArr: Training samples; rows are samples.
        yArr: Training targets, one per row of ``xArr``.
        k: Kernel width passed through to ``lwlr``.

    Returns:
        A ``(yHat, xCopy)`` tuple: ``yHat`` has the shape of ``yArr`` and
        holds the prediction for each row of ``xCopy`` (NaN where ``lwlr``
        returned ``None``); ``xCopy`` is the column-sorted sample matrix.
    """
    yHat = zeros(shape(yArr))
    # matrix() copies its input, so the in-place sort below cannot modify
    # the caller's xArr (mat/asmatrix would share memory with an ndarray).
    xCopy = matrix(xArr)
    xCopy.sort(0)
    for i in range(shape(xArr)[0]):
        pred = lwlr(xCopy[i], xArr, yArr, k)
        # Store the scalar, not the 1x1 matrix (deprecated in NumPy >= 1.25).
        yHat[i] = nan if pred is None else pred[0, 0]
    return yHat, xCopy
# Sum of squared errors.
def rssError(yArr, yHatArr):
    """Return the sum of squared differences between targets and predictions.

    Both ``yArr`` and ``yHatArr`` need to be arrays.
    """
    residuals = yArr - yHatArr
    squared = residuals ** 2
    return squared.sum()
# Ridge regression, for when there are more features than samples.
def ridgeregres(xmat, ymat, lam=0.2):
    """Compute ridge-regression coefficients ``(X^T X + lam*I)^-1 X^T y``.

    Args:
        xmat: Sample matrix (rows are samples); used with matrix products.
        ymat: Column matrix of targets.
        lam: Ridge penalty added to the diagonal of ``X^T X``.

    Returns:
        The coefficient column matrix, or ``None`` (after printing a
        message) when the penalised matrix has a determinant of exactly 0.
    """
    featureCount = shape(xmat)[1]
    penalized = xmat.T * xmat + eye(featureCount) * lam
    if linalg.det(penalized) == 0.0:
        print('矩阵求逆出现错误')
        return None
    rhs = xmat.T * ymat
    return penalized.I * rhs
# Ridge regression on standardized data over a range of penalties.
def rigtest(xarr, yarr, numlt=30):
    """Fit ridge regression for ``numlt`` exponentially spaced penalties.

    The targets are mean-centred and the features are rescaled with
    ``rigsize``; iteration ``i`` then uses the penalty ``exp(i - 10)``.

    Args:
        xarr: Array-like of samples; rows are samples.
        yarr: Flat sequence holding one target per row of ``xarr``.
        numlt: Number of penalty values to try (default 30).

    Returns:
        A ``(numlt, n_features)`` array whose row ``i`` holds the
        coefficients obtained with the ``i``-th penalty.
    """
    # asmatrix rather than the ``mat`` alias, which NumPy 2.0 removed.
    xmat = asmatrix(xarr)
    ymat = asmatrix(yarr).T
    ymat = ymat - mean(ymat, 0)
    # Same rescaling the stagewise routine uses, via the shared helper.
    xmat = rigsize(xmat)
    wmat = zeros((numlt, shape(xmat)[1]))
    for i in range(numlt):
        ws = ridgeregres(xmat, ymat, exp(i - 10))
        wmat[i, :] = ws.T
    return wmat
# Data standardization.
def rigsize(xmat):
    """Centre each column on its mean and divide by the column variance.

    NOTE(review): the divisor is the variance, not the standard deviation;
    confirm this is the intended scaling.

    Args:
        xmat: Sample matrix or array; statistics are taken along axis 0.

    Returns:
        The rescaled data, same shape as ``xmat``.
    """
    centered = xmat - mean(xmat, 0)
    return centered / var(xmat, 0)
# Error computation.
# NOTE: this repeats the rssError definition that appears earlier in the file.
def rssError(yArr, yHatArr):
    """Return the residual sum of squares of ``yArr`` against ``yHatArr``.

    Both ``yArr`` and ``yHatArr`` need to be arrays.
    """
    delta = yArr - yHatArr
    return (delta ** 2).sum()
# Forward stagewise regression.
def stagwise(xarr, yarr, eps=0.01, numlt=100):
    """Greedy forward stagewise regression.

    The targets are mean-centred and the features rescaled with ``rigsize``.
    On every iteration each coefficient is tried with a step of ``+eps`` and
    ``-eps``; the single change giving the lowest ``rssError`` is kept.

    Args:
        xarr: Array-like of samples; rows are samples.
        yarr: Flat sequence holding one target per row of ``xarr``.
        eps: Step size applied to one coefficient per iteration.
        numlt: Number of iterations.

    Returns:
        A ``(numlt, n_features)`` array whose row ``i`` holds the
        coefficient vector after iteration ``i``.
    """
    # asmatrix rather than the ``mat`` alias, which NumPy 2.0 removed.
    xmat = asmatrix(xarr)
    ymat = asmatrix(yarr).T
    ymat = ymat - mean(ymat, 0)
    xmat = rigsize(xmat)
    n = shape(xmat)[1]
    returmat = zeros((numlt, n))
    ws = zeros((n, 1))
    for i in range(numlt):
        # Fall back to the current weights if no candidate beats infinity
        # (e.g. every error is NaN).
        bestWs = ws
        lowestError = inf
        for j in range(n):
            for sign in (-1, 1):
                # Each candidate is a fresh copy, so ws is never mutated.
                wstest = ws.copy()
                wstest[j] += eps * sign
                yhat = xmat * wstest
                reterror = rssError(ymat.A, yhat.A)
                if reterror < lowestError:
                    lowestError = reterror
                    bestWs = wstest
        ws = bestWs
        returmat[i, :] = ws.T
    return returmat