# Code saved here; implemented in Python 3. For a detailed walkthrough see
# "Machine Learning in Action" (Peter Harrington).
# regression.py:
from numpy import *
# 8-1 Linear regression
def loadDataSet(filename):
    """Load a tab-separated data file.

    Each line holds the feature values of one sample followed by the target
    value in the last column.

    Args:
        filename: path to a tab-delimited text file.

    Returns:
        (dataMat, labelMat): dataMat is the list of feature rows X[i],
        labelMat is the list of targets Y[i].
    """
    # Open once and close deterministically (original opened the file twice
    # and never closed it).
    with open(filename) as fr:
        lines = fr.readlines()
    # Number of features = number of columns minus the label column.
    numFeat = len(lines[0].split('\t')) - 1
    dataMat = []
    labelMat = []
    for line in lines:
        curLine = line.strip().split('\t')
        dataMat.append([float(curLine[i]) for i in range(numFeat)])  # X[i]
        labelMat.append(float(curLine[-1]))  # Y[i], last element of the row
    return dataMat, labelMat
# Compute the best-fit line (ordinary least squares via the normal equation).
def standRegres(xArr, yArr):
    """Solve w = (X^T X)^{-1} X^T y.

    Args:
        xArr: training features, m samples of n features (e.g. 200 x 2).
        yArr: m target values.

    Returns:
        ws: (n x 1) numpy matrix of regression weights, or None (after
        printing a warning) when X^T X is singular and cannot be inverted.
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T  # column vector of targets
    xTx = xMat.T * xMat  # n x n
    # A zero determinant means X^T X has no inverse.
    if linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    ws = xTx.I * (xMat.T * yMat)  # the n x 1 weight vector
    return ws
# Interactive usage transcript (demo session for the functions above).
# Kept verbatim as a module-level string: string literals are runtime data,
# so the text below is intentionally unchanged.
'''
d:
cd pythonwp
cd ch08
python
import regression
from importlib import reload
reload(regression)
xArr,yArr=regression.loadDataSet('ex0.txt')
ws=regression.standRegres(xArr,yArr)
xMat=mat(xArr)
yMat=mat(yArr)
yHat=xMat*ws
corrcoef(yHat.T,yMat) #相关系数
# 绘图
import matplotlib.pyplot as plt #--绘图
fig=plt.figure()
ax=fig.add_subplot(111)
ax.scatter(xMat[:,1].flatten().A[0],yMat.T[:,0].flatten().A[0]) #绘--原始数据
xCopy=xMat.copy()
xCopy.sort(0) #将点按照feature2升序排列,all featrue1=1.0
yHat=xCopy*ws #--预测的y
ax.plot(xCopy[:,1],yHat) #绘--预测数据
plt.show()
'''
# 8-2 Locally weighted linear regression
def lwlr(testPoint, xArr, yArr, k=1.0):
    """Predict the target at testPoint with locally weighted regression.

    Each training sample j gets a Gaussian-kernel weight centred on the
    query point: W[j,j] = exp(-|x - x_j|^2 / (2 k^2)).  A smaller k makes
    the fit more local.

    Args:
        testPoint: the query point x (a 1 x n row / feature list).
        xArr: training features, m samples of n features.
        yArr: m training targets.
        k: Gaussian kernel bandwidth (default 1.0).

    Returns:
        testPoint * ws, the prediction (a 1 x 1 matrix), or None (after
        printing a warning) when the weighted normal matrix is singular.
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    m = shape(xMat)[0]  # number of training samples
    weights = mat(eye((m)))  # diagonal weight matrix, starts as identity
    for j in range(m):  # fill in the kernel weights
        diffMat = testPoint - xMat[j, :]  # x - x[j]
        weights[j, j] = exp(diffMat * diffMat.T / (-2.0 * k ** 2))  # W[j,j]
    xTx = xMat.T * weights * xMat
    if linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    ws = xTx.I * xMat.T * weights * yMat
    # Original ended with the truncated `return te` (NameError); the
    # prediction for the query point is testPoint * ws.
    return testPoint * ws