from numpy import *
import matplotlib.pyplot as plt
def loadDataSet(filename):
    """Load a tab-separated numeric data file into feature rows and labels.

    Every non-blank line is split on tabs. The last field of a line is the
    target value and the fields before it are the features. The number of
    feature columns is taken from the first non-blank line.

    Args:
        filename: Path of the tab-delimited text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of feature
        rows (each a list of floats) and ``labelMat`` is the list of float
        targets, both in file order. An empty file yields ``([], [])``.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a line has fewer fields than the feature count
            derived from the first non-blank line.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # One ``with`` block reads the file a single time and guarantees the
    # handle is closed even when parsing fails part-way through.
    with open(filename) as fr:
        for line in fr:
            # Blank lines (e.g. a trailing newline) carry no sample.
            if not line.strip():
                continue
            if numFeat is None:
                numFeat = len(line.split('\t')) - 1
            curLine = line.strip().split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def standRegres(xArr, yArr):
    """Fit ordinary least squares weights with the normal equation.

    Computes ``ws = (X^T X)^-1 X^T y``.

    Args:
        xArr: 2-D array-like of features, one sample per row.
        yArr: 1-D array-like of targets, one per sample.

    Returns:
        Column matrix of weights with one row per feature, or ``None``
        (after printing a message) when ``X^T X`` has a zero determinant.
    """
    # ``asmatrix`` is used instead of the ``mat`` alias, which NumPy 2.0
    # removed from the main namespace; the two are equivalent on NumPy 1.x.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T
    xTx = xMat.T * xMat
    if linalg.det(xTx) == 0.0:
        print('this matrix is singular,cannot do inverse')
        return None
    return xTx.I * (xMat.T * yMat)
'''
#测试线性回归
xArr,yArr=loadDataSet('C:/Users/xuwei/Desktop/机器学习/机器学习实战(pdf版+源码)/machinelearninginaction/Ch08/ex0.txt')
print(xArr[0:2])
print(yArr)
ws=standRegres(xArr, yArr)
print(ws)
xMat=mat(xArr)
yMat=mat(yArr)
yHat=xMat*ws
fig=plt.figure()
ax=fig.add_subplot(111)
ax.scatter(xMat[:,1].flatten().A[0],yMat.T[:,0].flatten().A[0])
xCopy=xMat.copy()
xCopy.sort(0)
yHat=xCopy*ws
ax.plot(xCopy[:,1],yHat)
plt.show()
'''
# Locally weighted linear regression (LWLR)
def lwlr(testPoint, xArr, yArr, k=1.0):
    """Predict the target at one query point with locally weighted regression.

    Each training row ``x_j`` receives the Gaussian kernel weight
    ``exp(-||testPoint - x_j||^2 / (2 * k**2))`` and a weighted least
    squares fit is solved for this single query point only.

    Args:
        testPoint: 1-D array-like (or 1 x n matrix) with the query features.
        xArr: 2-D array-like of training features, one sample per row.
        yArr: 1-D array-like of training targets, one per sample.
        k: Kernel width; the weights decay exponentially with squared
            distance, and a smaller ``k`` makes them decay faster.

    Returns:
        1 x 1 matrix holding the prediction, or ``None`` (after printing a
        message) when the weighted ``X^T W X`` has a zero determinant.
    """
    # ``asmatrix`` replaces the ``mat`` alias that NumPy 2.0 removed.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T
    queryRow = asmatrix(testPoint)

    # Squared distance from the query to every training row at once. The
    # diagonal weight matrix is kept as an (m, 1) column of its diagonal
    # entries, so no dense m x m matrix is ever built.
    diffMat = queryRow - xMat
    sqDist = multiply(diffMat, diffMat).sum(axis=1)
    weights = exp(sqDist / (-2.0 * k ** 2))

    # multiply() broadcasts the weight column across the rows, which equals
    # left-multiplying by the diagonal weight matrix.
    xTx = xMat.T * multiply(weights, xMat)
    if linalg.det(xTx) == 0.0:
        print('this matrix is singular,cannot do inverse')
        return None
    ws = xTx.I * (xMat.T * multiply(weights, yMat))
    return queryRow * ws
def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run ``lwlr`` for every row of ``testArr`` and collect the predictions.

    Args:
        testArr: 2-D array-like of query points, one per row.
        xArr: 2-D array-like of training features, one sample per row.
        yArr: 1-D array-like of training targets.
        k: Kernel width forwarded to ``lwlr``.

    Returns:
        1-D float array with one prediction per query row. An entry is NaN
        when ``lwlr`` returned ``None`` for that row (singular local fit).
    """
    m = shape(testArr)[0]
    yHat = zeros(m)
    for i in range(m):
        prediction = lwlr(testArr[i], xArr, yArr, k)
        if prediction is None:
            # lwlr has already printed its message; mark the point instead
            # of failing while storing None into a float array.
            yHat[i] = nan
        else:
            # Pull the scalar out of the 1 x 1 result explicitly; implicit
            # array-to-scalar conversion on assignment is deprecated.
            yHat[i] = prediction[0, 0]
    return yHat
'''
#测试lwlr
xArr,yArr=loadDataSet('C:/Users/xuwei/Desktop/机器学习/机器学习实战(pdf版+源码)/machinelearninginaction/Ch08/ex0.txt')
yHat=lwlrTest(xArr,xArr,yArr,0.01)
xMat=mat(xArr)
strInd=xMat[:,1].argsort(0) # 0为按列排序
xSort=xMat[strInd][:,0,:]
'''
'''
xSort格式
[[ 1. 0.014855]
[ 1. 0.015371]
[ 1. 0.033859]
[ 1. 0.038326]
[ 1. 0.040486]
[ 1. 0.045353]]
'''
'''
fig=plt.figure()
ax=fig.add_subplot(111)
ax.plot(xSort[:,1],yHat[strInd])
ax.scatter(xMat[:,1].flatten().A[0],mat(yArr).T.flatten().A[0],s=2,c='red')
plt.show()
'''
# Abalone age prediction test
def rssError(yArr, yHatArr):
    """Return the residual sum of squares between targets and predictions.

    Args:
        yArr: Array-like of actual values.
        yHatArr: Array-like of predicted values, broadcastable against
            ``yArr``.

    Returns:
        Sum of the element-wise squared differences as a NumPy scalar.
    """
    # asarray makes plain lists subtractable and turns ``matrix`` inputs
    # into ordinary arrays, so ``** 2`` is an element-wise square rather
    # than a matrix power.
    residual = asarray(yArr) - asarray(yHatArr)
    return (residual ** 2).sum()
'''
测试鲍鱼寿命
abX,abY=loadDataSet('C:/Users/xuwei/Desktop/机器学习/机器学习实战(pdf版+源码)/machinelearninginaction/Ch08/abalone.txt')
yHat01=lwlrTest(abX[0:99],abX[0:99],abY[0:99],0.1)
print(rssError(abY[0:99], yHat01))
yHat1=lwlrTest(abX[0:99],abX[0:99],abY[0:99],1)
print(rssError(abY[0:99], yHat1))
yHat10=lwlrTest(abX[0:99],abX[0:99],abY[0:99],10)
print(rssError(abY[0:99], yHat10))
yHat01=lwlrTest(abX[100:199],abX[0:99],abY[0:99],0.1)
print(rssError(abY[100:199], yHat01))
yHat1=lwlrTest(abX[100:199],abX[0:99],abY[0:99],1)
print(rssError(abY[100:199], yHat1))
yHat10=lwlrTest(abX[100:199],abX[0:99],abY[0:99],10)
print(rssError(abY[100:199], yHat10))
'''
# Ridge regression
def ridgeRegres(xMat, yMat, lam=0.2):
    """Solve ridge regression weights ``(X^T X + lam * I)^-1 X^T y``.

    Args:
        xMat: 2-D matrix or array-like of features, one sample per row.
        yMat: Column matrix or (m, 1) array-like of targets.
        lam: Ridge penalty added to the diagonal of ``X^T X``.

    Returns:
        Column matrix of weights with one row per feature, or ``None``
        (after printing a message) when the penalised matrix has a zero
        determinant.
    """
    # Coerce to ``matrix`` so that ``*`` is a matrix product and ``.I``
    # exists even when the caller passes ndarrays or nested lists.
    xMat = asmatrix(xMat)
    yMat = asmatrix(yMat)
    numFeat = shape(xMat)[1]
    denom = xMat.T * xMat + eye(numFeat) * lam
    if linalg.det(denom) == 0.0:
        print("this matrix is singular,cannot do inverse")
        return None
    return denom.I * (xMat.T * yMat)
def ridgeTest(xArr, yArr, numTestPts=30):
    """Fit ridge regression over a sweep of exponentially growing penalties.

    The targets are centred, the features are normalised with
    ``regularize`` (column mean removed, then divided by the column
    variance), and ``ridgeRegres`` is run with ``lam = exp(i - 10)`` for
    ``i = 0 .. numTestPts - 1``.

    Args:
        xArr: 2-D array-like of features, one sample per row.
        yArr: 1-D array-like of targets, one per sample.
        numTestPts: Number of penalty values to try (default 30).

    Returns:
        ``(numTestPts, n_features)`` array; row ``i`` holds the weights
        obtained with ``lam = exp(i - 10)``.
    """
    # ``asmatrix`` replaces the ``mat`` alias that NumPy 2.0 removed.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T
    yMat = yMat - mean(yMat, 0)
    # Same normalisation the forward stagewise routine uses.
    xMat = regularize(xMat)
    wMat = zeros((numTestPts, shape(xMat)[1]))
    for i in range(numTestPts):
        ws = ridgeRegres(xMat, yMat, exp(i - 10))
        wMat[i, :] = ws.T
    return wMat
'''
测试岭回归
abX,abY=loadDataSet('C:/Users/xuwei/Desktop/机器学习/机器学习实战(pdf版+源码)/machinelearninginaction/Ch08/abalone.txt')
ridgeWeights=ridgeTest(abX,abY)
fig=plt.figure()
ax=fig.add_subplot(111)
ax.plot(ridgeWeights)
plt.show()
'''
def regularize(xMat):
    """Normalise each column: subtract its mean, then divide by its variance.

    The input is not modified. Columns whose variance is exactly zero
    (constant columns) come out as all zeros instead of NaN.

    Args:
        xMat: 2-D matrix or ndarray, one sample per row.

    Returns:
        A new object of the same shape holding the normalised columns.
    """
    inMeans = mean(xMat, 0)
    inVar = var(xMat, 0)
    # A constant column has zero variance; dividing its (all-zero) centred
    # values by 1 instead avoids 0/0 = NaN.
    safeVar = where(inVar == 0, 1.0, inVar)
    return (xMat - inMeans) / safeVar
# Forward stagewise linear regression
def stageWise(xArr, yArr, eps=0.01, numIt=100, verbose=True):
    """Run forward stagewise linear regression and record the weight path.

    The targets are centred and the features are normalised with
    ``regularize``. In every iteration each single weight is tried at
    ``+eps`` and ``-eps``; the one change giving the lowest residual sum
    of squares (via ``rssError``) is kept.

    Args:
        xArr: 2-D array-like of features, one sample per row.
        yArr: 1-D array-like of targets, one per sample.
        eps: Step size applied to one weight per iteration.
        numIt: Number of iterations.
        verbose: When true (the default), print the current weights at the
            start of every iteration.

    Returns:
        ``(numIt, n_features)`` array; row ``i`` holds the weights after
        iteration ``i``.
    """
    # ``asmatrix`` replaces the ``mat`` alias that NumPy 2.0 removed.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T
    yMat = yMat - mean(yMat, 0)  # centre the targets
    xMat = regularize(xMat)
    n = shape(xMat)[1]
    returnMat = zeros((numIt, n))
    ws = zeros((n, 1))
    for i in range(numIt):
        if verbose:
            print(ws.T)
        lowestError = inf
        # Fall back to the current weights when no candidate beats ``inf``
        # (no features, or every error is NaN) instead of hitting an
        # unbound or stale ``wsMax``.
        wsMax = ws
        for j in range(n):
            for sign in (-1, 1):
                wsTest = ws.copy()
                wsTest[j] += eps * sign
                yTest = xMat * wsTest
                rssE = rssError(yMat.A, yTest.A)
                if rssE < lowestError:
                    lowestError = rssE
                    wsMax = wsTest
        ws = wsMax.copy()
        returnMat[i, :] = ws.T
    return returnMat
# Demo run: forward stagewise regression on the abalone data file with a
# step size of 0.001 for 500 iterations; the weight history is printed.
abalonePath = 'C:/Users/xuwei/Desktop/机器学习/机器学习实战(pdf版+源码)/machinelearninginaction/Ch08/abalone.txt'
xArr, yArr = loadDataSet(abalonePath)
weightHistory = stageWise(xArr, yArr, 0.001, 500)
print(weightHistory)
# Machine Learning in Action: regression
# (Blog page footer) Latest recommended article published 2022-11-23 22:34:57