机器学习实战 回归 (Machine Learning in Action — Chapter 8: Regression)

from numpy import *
import matplotlib.pyplot as plt


def loadDataSet(filename):
    """Load a tab-separated data file.

    Each line holds the feature values followed by the target value in the
    last column.

    Args:
        filename: path to a tab-separated text file.

    Returns:
        (dataMat, labelMat): list of feature rows (lists of floats) and the
        list of target values (floats).
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the handle is closed; the original opened the file
    # twice (once to count columns) and never closed either handle.
    with open(filename) as fr:
        for line in fr:
            curLine = line.strip().split('\t')
            # Every column but the last is a feature; the last is the label.
            dataMat.append([float(v) for v in curLine[:-1]])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat

def standRegres(xArr, yArr):
    """Ordinary least-squares regression via the normal equations.

    Solves w = (X^T X)^{-1} X^T y.

    Args:
        xArr: feature rows (each a list of floats).
        yArr: target values.

    Returns:
        Weight vector as an (n, 1) matrix, or None when X^T X is singular.
    """
    X = mat(xArr)
    y = mat(yArr).T
    gram = X.T * X
    # A zero determinant means the Gram matrix cannot be inverted.
    if linalg.det(gram) == 0.0:
        print('this matrix is singular,cannot do inverse')
        return
    return gram.I * (X.T * y)
# NOTE(review): the triple-quoted string below is commented-out demo code
# ("test linear regression"): it loads ex0.txt from a user-specific path,
# fits standRegres and plots the fitted line. Kept verbatim.
'''
#测试线性回归
xArr,yArr=loadDataSet('C:/Users/xuwei/Desktop/机器学习/机器学习实战(pdf版+源码)/machinelearninginaction/Ch08/ex0.txt')
print(xArr[0:2])
print(yArr)
ws=standRegres(xArr, yArr)
print(ws)

xMat=mat(xArr)
yMat=mat(yArr)
yHat=xMat*ws

fig=plt.figure()
ax=fig.add_subplot(111)
ax.scatter(xMat[:,1].flatten().A[0],yMat.T[:,0].flatten().A[0])
xCopy=xMat.copy()
xCopy.sort(0)
yHat=xCopy*ws
ax.plot(xCopy[:,1],yHat)
plt.show()

'''
# Locally weighted linear regression (LWLR)
def lwlr(testPoint, xArr, yArr, k=1.0):
    """Predict the response at a single query point with LWLR.

    Each training sample is weighted by a Gaussian kernel of width k
    centred on the query point, then a weighted least-squares fit is
    solved for that point alone.

    Args:
        testPoint: query feature vector (one row).
        xArr: training feature rows.
        yArr: training targets.
        k: kernel bandwidth — smaller k gives a more local fit.

    Returns:
        The prediction (1x1 matrix), or None when the weighted Gram
        matrix is singular.
    """
    X = mat(xArr)
    y = mat(yArr).T
    numSamples = shape(X)[0]
    W = mat(eye(numSamples))  # diagonal weight matrix
    for idx in range(numSamples):
        delta = testPoint - X[idx, :]
        # Gaussian kernel: weight decays exponentially with distance.
        W[idx, idx] = exp(delta * delta.T / (-2.0 * k ** 2))
    gram = X.T * (W * X)
    if linalg.det(gram) == 0.0:
        print('this matrix is singular,cannot do inverse')
        return
    ws = gram.I * (X.T * (W * y))
    return testPoint * ws
    
def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run lwlr once per row of testArr.

    Args:
        testArr: rows to predict at.
        xArr: training feature rows.
        yArr: training targets.
        k: kernel bandwidth forwarded to lwlr.

    Returns:
        1-D array of predictions, one entry per row of testArr.
    """
    predictions = zeros(shape(testArr)[0])
    for idx in range(shape(testArr)[0]):
        # Each query point gets its own locally-weighted fit.
        predictions[idx] = lwlr(testArr[idx], xArr, yArr, k)
    return predictions
# NOTE(review): the triple-quoted strings below are commented-out demo code
# ("test lwlr"): they load ex0.txt from a user-specific path, run lwlrTest
# with k=0.01, sort by the second feature and plot. Kept verbatim.
'''
#测试lwlr
xArr,yArr=loadDataSet('C:/Users/xuwei/Desktop/机器学习/机器学习实战(pdf版+源码)/machinelearninginaction/Ch08/ex0.txt')
yHat=lwlrTest(xArr,xArr,yArr,0.01)

xMat=mat(xArr)
strInd=xMat[:,1].argsort(0)   # 0为按列排序
xSort=xMat[strInd][:,0,:]
''' 
'''
xSort格式
[[ 1.        0.014855]
 [ 1.        0.015371]
 [ 1.        0.033859]
 [ 1.        0.038326]
 [ 1.        0.040486]
 [ 1.        0.045353]]
'''
'''
fig=plt.figure()
ax=fig.add_subplot(111)
ax.plot(xSort[:,1],yHat[strInd])
ax.scatter(xMat[:,1].flatten().A[0],mat(yArr).T.flatten().A[0],s=2,c='red')
plt.show()

'''

# Error metric used by the abalone-age experiments below.
def rssError(yArr, yHatArr):
    """Return the residual sum of squares between targets and predictions."""
    residuals = yArr - yHatArr
    return (residuals ** 2).sum()
# NOTE(review): commented-out demo ("test abalone age"): compares LWLR
# training and hold-out error at bandwidths 0.1, 1 and 10 on abalone.txt
# (user-specific path). Kept verbatim.
'''
测试鲍鱼寿命
abX,abY=loadDataSet('C:/Users/xuwei/Desktop/机器学习/机器学习实战(pdf版+源码)/machinelearninginaction/Ch08/abalone.txt')
yHat01=lwlrTest(abX[0:99],abX[0:99],abY[0:99],0.1)
print(rssError(abY[0:99], yHat01))
yHat1=lwlrTest(abX[0:99],abX[0:99],abY[0:99],1)
print(rssError(abY[0:99], yHat1))
yHat10=lwlrTest(abX[0:99],abX[0:99],abY[0:99],10)
print(rssError(abY[0:99], yHat10))

yHat01=lwlrTest(abX[100:199],abX[0:99],abY[0:99],0.1)
print(rssError(abY[100:199], yHat01))
yHat1=lwlrTest(abX[100:199],abX[0:99],abY[0:99],1)
print(rssError(abY[100:199], yHat1))
yHat10=lwlrTest(abX[100:199],abX[0:99],abY[0:99],10)
print(rssError(abY[100:199], yHat10))
'''

# Ridge regression
def ridgeRegres(xMat, yMat, lam=0.2):
    """Solve ridge-regression weights: w = (X^T X + lam*I)^{-1} X^T y.

    Args:
        xMat: (m, n) feature matrix.
        yMat: (m, 1) target column.
        lam: L2 penalty strength.

    Returns:
        (n, 1) weight matrix, or None when the penalized Gram matrix is
        singular.
    """
    penalized = xMat.T * xMat + eye(shape(xMat)[1]) * lam
    if linalg.det(penalized) == 0.0:
        print("this matrix is singular,cannot do inverse")
        return
    return penalized.I * (xMat.T * yMat)
    
def ridgeTest(xArr, yArr):
    """Fit ridge regression over 30 log-spaced lambda values.

    Targets are mean-centred and features are mean-centred then divided by
    their column variance before fitting (matching the book's code).

    Args:
        xArr: feature rows.
        yArr: targets.

    Returns:
        (30, n) array; row i holds the weights for lam = exp(i - 10).
    """
    X = mat(xArr)
    y = mat(yArr).T
    y = y - mean(y, 0)                    # centre the targets
    # Standardise: subtract the column mean, divide by the column variance.
    X = (X - mean(X, 0)) / var(X, 0)
    numLambdas = 30
    weightLog = zeros((numLambdas, shape(X)[1]))
    for i in range(numLambdas):
        # Sweep lambda on a log scale from exp(-10) up to exp(19).
        weightLog[i, :] = ridgeRegres(X, y, exp(i - 10)).T
    return weightLog
    
# NOTE(review): commented-out demo ("test ridge regression"): runs
# ridgeTest on abalone.txt (user-specific path) and plots the coefficient
# paths. Kept verbatim.
'''
测试岭回归
abX,abY=loadDataSet('C:/Users/xuwei/Desktop/机器学习/机器学习实战(pdf版+源码)/machinelearninginaction/Ch08/abalone.txt')
ridgeWeights=ridgeTest(abX,abY)
fig=plt.figure()
ax=fig.add_subplot(111)
ax.plot(ridgeWeights)
plt.show()

'''

def regularize(xMat):
    """Return a copy of xMat with each column mean-centred and divided by
    its column variance (variance, not std — matching the book's code)."""
    out = xMat.copy()
    out = (out - mean(out, 0)) / var(out, 0)
    return out

# Forward stagewise linear regression
def stageWise(xArr, yArr, eps=0.01, numIt=100):
    """Greedy forward-stagewise regression.

    Each iteration tries nudging every weight by +/-eps and keeps the one
    change that lowers the residual sum of squares the most.

    Args:
        xArr: feature rows.
        yArr: targets.
        eps: step size applied to a single weight per iteration.
        numIt: number of iterations.

    Returns:
        (numIt, n) array; row i holds the weight vector after iteration i.
    """
    xMat = regularize(mat(xArr))   # standardise features (mean 0)
    yMat = mat(yArr).T
    yMat = yMat - mean(yMat, 0)    # centre targets
    m, n = shape(xMat)
    returnMat = zeros((numIt, n))
    ws = zeros((n, 1))
    for i in range(numIt):
        print(ws.T)                # progress trace, as in the book
        lowestError = inf
        # Initialising wsMax avoids an unbound name if n == 0.
        # (Also removed: unused local 'wsMat' from the original.)
        wsMax = ws
        for j in range(n):
            for sign in [-1, 1]:
                wsTest = ws.copy()
                wsTest[j] += eps * sign
                rssE = rssError(yMat.A, (xMat * wsTest).A)
                if rssE < lowestError:
                    lowestError = rssE
                    wsMax = wsTest
        ws = wsMax.copy()
        returnMat[i, :] = ws.T
    return returnMat

# Demo driver: run forward-stagewise regression on the abalone data set.
# NOTE(review): hard-coded absolute Windows path — runs only on the
# original author's machine; parameterise before reuse.
xArr,yArr=loadDataSet('C:/Users/xuwei/Desktop/机器学习/机器学习实战(pdf版+源码)/machinelearninginaction/Ch08/abalone.txt')
print(stageWise(xArr,yArr,0.001,500))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值