#<machine_learning>_Regression

from numpy import *
#filename=data+label
#thinking about the method on your own!
#recite: pay attention to the position of h
def load_data(filename):
    """Read a tab-separated numeric file into feature rows and labels.

    Each non-blank line is split on tabs; every field except the last is
    parsed as a float feature and the last field is parsed as the label.

    filename: path of the text file to read.

    Returns (data, label): data is a list of float lists (one per line),
    label is a list of floats.
    Raises ValueError if a field cannot be parsed as a float.
    """
    data=[];label=[]
    # 'with' guarantees the handle is closed even if parsing fails.
    with open(filename,'r') as f:
        for raw in f:
            stripped=raw.strip()
            if not stripped:
                # A blank line has no label field; skip it instead of
                # failing on float('').
                continue
            fields=stripped.split('\t')
            data.append([float(v) for v in fields[:-1]])
            label.append(float(fields[-1]))
    return data,label
    
#data+label=weights
def mls(data,label):
    """Fit ordinary least squares via the normal equations.

    Computes weights = (X^T X)^-1 X^T y.

    data:  2-D sequence, one row per sample.
    label: 1-D sequence with one target per row of data.

    Returns the weights as an (n_features, 1) numpy matrix.
    Raises numpy.linalg.LinAlgError if X^T X is exactly singular.
    """
    # asmatrix replaces its old alias 'mat', which NumPy 2.0 removed.
    X=asmatrix(data)
    y=asmatrix(label).T
    weights=(X.T*X).I*(X.T*y)
    return weights

#recite the functions: 1.flatten().A[0] 2.scatter-point;plot-line;
def draw(data,label):
    """Scatter-plot the samples and overlay the least-squares fit.

    Column 1 of data is used as the x coordinate; the fitted values are
    data * weights with weights taken from mls(data, label).

    data:  2-D sequence with at least two columns.
    label: 1-D sequence with one target per row of data.

    Shows the figure with plt.show(); returns None.
    """
    import matplotlib.pyplot as plt
    # asmatrix replaces its old alias 'mat', which NumPy 2.0 removed.
    X_mat=asmatrix(data)
    X=X_mat[:,1].flatten().A[0]
    Y=asmatrix(label).flatten().A[0]
    plt.scatter(X,Y)
    weights=mls(data,label)
    # Hand matplotlib plain 1-D arrays rather than (n, 1) matrices.
    fitted=asarray(X_mat*weights).ravel()
    plt.plot(X,fitted)
    plt.show()

def evaluate(data,label):
    """Return the correlation matrix between fitted and actual labels.

    Fits weights with mls(data, label), predicts data * weights and passes
    the predictions and the labels (each as a single row) to corrcoef.

    data:  2-D sequence, one row per sample.
    label: 1-D sequence with one target per row of data.

    Returns the array produced by corrcoef for the two rows.
    """
    weights=mls(data,label)
    # asmatrix replaces its old alias 'mat', which NumPy 2.0 removed.
    predicted=(asmatrix(data)*weights).T
    return corrcoef(predicted,asmatrix(label))
    
#locally weighted linear regression
#recite:two formulas-- w_hat=(X^T W X)^-1 X^T W y; W(i,i)=exp(|x(i)-x|/(-2k^2)); mat(eye())
#sqrt
#recite:test k=1,0.01,0.003
def lwlr_point(x,data,label,k):
    """Predict one query point with locally weighted linear regression.

    Every training row i gets the weight
        W[i,i] = exp(|data[i] - x| / (-2 * k**2))
    and the prediction is x * (X^T W X)^-1 X^T W y.

    x:     query point as a single row (1 x n_features).
    data:  2-D sequence, one row per training sample.
    label: 1-D sequence with one target per row of data.
    k:     kernel width; smaller values weight nearby rows more heavily.

    Returns a 1x1 numpy matrix holding the prediction, or None (after
    printing a message) when X^T W X has a determinant of exactly 0.
    """
    # asmatrix replaces its old alias 'mat', which NumPy 2.0 removed.
    # Convert once instead of rebuilding the matrix on every loop pass.
    X=asmatrix(data)
    y=asmatrix(label).T
    x=asmatrix(x)
    # Euclidean distance from the query row to every training row.
    # NOTE(review): the distance is NOT squared here; the usual Gaussian
    # kernel is exp(-d**2 / (2*k**2)). Kept as-is to preserve results.
    diffs=asarray(X)-asarray(x)
    dist=sqrt((diffs**2).sum(axis=1))
    W=asmatrix(diag(exp(dist/(-2.0*k**2))))
    xTWx=X.T*W*X
    if linalg.det(xTWx)==0.0:
        print("this matrix is singular")
        return
    WS=xTWx.I*(X.T*W*y)
    return x*WS
    
def lwlr(X,data,label,k):
    """Run lwlr_point for every row of X and collect the predictions.

    X:     2-D sequence of query points, one per row.
    data:  2-D sequence of training samples, one per row.
    label: 1-D sequence with one target per row of data.
    k:     kernel width forwarded to lwlr_point.

    Returns a 1-D float array with one prediction per row of X; an entry
    is NaN when lwlr_point reported a singular system for that row.
    """
    # asmatrix replaces its old alias 'mat', which NumPy 2.0 removed.
    queries=asmatrix(X)
    m=shape(X)[0]
    y=zeros(m)
    for i in range(m):
        pred=lwlr_point(queries[i,:],data,label,k)
        if pred is None:
            # Singular local system: there is no prediction to store.
            y[i]=nan
        else:
            # Take the scalar out of the 1x1 result explicitly; implicit
            # array-to-scalar conversion is deprecated in recent NumPy.
            y[i]=pred[0,0]
    return y
def rsserror(a,b):
    """Return the residual sum of squares between a and b.

    a, b: numpy arrays for which a - b is defined.
    """
    residual=a-b
    squared=residual**2
    return squared.sum()
#ind=mat(data)[0:99,1].argsort(0)
#xsort=mat(data)[0:99,1][ind][:,0,:]
#plt.plot(xsort[:,1],y1[ind])

#ridge regression: feature>sample
#recite: w=(X^T X + lambda*I)^-1 X^T y (adding lambda to the diagonal makes the matrix invertible)

def ridgeRgres(data,label,lam=0.2):
    """Fit ridge regression and return the weight vector.

    Computes ws = (X^T X + lam * I)^-1 X^T y.

    data:  2-D sequence, one row per sample.
    label: 1-D sequence with one target per row of data.
    lam:   ridge penalty added to the diagonal of X^T X (default 0.2).

    Returns the weights as an (n_features, 1) numpy matrix, or None
    (after printing a message) when the penalised matrix has a
    determinant of exactly 0.
    """
    # asmatrix replaces its old alias 'mat', which NumPy 2.0 removed.
    X=asmatrix(data)
    y=asmatrix(label).T
    n=shape(X)[1]
    denom=X.T*X+asmatrix(eye(n))*lam
    if linalg.det(denom)==0.0:
        # Can still happen when lam is 0 and X^T X is rank deficient.
        print("this matrix is singular")
        return
    ws=denom.I*(X.T*y)
    return ws

def ridgelam(data,label,num):
    """Collect ridge weights for num exponentially spaced penalties.

    Row i of the result holds the weights returned by
    ridgeRgres(data, label, exp(i - 10)).

    Returns a (num, n_features) array.
    """
    n_features=shape(data)[1]
    WS=zeros((num,n_features))
    penalties=[exp(step-10) for step in range(num)]
    for row,penalty in enumerate(penalties):
        fitted=ridgeRgres(data,label,penalty)
        WS[row]=fitted.T
    return WS
#why exp: using exp makes the scope (0.00004,480000000)
#plot(WS):the best lambda is between the scope.
#actually, I still don't know how to choose the best w for ridge.


#lasso regression:adding constraints,the result is the same as ridge


#stagewise:focusing on the position of 'ws=wsmax.copy()'
def stagewise(data,label,step,iternum):
    """Run forward stagewise regression and record the weight path.

    The features are centred per column and divided by the per-column
    variance; the labels are centred and divided by their variance. On
    every iteration each weight is tried at +step and -step, and the
    single change giving the lowest residual sum of squares is kept.

    data:    2-D sequence, one row per sample.
    label:   1-D sequence with one target per row of data.
    step:    amount added to or subtracted from one weight per iteration.
    iternum: number of iterations.

    Returns an (iternum, n_features) array; row i holds the weights after
    iteration i.
    """
    X=asarray(data,dtype=float)
    y=asarray(label,dtype=float).ravel()
    n=X.shape[1]
    # Centre every column on its OWN mean (axis 0). A global mean would
    # shift all features by the same constant, which is not the
    # standardisation the per-column variance scaling expects.
    col_var=X.var(axis=0)
    # A constant column has zero variance; scale it by 1 so it becomes
    # all zeros instead of NaN and poisoning every error comparison.
    col_var[col_var==0]=1.0
    data1=(X-X.mean(axis=0))/col_var
    y_var=y.var()
    if y_var==0:
        y_var=1.0
    label1=(y-y.mean())/y_var
    ws=zeros(n)
    returnMat=zeros((iternum,n))
    wsmax=ws.copy()
    for i in range(iternum):
        lowesterror=inf
        for j in range(n):
            for sign in (-1,1):
                wstest=ws.copy()
                wstest[j]+=step*sign
                ytest=data1.dot(wstest)
                rss=rsserror(label1,ytest)
                if rss<lowesterror:
                    lowesterror=rss
                    wsmax=wstest
        # Copy so later in-place edits of candidates cannot alias ws.
        ws=wsmax.copy()
        returnMat[i,:]=ws
    return returnMat
#when facing errors, focusing on data!!!just data!!
#the book uses 'regularize' to deal with data, I
#just use mean and var, so the result is a little different
#recite: var(,0);ws.copy();draw 'plot(ws)' to help people find
#the important features
#operation: 10-fold cross validation;100 iterations is enough(according
# to the plot);find the lowest-error model

#NOTES:
#error=bias+measurement error+noise
#reduce some coefficients to zero to simplify model for understand.
#compare bias and variance
    
    
 
    
    
    


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值