from numpy import *
#filename=data+label
#thinking about the method on your own!
#recite: pay attention to the position of h
def load_data(filename):
    """Load a tab-separated dataset.

    Each row of the file holds feature columns followed by the target
    value in the last column.

    filename : path to the tab-separated text file.
    Returns (data, label): data is a list of feature rows (list of floats),
    label is a flat list of float targets.
    """
    data = []
    label = []
    # 'with' guarantees the file handle is closed (the original leaked it).
    with open(filename, 'r') as f:
        for raw in f.readlines():
            fields = raw.strip().split('\t')
            # every column but the last is a feature
            data.append([float(v) for v in fields[:-1]])
            label.append(float(fields[-1]))
    return data, label
#data+label=weights
def mls(data, label):
    """Ordinary least squares: ws = (X^T X)^-1 X^T y.

    data  : list of feature rows (including any constant column).
    label : list of target values.
    Returns the weight column vector (numpy matrix), or None with a
    message when X^T X is singular -- matching the det() guard used by
    ridgeRgres and lwlr_point elsewhere in this file.
    """
    X = mat(data)
    y = mat(label).T
    xTx = X.T * X
    # Guard against a singular normal matrix instead of letting .I raise,
    # consistent with the other solvers in this file.
    if linalg.det(xTx) == 0.0:
        print("this matrix is singular")
        return
    return xTx.I * (X.T * y)
#recite the functions: 1.flatten().A[0] 2.scatter-point;plot-line;
def draw(data, label):
    """Scatter the raw samples and overlay the OLS regression line.

    Assumes column 1 of `data` carries the informative feature
    (column 0 presumably being the constant term -- TODO confirm).
    """
    import matplotlib.pyplot as plt
    X = mat(data)
    # scatter wants flat 1-D arrays, hence flatten().A[0]
    xs = X[:, 1].flatten().A[0]
    ys = mat(label).flatten().A[0]
    plt.scatter(xs, ys)
    # fitted values for every sample, plotted against the same feature
    ws = mls(data, label)
    plt.plot(X[:, 1], X * ws)
    plt.show()
def evaluate(data, label):
    """Correlation matrix between the OLS fitted values and the labels."""
    ws = mls(data, label)
    fitted = (mat(data) * ws).T
    return corrcoef(fitted, mat(label))
#locally weighted linear regression
#recite: two formulas -- ws = (X^T W X)^-1 X^T W y;  w(i) = exp(|x(i)-x| / (-2 k^2));  mat(eye())
#sqrt
#recite:test k=1,0.01,0.003
def lwlr_point(x, data, label, k):
    """Locally weighted linear regression prediction for one query point.

    Solves the weighted normal equation ws = (X^T W X)^-1 X^T W y with
    Gaussian weights W[i,i] = exp(dist(x, x_i) / (-2 k^2)).  Per the
    author's note above, the weight uses the plain distance (sqrt of the
    squared difference), not the squared distance of the textbook formula.

    x     : 1 x d query row (numpy matrix).
    data  : list of feature rows;  label : list of targets.
    k     : kernel bandwidth (smaller k = more local).
    Returns the 1x1 prediction matrix x*ws, or None when X^T W X is
    singular.
    """
    X = mat(data)
    m = shape(data)[0]
    W = mat(eye(m))
    for i in range(m):
        diff = x - X[i, :]
        W[i, i] = exp(sqrt(diff * diff.T) / (-2.0 * k ** 2))
    xTWx = X.T * W * X
    if linalg.det(xTWx) == 0.0:
        # print() call -- the original used a Python 2 print statement
        print("this matrix is singular")
        return
    ws = xTWx.I * (X.T * W * mat(label).T)
    return x * ws
def lwlr(X, data, label, k):
    """Locally weighted prediction for every row of X.

    X     : test rows;  data/label : training set;  k : kernel bandwidth.
    Returns a 1-D numpy array of predictions, one per row of X.
    """
    m = shape(X)[0]
    y = zeros(m)
    for i in range(m):
        # lwlr_point returns a 1x1 matrix; convert explicitly -- implicit
        # conversion of a 1x1 array to a scalar slot was removed in
        # NumPy 1.25.
        y[i] = float(lwlr_point(mat(X)[i, :], data, label, k))
    return y
def rsserror(a, b):
    """Residual sum of squares between prediction a and truth b."""
    resid = a - b
    return (resid ** 2).sum()
#ind=mat(data)[0:99,1].argsort(0)
#xsort=mat(data)[0:99,1][ind][:,0,:]
#plt.plot(xsort[:,1],y1[ind])
#ridge regression: feature>sample
#recite: w = (X^T X + lambda*I)^-1 X^T y (making it invertible; focus on the diagonal)
def ridgeRgres(data, label, lam=0.2):
    """Ridge regression: ws = (X^T X + lam*I)^-1 X^T y.

    The lam*I term keeps the normal matrix invertible even with more
    features than samples (the point of ridge, per the note above).

    data  : list of feature rows;  label : list of targets.
    lam   : regularisation strength (default 0.2).
    Returns the weight column vector, or None with a message when the
    regularised matrix is still singular (possible only at lam == 0).
    """
    X = mat(data)
    nfeat = shape(data)[1]
    denom = X.T * X + mat(eye(nfeat)) * lam
    if linalg.det(denom) == 0.0:
        # print() call -- the original used a Python 2 print statement
        print("this matrix is singular")
        return
    ws = denom.I * (X.T * mat(label).T)
    return ws
def ridgelam(data, label, num):
    """Ridge weights for `num` lambdas spaced as exp(i - 10), i = 0..num-1.

    Returns a (num, n_features) array; row i holds the weights fitted
    with lambda = exp(i - 10).
    """
    nfeat = shape(data)[1]
    WS = zeros((num, nfeat))
    for i in range(num):
        ws = ridgeRgres(data, label, exp(i - 10))
        WS[i] = ws.T
    return WS
#why exp: using exp makes the scope (0.00004,480000000)
#plot(WS):the best lambda is between the scope.
#actually, I still don't know how to choose the best w for ridge.
#lasso regression:adding constraints,the result is the same as ridge
#stagewise:focusing on the position of 'ws=wsmax.copy()'
def stagewise(data,label,step,iternum):
data1=(mat(data)-mean(mat(data)))/var(mat(data),0)
label1=(mat(label)-mean(mat(label)))/var(mat(label))
ws=zeros(shape(data)[1])
returnMat=zeros((iternum,shape(data)[1]))
wsmax=ws.copy()
for i in range(iternum):
lowesterror=inf
for j in range(mat(data).shape[1]):
for sign in [-1,1]:
wstest=ws.copy()
wstest[j]+=step*sign
ytest=mat(data1)*mat(wstest).T
rss=rsserror(label1.flatten().A[0],ytest.flatten().A[0])
if rss<lowesterror:
lowesterror=rss
wsmax=wstest
ws=wsmax.copy()
returnMat[i,:]=ws.T
return returnMat
#when facing errors, focusing on data!!!just data!!
#the book use 'regularize' to deal with data, I
#just use mean and var,so the result is a little #different
#recite: var(,0);ws.copy();draw 'plot(ws)' to help people find
#the important features
#operation: 10-fold cross validation; 100 iterations are enough (according
# to the plot); find the lowest-error model
#NOTES:
#error=bias+measurement error+noise
#reduce some coefficients to zero to simplify the model for easier understanding.
#compare bias and covariance
#filename=data+label
#thinking about the method on your own!
#recite: pay attention to the position of h
def load_data(filename):
    """Load a tab-separated dataset.

    NOTE(review): duplicate of the identical definition earlier in this
    file; consider deleting one copy.

    Each row of the file holds feature columns followed by the target
    value in the last column.
    Returns (data, label): data is a list of feature rows (list of floats),
    label is a flat list of float targets.
    """
    data = []
    label = []
    # 'with' guarantees the file handle is closed (the original leaked it).
    with open(filename, 'r') as f:
        for raw in f.readlines():
            fields = raw.strip().split('\t')
            # every column but the last is a feature
            data.append([float(v) for v in fields[:-1]])
            label.append(float(fields[-1]))
    return data, label
#data+label=weights
def mls(data, label):
    """Ordinary least squares: ws = (X^T X)^-1 X^T y.

    NOTE(review): duplicate of the identical definition earlier in this
    file; consider deleting one copy.

    Returns the weight column vector (numpy matrix), or None with a
    message when X^T X is singular -- matching the det() guard used by
    ridgeRgres and lwlr_point.
    """
    X = mat(data)
    y = mat(label).T
    xTx = X.T * X
    # Guard against a singular normal matrix instead of letting .I raise.
    if linalg.det(xTx) == 0.0:
        print("this matrix is singular")
        return
    return xTx.I * (X.T * y)
#recite the functions: 1.flatten().A[0] 2.scatter-point;plot-line;
def draw(data, label):
    """Scatter the raw samples and overlay the OLS regression line.

    NOTE(review): duplicate of the identical definition earlier in this
    file; consider deleting one copy.
    Assumes column 1 of `data` carries the informative feature
    (column 0 presumably being the constant term -- TODO confirm).
    """
    import matplotlib.pyplot as plt
    X = mat(data)
    # scatter wants flat 1-D arrays, hence flatten().A[0]
    xs = X[:, 1].flatten().A[0]
    ys = mat(label).flatten().A[0]
    plt.scatter(xs, ys)
    # fitted values for every sample, plotted against the same feature
    ws = mls(data, label)
    plt.plot(X[:, 1], X * ws)
    plt.show()
def evaluate(data, label):
    """Correlation matrix between the OLS fitted values and the labels.

    NOTE(review): duplicate of the identical definition earlier in this
    file; consider deleting one copy.
    """
    ws = mls(data, label)
    fitted = (mat(data) * ws).T
    return corrcoef(fitted, mat(label))
#locally weighted linear regression
#recite: two formulas -- ws = (X^T W X)^-1 X^T W y;  w(i) = exp(|x(i)-x| / (-2 k^2));  mat(eye())
#sqrt
#recite:test k=1,0.01,0.003
def lwlr_point(x, data, label, k):
    """Locally weighted linear regression prediction for one query point.

    NOTE(review): duplicate of the identical definition earlier in this
    file; consider deleting one copy.

    Solves ws = (X^T W X)^-1 X^T W y with Gaussian weights
    W[i,i] = exp(dist(x, x_i) / (-2 k^2)); per the author's note above,
    the weight uses the plain distance (sqrt), not the squared distance.
    Returns the 1x1 prediction matrix x*ws, or None when X^T W X is
    singular.
    """
    X = mat(data)
    m = shape(data)[0]
    W = mat(eye(m))
    for i in range(m):
        diff = x - X[i, :]
        W[i, i] = exp(sqrt(diff * diff.T) / (-2.0 * k ** 2))
    xTWx = X.T * W * X
    if linalg.det(xTWx) == 0.0:
        # print() call -- the original used a Python 2 print statement
        print("this matrix is singular")
        return
    ws = xTWx.I * (X.T * W * mat(label).T)
    return x * ws
def lwlr(X, data, label, k):
    """Locally weighted prediction for every row of X.

    NOTE(review): duplicate of the identical definition earlier in this
    file; consider deleting one copy.
    Returns a 1-D numpy array of predictions, one per row of X.
    """
    m = shape(X)[0]
    y = zeros(m)
    for i in range(m):
        # lwlr_point returns a 1x1 matrix; convert explicitly -- implicit
        # conversion of a 1x1 array to a scalar slot was removed in
        # NumPy 1.25.
        y[i] = float(lwlr_point(mat(X)[i, :], data, label, k))
    return y
def rsserror(a, b):
    """Residual sum of squares between prediction a and truth b.

    NOTE(review): duplicate of the identical definition earlier in this
    file; consider deleting one copy.
    """
    resid = a - b
    return (resid ** 2).sum()
#ind=mat(data)[0:99,1].argsort(0)
#xsort=mat(data)[0:99,1][ind][:,0,:]
#plt.plot(xsort[:,1],y1[ind])
#ridge regression: feature>sample
#recite: w = (X^T X + lambda*I)^-1 X^T y (making it invertible; focus on the diagonal)
def ridgeRgres(data, label, lam=0.2):
    """Ridge regression: ws = (X^T X + lam*I)^-1 X^T y.

    NOTE(review): duplicate of the identical definition earlier in this
    file; consider deleting one copy.

    The lam*I term keeps the normal matrix invertible even with more
    features than samples.  Returns the weight column vector, or None
    with a message when the regularised matrix is still singular
    (possible only at lam == 0).
    """
    X = mat(data)
    nfeat = shape(data)[1]
    denom = X.T * X + mat(eye(nfeat)) * lam
    if linalg.det(denom) == 0.0:
        # print() call -- the original used a Python 2 print statement
        print("this matrix is singular")
        return
    ws = denom.I * (X.T * mat(label).T)
    return ws
def ridgelam(data, label, num):
    """Ridge weights for `num` lambdas spaced as exp(i - 10), i = 0..num-1.

    NOTE(review): duplicate of the identical definition earlier in this
    file; consider deleting one copy.
    Returns a (num, n_features) array; row i holds the weights fitted
    with lambda = exp(i - 10).
    """
    nfeat = shape(data)[1]
    WS = zeros((num, nfeat))
    for i in range(num):
        ws = ridgeRgres(data, label, exp(i - 10))
        WS[i] = ws.T
    return WS
#why exp: using exp makes the scope (0.00004,480000000)
#plot(WS):the best lambda is between the scope.
#actually, I still don't know how to choose the best w for ridge.
#lasso regression:adding constraints,the result is the same as ridge
#stagewise:focusing on the position of 'ws=wsmax.copy()'
def stagewise(data,label,step,iternum):
data1=(mat(data)-mean(mat(data)))/var(mat(data),0)
label1=(mat(label)-mean(mat(label)))/var(mat(label))
ws=zeros(shape(data)[1])
returnMat=zeros((iternum,shape(data)[1]))
wsmax=ws.copy()
for i in range(iternum):
lowesterror=inf
for j in range(mat(data).shape[1]):
for sign in [-1,1]:
wstest=ws.copy()
wstest[j]+=step*sign
ytest=mat(data1)*mat(wstest).T
rss=rsserror(label1.flatten().A[0],ytest.flatten().A[0])
if rss<lowesterror:
lowesterror=rss
wsmax=wstest
ws=wsmax.copy()
returnMat[i,:]=ws.T
return returnMat
#when facing errors, focusing on data!!!just data!!
#the book use 'regularize' to deal with data, I
#just use mean and var,so the result is a little #different
#recite: var(,0);ws.copy();draw 'plot(ws)' to help people find
#the important features
#operation: 10-fold cross validation; 100 iterations are enough (according
# to the plot); find the lowest-error model
#NOTES:
#error=bias+measurement error+noise
#reduce some coefficients to zero to simplify the model for easier understanding.
#compare bias and covariance