# 基本线性回归、局部加权线性回归和缩减方法（岭回归、前向逐步回归） in Python

## 1、基本线性回归 LR

from numpy import *

def loadDataSet(fileName):
    """Load a tab-separated data file.

    Each line holds the feature values followed by the target value in
    the last column.  Returns (dataMat, labelMat): a list of feature
    rows and the list of target floats.

    NOTE(review): the original chunk lost the `def` header, the
    `numFeat` computation and the per-line loop to extraction; this
    restores the conventional form matching the surviving lines.
    """
    # number of feature columns = total columns - 1 (last column is the label)
    numFeat = len(open(fileName).readline().split('\t')) - 1
    dataMat = []
    labelMat = []
    fr = open(fileName)
    for line in fr.readlines():
        lineArr = []
        curLine = line.strip().split('\t')
        for ii in range(numFeat):
            lineArr.append(float(curLine[ii]))
        dataMat.append(lineArr)
        labelMat.append(float(curLine[-1]))
    fr.close()  # close the handle instead of leaking it
    return dataMat, labelMat

### Standard Regression ###
def stdRegres(xArr, yArr):
    """Ordinary least-squares regression.

    xArr : list/array of feature rows (including the constant column).
    yArr : list/array of target values.
    Returns the weight column vector ws = (X^T X)^{-1} X^T y as a
    numpy matrix, or None when X^T X is singular.

    Fix: the Python-2-only `print` statement is replaced by the
    `print(...)` call, which behaves identically on Python 2 and 3.
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    xTx = xMat.T * xMat
    # a zero determinant means X^T X has no inverse -> bail out
    if linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    ws = xTx.I * (xMat.T * yMat)
    return ws

def testing(xArr, yArr, ws):
    """Plot the fitted regression line over the raw data points and
    return the correlation matrix between predictions and targets.

    Fix: the original body referenced `ax` without ever creating it;
    the missing `fig.add_subplot(111)` call is restored.
    """
    xMat = mat(xArr)
    yMat = mat(yArr)
    yHat = xMat * ws
    # correlation between predicted and actual responses
    corr = corrcoef(yHat.T, yMat)
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)  # was missing: `ax` was used but never defined
    # assumes column 0 of X is the constant 1.0 and column 1 the feature -- TODO confirm
    ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])
    xCopy = xMat.copy()
    xCopy.sort(0)  # sort so the fitted line is drawn left-to-right
    yHat = xCopy * ws
    ax.plot(xCopy[:, 1], yHat)
    plt.show()
    return corr

## 2、局部加权线性回归 LWLR

LWLR即是利用了此原理，给待预测点附近的每个点赋予一定的权重（LR则是所有点权重相同），然后在这个子集上（实际这里仍为整个训练数据集）执行LR，最终解出的回归系数为 $\hat{w} = (X^T W X)^{-1} X^T W y$。权重矩阵 $W$ 用来对每个数据点赋予权重。LWLR使用与SVM类似的核来对附近的点赋予更高的权重，最常用的核就是高斯核，高斯核对应的权重矩阵为一对角矩阵，其对角元为 $w(i,i) = \exp\!\left(-\frac{\|x^{(i)} - x\|^2}{2k^2}\right)$，其中 $x$ 为待预测点，参数 $k$ 决定了对附近的点赋予多大的权重：$k$ 越小，相应更关注附近的点而忽视较远的点。当 $k$ 为1时，基本相当于LR；当 $k$ 很小时，易导致过拟合。

### Locally Weighted Linear Regression ###
def lwlr(testPoint, xArr, yArr, k=1.0):
    """Locally weighted linear regression prediction for one point.

    Each training sample is weighted with a Gaussian kernel
    w_i = exp(-|testPoint - x_i|^2 / (2 k^2)); a smaller k focuses the
    fit on the neighbourhood of testPoint.
    Returns testPoint * ws (a 1x1 matrix), or None when the weighted
    normal matrix is singular.

    Fixes: `eys(N)` was a typo for `eye(N)` (NameError at runtime);
    the Python-2-only `print` statement became a `print(...)` call.
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    N = shape(xMat)[0]
    weights = mat(eye(N))  # diagonal weight matrix, one weight per sample
    for ii in range(N):
        diffMat = testPoint - xMat[ii, :]
        weights[ii, ii] = exp(diffMat * diffMat.T / (-2.0 * k ** 2))
    xTx = xMat.T * (weights * xMat)
    if linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    ws = xTx.I * (xMat.T * (weights * yMat))
    return testPoint * ws

def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Predict a response for every row of testArr via LWLR with bandwidth k.

    Returns a 1-D numpy array of predictions, one per test point.
    """
    predictions = zeros(shape(testArr)[0])
    for idx, point in enumerate(testArr):
        predictions[idx] = lwlr(point, xArr, yArr, k)
    return predictions

def rssError(yArr, yHatArr):
    """Residual sum of squares between targets and predictions.

    Both arguments are expected to be numpy arrays of equal length.

    NOTE(review): only the `return` line survived extraction; the
    conventional `def rssError(yArr, yHatArr)` header is restored.
    """
    return ((yArr - yHatArr) ** 2).sum()

## 3、缩减方法

### 3.1岭回归

### Ridge Regression ###
def ridgeRegres(xMat, yMat, lam=0.2):
    """Ridge regression: ws = (X^T X + lam*I)^{-1} X^T y.

    xMat, yMat : numpy matrices (callers standardize them first, see ridgeTest).
    lam        : L2 penalty; lam=0 reduces to OLS and may be singular.
    Returns the weight column vector, or None when the penalized
    normal matrix is singular (only possible when lam == 0).

    Fix: the Python-2-only `print` statement is replaced by the
    `print(...)` call, which behaves identically on Python 2 and 3.
    """
    xTx = xMat.T * xMat
    denom = xTx + lam * eye(shape(xMat)[1])
    if linalg.det(denom) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    ws = denom.I * (xMat.T * yMat)
    return ws

def ridgeTest(xArr, yArr):
    """Sweep the ridge penalty over 30 log-spaced values.

    Features are standardized (zero mean, unit variance per column) and
    targets centered before fitting, as ridge regression requires.
    Returns a (30, n_features) array: row i holds the weights fitted
    with lambda = exp(i - 10).
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    # standardize X and center y so the penalty treats all features equally
    xMat = (xMat - mean(xMat, 0)) / var(xMat, 0)
    yMat = yMat - mean(yMat, 0)
    numTestPts = 30
    wMat = zeros((numTestPts, shape(xMat)[1]))
    for idx in range(numTestPts):
        coeffs = ridgeRegres(xMat, yMat, exp(idx - 10))
        wMat[idx, :] = coeffs.T
    return wMat


### 3.2 前向逐步回归

### Forward Stagewise Regression ###
def stageWise(xArr, yArr, step=0.01, numIt=100) :
xMat = mat(xArr)
xMat = regularize(xMat)
yMat = mat(yArr).T
yMean = mean(yMat)
yMat = yMat - yMean
N, n = shape(xMat)
returnMat = zeros((numIt, n))
ws = zeros((n,1))
wsTest = ws.copy()
weMax = ws.copy()
for ii in range(numIt) :
print ws.T
lowestErr = inf
for jj in range(n) :
wsTest = ws.copy()
wsTest[jj] += step*sign
yTest = xMat*wsTest