前言:
本篇文章基于《机器学习实战》第八章回归部分,加上了自己的理解。
回归算法与偏差方差的平衡
一、标准线性回归
标准回归函数和数据导入函数
from numpy import *
def loadDataSet(fileName):
    """Load a tab-separated data file into feature rows and labels.

    Every row holds `numFeat` feature columns followed by one trailing
    target column; the very first row is data too (there is no header).

    Args:
        fileName: path to a TSV file.

    Returns:
        (dataMat, labelMat): list of feature rows (lists of floats) and
        the list of float targets, one per row.
    """
    # Bug fix vs. original: the file was opened twice and neither handle
    # was ever closed. One `with` block both counts columns and parses.
    with open(fileName) as fr:
        # Feature count = total columns minus the trailing label column.
        numFeat = len(fr.readline().split('\t')) - 1
        fr.seek(0)  # rewind so the first line is parsed as data as well
        dataMat = []
        labelMat = []
        for line in fr:
            curLine = line.strip().split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def standRegres(xArr, yArr):
    """Ordinary least squares via the normal equations.

    Args:
        xArr: feature rows (each row already contains any bias column).
        yArr: one target value per row.

    Returns:
        Column matrix of regression coefficients, or None when X^T X is
        singular (a warning is printed in that case).
    """
    X = mat(xArr)
    y = mat(yArr).T
    gram = X.T * X
    # numpy.linalg.det(): a zero determinant means the Gram matrix
    # has no inverse, so the normal equations cannot be solved.
    if linalg.det(gram) == 0.0:
        print('This matrix is singular,cannot do inverse')
        return
    # Normal equation: w = (X^T X)^(-1) X^T y  (regression coefficients)
    return gram.I * (X.T * y)
测试
# Load the book's sample data (hard-coded local Windows path to ex0.txt)
# and solve for the regression coefficients.
xArr,yArr = loadDataSet("E:\\DataMining\\资料\\machinelearninginaction\\Ch08\\ex0.txt")
ws = standRegres(xArr,yArr)
ws
matrix([[3.00774324],
[1.69532264]])
开始拟合
# Convert the loaded lists to numpy matrices for linear algebra.
xMat = mat(xArr)
yMat = mat(yArr)
# Fitted values (a.k.a. predictions): X times the learned coefficients.
yHat = xMat*ws
绘制拟合直线图
# Plot the data points together with the fitted regression line.
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
# Scatter the raw points: column 1 of X (the feature) against y.
# .flatten().A[0] turns a matrix slice into a flat 1-D array.
ax.scatter(xMat[:,1].flatten().A[0],yMat.T[:,0].flatten().A[0])
xCopy = xMat.copy()
# Sort rows by feature value so the line is drawn left to right.
xCopy.sort(0)
yHat = xCopy*ws
ax.plot(xCopy[:,1],yHat)
plt.show()
计算两个序列的相关系数,评价模型的好坏
# numpy's corrcoef(): correlation between predictions and actual targets,
# used here as a goodness-of-fit measure for the model.
yHat = xMat*ws
corrcoef(yHat.T,yMat)
array([[1. , 0.98647356],
[0.98647356, 1. ]])
解释:对角线1.0,表明yHat,yMat与自身的匹配是最完美的,而yHat,yMat之间的相关系数为0.986
二、局部加权线性回归
def lwlr(testPoint, xArr, yArr, k=1.0):
    """Locally weighted linear regression prediction for one query point.

    Every training row gets a Gaussian-kernel weight based on its
    distance to `testPoint`; the weighted least-squares problem is then
    solved and used to predict the target at `testPoint`.

    Args:
        testPoint: 1 x n feature row (matrix-like) to predict for.
        xArr: training feature rows.
        yArr: training targets, one per row.
        k: kernel bandwidth; smaller k gives a more local fit.

    Returns:
        The prediction `testPoint * ws` (a 1x1 matrix), or None when the
        weighted Gram matrix is singular (a warning is printed).
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    m = shape(xMat)[0]
    # eye(m) builds the m x m identity; used as a diagonal weight matrix.
    weights = mat(eye((m)))
    # Gaussian kernel: weight decays with squared distance from testPoint.
    for j in range(m):
        diffMat = testPoint - xMat[j, :]
        weights[j, j] = exp(diffMat * diffMat.T / (-2.0 * k ** 2))
    xTx = xMat.T * (weights * xMat)
    if linalg.det(xTx) == 0.0:
        print('This matrix is singular,cannot do inverse')
        return
    # matrix.I is the inverse; solve the weighted normal equations.
    ws = xTx.I * (xMat.T * (weights * yMat))
    # Bug fix: the original returned the undefined name `testPoin`
    # (NameError). The intended result is the prediction at testPoint.
    return testPoint * ws