08 线性回归

最新推荐文章于 2021-02-21 11:50:10 发布

肥猫64

最新推荐文章于 2021-02-21 11:50:10 发布

阅读量102

点赞数

分类专栏：机器学习类笔记文章标签：机器学习数据挖掘

版权声明：本文为博主原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上原文出处链接和本声明。

本文链接：https://blog.csdn.net/weixin_46403555/article/details/104628102

版权

机器学习类笔记专栏收录该内容

4 篇文章 0 订阅

订阅专栏

对于单变量线性回归而言，在误差函数服从正态分布的情况下，从几何意义出发的最小二乘法与从概率意义出发的最大似然估计是等价的

几何意义推导:

$1 一元线性回归:

y = ax + b

图1 y 到直线截距最小

手工计算例子:

$2 多元线性回归:

机器学习实战的推导：

$3 代码实例(机器学习实战):

from numpy import *

def loadDataSet(fileName): #装载文件转化 tab分割的浮点数

numFeat = len(open(fileName).readline().split('\t')) - 1 #get number of fields

dataMat = []; labelMat = []

fr = open(fileName)

for line in fr.readlines():

lineArr =[]

curLine = line.strip().split('\t')

for i in range(numFeat):

lineArr.append(float(curLine[i]))

dataMat.append(lineArr)

labelMat.append(float(curLine[-1]))

return dataMat,labelMat

#标准线性回归方法

def standRegres(xArr,yArr):

xMat = mat(xArr); yMat = mat(yArr).T

xTx = xMat.T*xMat

if linalg.det(xTx) == 0.0:

print("This matrix is singular, cannot do inverse")

return

ws = xTx.I * (xMat.T*yMat)

return ws

#加权线性回归方法

def lwlr(testPoint,xArr,yArr,k=1.0):

xMat = mat(xArr); yMat = mat(yArr).T

m = shape(xMat)[0]

weights = mat(eye((m)))

for j in range(m): #next 2 lines create weights matrix

diffMat = testPoint - xMat[j,:] #

weights[j,j] = exp(diffMat*diffMat.T/(-2.0*k**2))

xTx = xMat.T * (weights * xMat)

if linalg.det(xTx) == 0.0:

print("This matrix is singular, cannot do inverse")

return

ws = xTx.I * (xMat.T * (weights * yMat))

return testPoint * ws

#加权回归测试

def lwlrTest(testArr,xArr,yArr,k=1.0): #loops over all the data points and applies lwlr to each one

m = shape(testArr)[0]

yHat = zeros(m)

for i in range(m):

yHat[i] = lwlr(testArr[i],xArr,yArr,k)

return yHat

def lwlrTestPlot(xArr,yArr,k=1.0): #same thing as lwlrTest except it sorts X first

yHat = zeros(shape(yArr)) #easier for plotting

xCopy = mat(xArr)

xCopy.sort(0)

for i in range(shape(xArr)[0]):

yHat[i] = lwlr(xCopy[i],xArr,yArr,k)

return yHat,xCopy

def rssError(yArr,yHatArr): #yArr and yHatArr both need to be arrays

return ((yArr-yHatArr)**2).sum()

$4 岭回归：

解决过拟合问题，常见的做法是正则化，即添加额外的惩罚项。

￥4.1 岭回归代码

#岭回归方法

def ridgeRegres(xMat,yMat,lam=0.2):

xTx = xMat.T*xMat

denom = xTx + eye(shape(xMat)[1])*lam

if linalg.det(denom) == 0.0:

print("This matrix is singular, cannot do inverse")

return

ws = denom.I * (xMat.T*yMat)

return ws

#岭回归测试

def ridgeTest(xArr,yArr):

xMat = mat(xArr); yMat=mat(yArr).T

yMean = mean(yMat,0)

yMat = yMat - yMean #to eliminate X0 take mean off of Y

#regularize X's

xMeans = mean(xMat,0) #calc mean then subtract it off

xVar = var(xMat,0) #calc variance of Xi then divide by it

xMat = (xMat - xMeans)/xVar

numTestPts = 30

wMat = zeros((numTestPts,shape(xMat)[1]))

for i in range(numTestPts):

ws = ridgeRegres(xMat,yMat,exp(i-10))

wMat[i,:]=ws.T

return wMat

def regularize(xMat):#regularize by columns

inMat = xMat.copy()

inMeans = mean(inMat,0) #calc mean then subtract it off

inVar = var(inMat,0) #calc variance of Xi then divide by it

inMat = (inMat - inMeans)/inVar

return inMat

def stageWise(xArr,yArr,eps=0.01,numIt=100):

xMat = mat(xArr); yMat=mat(yArr).T

yMean = mean(yMat,0)

yMat = yMat - yMean #can also regularize ys but will get smaller coef

xMat = regularize(xMat)

m,n=shape(xMat)

returnMat = zeros((numIt,n)) #testing code remove

ws = zeros((n,1)); wsTest = ws.copy(); wsMax = ws.copy()

for i in range(numIt):

print(ws.T)

lowestError = inf;

for j in range(n):

for sign in [-1,1]:

wsTest = ws.copy()

wsTest[j] += eps*sign

yTest = xMat*wsTest

rssE = rssError(yMat.A,yTest.A)

if rssE < lowestError:

lowestError = rssE

wsMax = wsTest

ws = wsMax.copy()

returnMat[i,:]=ws.T

return returnMat

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
08 线性回归

对于单变量线性回归而言，在误差函数服从正态分布的情况下，从几何意义出发的最小二乘法与从概率意义出发的最大似然估计是等价的几何意义推导: ...
复制链接

扫一扫

专栏目录

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。