# 机器学习实战--局部加权线性回归（LWR）

## 二 代码

import numpy as np
import matplotlib.pyplot as plt

"""
:param file:文件路径
:return: 返回测试数据与标签
"""

ftrain = open(file)
trainingset = []
labelset = []
LineArr = line.strip().split('\t')
trainingset.append([float(LineArr[0]),float(LineArr[1])])
labelset.append(float(LineArr[2]))
return trainingset, labelset

def LWRTest(traingxarr, xarr, yarr, k):
    """
    Predict the response at one query point with locally weighted
    linear regression (Gaussian kernel).

    :param traingxarr: query point (1 x n feature row; list or matrix)
    :param xarr: training feature rows
    :param yarr: training labels
    :param k: kernel bandwidth — smaller k weights near neighbours more
    :return: predicted value as a 1x1 matrix, or None when X^T W X is singular
    """
    # np.asmatrix: `np.mat` was removed in NumPy 2.0
    Xmat = np.asmatrix(xarr)
    Ymat = np.asmatrix(yarr)
    # m is the number of training samples; weights starts as the identity
    m = np.shape(Xmat)[0]
    weights = np.eye(m)
    for i in range(m):
        diffmat = traingxarr - Xmat[i, :]
        # Gaussian kernel: closer training points get larger weight.
        # [0, 0] extracts the scalar — assigning a 1x1 matrix to a
        # scalar slot is deprecated/removed in modern NumPy.
        weights[i, i] = np.exp(-(diffmat * diffmat.transpose())[0, 0] / (2.0 * k ** 2))
    XT = Xmat.transpose() * (weights * Xmat)
    if np.linalg.det(XT) == 0:  # determinant 0: matrix cannot be inverted
        print('This Matrix is singular, cannot do inverse')
        return
    # theta holds the locally fitted regression coefficients
    # (weighted normal equation: theta = (X^T W X)^-1 X^T W y)
    theta = XT.I * Xmat.transpose() * (weights * Ymat.transpose())
    ytest = traingxarr * theta
    return ytest

def LWR(trainingset, xarr, yarr, k):
    """
    Run LWR prediction for every query point in trainingset.

    :param trainingset: query points, one prediction per row
    :param xarr: training feature rows
    :param yarr: training labels
    :param k: kernel bandwidth
    :return: np.array of predicted values, one per query point
    """
    # m is the number of query points
    m = np.shape(trainingset)[0]
    ytest = np.zeros(m)
    for i in range(m):
        # LWRTest returns a 1x1 matrix; [0, 0] extracts the scalar
        # (implicit size-1-array -> scalar conversion is deprecated)
        ytest[i] = LWRTest(trainingset[i], xarr, yarr, k)[0, 0]
    return ytest

def Show(xarr, yarr, ytest, k):
    """
    Plot the LWR fit over the raw samples, then save and show the figure.

    :param xarr: sample feature rows (column 1 is the x-coordinate)
    :param yarr: sample label values
    :param ytest: LWR predictions, one per sample
    :param k: bandwidth value (used for the title and file name)
    """
    # np.asmatrix: `np.mat` was removed in NumPy 2.0
    xmat = np.asmatrix(xarr)
    # sort by the x-coordinate so the fitted curve is drawn left-to-right
    strInd = xmat[:, 1].argsort(0)
    xSort = xmat[strInd][:, 0, :]
    fig = plt.figure()
    # BUG FIX: `ax` was used below but never created in the original
    ax = fig.add_subplot(111)
    ax.plot(xSort[:, 1], ytest[strInd])
    ax.scatter(xmat[:, 1].flatten().A[0], np.asmatrix(yarr).T.flatten().A[0], s=2, c='red')
    title = "k = "
    title += str(k)
    ax.set_title(title)
    # NOTE(review): hard-coded absolute Windows path — only works on the
    # author's machine; consider making it a parameter.
    plt.savefig("D:\\Program Files (x86)\\机器学习\\斯坦福大学机器学习\\中文笔记\\Linear Regression\\LWR\\"+title+".jpg",dpi=400,bbox_inches='tight')
    plt.show()

def run_main():
    """
    Entry point: load the sample data and plot LWR fits for several
    bandwidth values.
    """
    file = 'D:\\Program Files (x86)\\机器学习\\机器学习实战\\源代码\\Ch08\\ex0.txt'
    # NOTE(review): the original paste lost the data-loading call —
    # `xarr`/`yarr` were used without ever being assigned; reconstructed.
    xarr, yarr = loaddataset(file)
    k = [1.0, 0.01, 0.003]
    for i in k:
        ytest = LWR(xarr, xarr, yarr, i)
        Show(xarr, yarr, ytest, i)

if __name__ == '__main__':
    run_main()

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/8/16 14:07
# @Author  : DaiPuWei
# E-Mail   : 771830171@qq.com
# @Site    :
# @File    : LWR.py
# @Software: PyCharm

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

class LWR:
    """Locally weighted linear regression with a Gaussian kernel."""

    def __init__(self, train_data, train_predict):
        """
        Construct the model by storing the training set.

        :param train_data: training feature matrix (np.array, one row per sample)
        :param train_predict: target values for train_data (column vector)
        """
        self.Train_Data = train_data
        self.Train_Predict = train_predict

    def Gaussian_Weight(self, data, k):
        """
        Gaussian kernel weight for an offset vector.

        :param data: offset between the query point and a training sample
        :param k: bandwidth coefficient
        :return: scalar weight in (0, 1]
        """
        # squared Euclidean norm of the offset
        # (renamed from `sum`, which shadowed the builtin)
        sq_dist = np.sum(data * data)
        return np.exp(sq_dist / (-2 * k * k))

    def predict(self, test_data, k):
        """
        Predict the target value for a single query point.

        :param test_data: query feature vector (np.array)
        :param k: bandwidth coefficient
        :return: predicted scalar value
        """
        size = np.shape(self.Train_Data)[0]
        # diagonal weight matrix: one Gaussian weight per training sample
        weights = np.zeros((size, size))
        for i in range(size):
            diff = test_data - self.Train_Data[i]
            weights[i, i] = self.Gaussian_Weight(diff, k)
        # weighted normal equation: theta = (X^T W X)^-1 X^T W y
        XT = self.Train_Data.T.dot(weights)
        inv = np.linalg.inv(XT.dot(self.Train_Data))
        theta = inv.T.dot(XT.dot(self.Train_Predict))
        # dot product of the fitted coefficients with the query point
        test_predict = np.sum(theta.T * test_data)
        return test_predict

    def Test(self, Test_Data, k):
        """
        Predict target values for a batch of query points.

        :param Test_Data: iterable of query feature vectors
        :param k: bandwidth coefficient
        :return: np.array of predictions, one per query point
        """
        Test_Predict = []
        for test_data in Test_Data:
            test_predict = self.predict(test_data, k)
            Test_Predict.append(test_predict)
        Test_Predict = np.array(Test_Predict)
        return Test_Predict

def Load_Data(file):
    """
    Load a tab-separated sample file into numpy arrays.

    :param file: path to the data file; each line holds three
                 tab-separated float columns: x0, x1, label
    :return: (trainingset, labelset) — features as an (m, 2) array and
             labels as an (m, 1) column vector
    """
    # NOTE(review): the original paste lost the `def` header and the
    # line-reading loop; reconstructed here from the surviving body.
    trainingset = []
    labelset = []
    # `with` closes the file even on error (original leaked the handle)
    with open(file) as ftrain:
        for line in ftrain.readlines():
            LineArr = line.strip().split('\t')
            trainingset.append([float(LineArr[0]), float(LineArr[1])])
            labelset.append(float(LineArr[2]))
    trainingset = np.array(trainingset)
    labelset = np.array(labelset).reshape((len(labelset), 1))
    return trainingset, labelset

def run_main():
    """
    Entry point: load ex0.txt, fit LWR at several bandwidths and plot
    each fit over the raw samples.
    """
    path = "./ex0.txt"

    # configure matplotlib so Chinese text renders correctly
    mpl.rcParams['font.sans-serif'] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False

    # NOTE(review): the original paste lost the data-loading call —
    # `trainingset`/`labelset` were used without being assigned; reconstructed.
    trainingset, labelset = Load_Data(path)

    K = [1.0, 0.1, 0.01, 0.003]
    for k in K:
        lwr = LWR(trainingset, labelset)
        testpredict = lwr.Test(trainingset, k)
        plt.scatter(trainingset[:, 1], labelset, s=2, c='red')
        # sort by the x-coordinate so the curve is drawn left-to-right
        index = trainingset[:, 1].argsort(0)
        xdata = trainingset[index][:, 1]
        plt.plot(xdata, testpredict[index], 'b')
        plt.title("k=%f" % (k))
        plt.savefig("k=%f.jpg" % (k))
        plt.show()
        plt.close()

if __name__ == '__main__':
    run_main()

k = 1.0 (欠拟合)

k = 0.01(最佳拟合)

k = 0.003(过拟合)

• 评论

• 下一篇
• 上一篇