# 自己造数据的能力是有限的,我们可以借助外部数据进行练习。本次使用的数据源:https://github.com/Jack-Cherish/Machine-Learning/blob/master/kNN/2.%E6%B5%B7%E4%BC%A6%E7%BA%A6%E4%BC%9A/datingTestSet.txt ,下载后保存为 data.txt(与下方代码读取的文件名一致)即可试用。
import matplotlib.pyplot as plt
import matplotlib.colors
import operator
import numpy as np
#读取文件数据
def fileread(filem):
    """Load a tab-separated data set into a feature matrix and label list.

    Every non-blank line is expected to hold three numeric feature columns
    followed by a textual class name in the last column. Blank lines are
    skipped.

    Args:
        filem: Path of the text file to read.

    Returns:
        A tuple ``(dataH, lable)``. ``dataH`` is a float numpy array with one
        row per record and three columns. ``lable`` is a list of integer
        class codes (``didntLike`` -> 1, ``largeDoses`` -> 2,
        ``smallDoses`` -> 3) aligned row-for-row with ``dataH``.

    Raises:
        ValueError: If a record's last column is not one of the known class
            names, or a feature value cannot be parsed as a number.
    """
    label_codes = {"didntLike": 1, "largeDoses": 2, "smallDoses": 3}

    # The context manager closes the file even if parsing fails later.
    with open(filem) as f_open:
        records = [ha.strip().split('\t') for ha in f_open if ha.strip()]

    # Size the matrix from the records actually kept so no all-zero filler
    # rows remain when the file contains blank lines.
    dataH = np.zeros((len(records), 3))
    lable = []
    for index, f_zhibaio in enumerate(records):
        name = f_zhibaio[-1]
        # Fail loudly: silently skipping an unknown class would leave the
        # label list shorter than, and misaligned with, the feature matrix.
        if name not in label_codes:
            raise ValueError(
                "unknown class label %r in record %d of %r"
                % (name, index + 1, filem)
            )
        dataH[index, :] = [float(value) for value in f_zhibaio[:3]]
        lable.append(label_codes[name])
    return dataH, lable
#构建分类器
def classfen(test, train, labe, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``test`` and every row of
    ``train``.

    Args:
        test: Feature vector of the sample to classify (sequence or array
            with one value per column of ``train``).
        train: 2-D array-like of training samples, one row per sample.
        labe: Sequence of class labels; ``labe[i]`` belongs to ``train[i]``.
        k: Number of nearest neighbours that vote; must satisfy
            ``1 <= k <= number of training rows``.

    Returns:
        The label with the most votes among the k nearest rows. When several
        labels tie, the one whose first vote came from the nearest
        neighbour wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training rows.
    """
    # Kept for backward compatibility: the ranked (label, votes) list is
    # also published as the module-level name ``so``.
    global so

    train = np.asarray(train)
    hangshu = train.shape[0]
    if not 1 <= k <= hangshu:
        raise ValueError(
            "k must be between 1 and the number of training rows (%d), got %r"
            % (hangshu, k)
        )

    # Broadcasting subtracts ``test`` from every training row at once.
    xiangjian = np.asarray(test) - train
    kaifang = np.sqrt((xiangjian ** 2).sum(axis=1))
    paixu = kaifang.argsort()

    # Tally the labels of the k closest rows, nearest first.
    count = {}
    for neighbour in paixu[:k]:
        vote = labe[neighbour]
        count[vote] = count.get(vote, 0) + 1

    # sorted() is stable, so equal vote counts keep nearest-first order.
    so = sorted(count.items(), key=operator.itemgetter(1), reverse=True)
    return so[0][0]
if __name__ == '__main__':
    # Demo: load the data set, plot it together with one query sample, then
    # classify that sample with a 3-nearest-neighbour vote.
    file = 'data.txt'
    gro, ll = fileread(file)
    te = [60000, 5, 800]

    # Scatter the first two feature columns, coloured by class code, and
    # mark the query sample with a large red star.
    plt.scatter(gro[:, 0], gro[:, 1], c=ll)
    plt.scatter(te[0:1], te[1:2], marker='*', c='red', s=400)
    plt.show()

    tw = classfen(te, gro, ll, 3)
    print(te)
    print(te[0:1])
    # The predicted class code was previously computed but never shown.
    print(tw)
# 最近较忙,如有疑问,欢迎在博客留言。