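# Program adapted from the book "Machine Learning in Action" (《机器学习实战》).
# Implements the k-nearest-neighbours (kNN) algorithm in Python. The example
# assumes the samples fall into two classes, A and B: the 3 training points
# nearest to a query point are found and their labels are tallied to decide
# which class the query belongs to.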
from numpy import *
import operator
def createDataSet():
    """Build the four-point toy training set used by the kNN demo.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a NumPy array with one
        two-coordinate sample point per row (four rows in total) and
        ``labels`` is a list holding the class label (``'A'`` or ``'B'``)
        of the corresponding row.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    # Echo the labels so the tutorial output shows what was created.
    print(labels)
    return group, labels
# Build the sample training set once at module level and echo both parts
# so the script output shows the data the classifier will work with.
group, labels = createDataSet()
print(labels)
print(group)
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    The distance used is the Euclidean distance between ``inX`` and every
    row of ``dataSet``. Intermediate values are printed as a learning aid.

    Args:
        inX: The query point (a sequence of coordinates).
        dataSet: NumPy array with one training sample per row.
        labels: Sequence of class labels, indexed like the rows of
            ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``k`` nearest samples.
        When several labels tie, the sort is stable, so on Python 3.7+
        (ordered dicts) the label seen first, i.e. the nearer one, wins.

    Raises:
        IndexError: If ``k`` is larger than the number of samples, or if
            ``k`` is less than 1 (no votes are collected).
    """
    dataSetSize = dataSet.shape[0]  # number of training samples (rows)
    print(dataSetSize)

    # Repeat the query point once per training row so the subtraction
    # yields the per-coordinate difference to every sample.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    print(diffMat)

    sqDiffMat = diffMat ** 2  # squared difference along each coordinate
    print(sqDiffMat)

    sqDistances = sqDiffMat.sum(axis=1)  # squared distance to each sample
    print(sqDistances)

    distances = sqDistances ** 0.5  # Euclidean distance
    sortedDistIndicies = distances.argsort()  # sample indices, nearest first

    # Tally the labels of the k nearest samples.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # dict.items() works on both Python 2 and 3; iteritems() is Python 2
    # only and raises AttributeError on Python 3.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    print(sortedClassCount)
    return sortedClassCount[0][0]
# Demo: classify two query points near the origin, letting the 3 nearest
# training samples vote, and print the winning label for each.
result = classify0([0, 0], group, labels, k=3)
print(result)
result1 = classify0([0.2, 0.2], group, labels, k=3)
print(result1)