采用 Python 3.7,需要安装 NumPy 库
算法的逻辑如下:
1.计算已知分类数据集中的点与输入点之间的距离
2.根据距离递增排序
3.选择与输入点距离最小的k个点
4.统计前k个点所在分类的出现频率
5.返回前k个点中出现频率最高的分类作为输入点的预测分类
数据集:
[[1.0,1.1],[1.0,1.0],[0,0],[0,0.1]]
['A','A','B','B']
输入:[1.2,0]
输出:A
输入:[1.5,1]
输出:A
输入:[0,0.5]
输出:B
下面是源码:
import operator
from collections import Counter

from numpy import *
'''
KNN分类算法
曾出现的问题:
AttributeError: 'dict' object has no attribute 'iteritems'
原因:Python 3 中 dict.iteritems() 已被移除,应改用 items()
'''
def createDataSet():
    """Build the small labelled sample set used to demonstrate kNN.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 NumPy array with
        one 2-D sample point per row, and ``labels`` is a list of four class
        names in which ``labels[i]`` is the class of ``group[i]``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def classify0(intX, dataSet, labels, k):
    """Classify one point by majority vote among its k nearest neighbours.

    Args:
        intX: Feature vector to classify; its length must equal the number
            of columns in ``dataSet``.
        dataSet: 2-D array-like of known samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that vote; must satisfy
            ``1 <= k <= number of rows in dataSet``.

    Returns:
        The label that occurs most often among the ``k`` samples closest to
        ``intX`` (Euclidean distance). When several labels share the top
        vote count, the one met first while walking the neighbours from
        nearest to farthest is returned.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            samples in ``dataSet``.
    """
    samples = asarray(dataSet)
    sampleCount = samples.shape[0]
    if not 1 <= k <= sampleCount:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (sampleCount, k)
        )

    # Flatten the query to a single feature vector so that broadcasting
    # subtracts it from every row (this replaces the explicit tile() copy).
    query = asarray(intX).ravel()
    distances = ((samples - query) ** 2).sum(axis=1) ** 0.5

    # Indices of the k closest samples, nearest first.
    nearest = distances.argsort()[:k]

    # Counter keeps first-seen order among equal counts, so ties resolve the
    # same way as a stable descending sort over an insertion-ordered dict.
    votes = Counter(labels[i] for i in nearest)
    return votes.most_common(1)[0][0]
# Demo run: classify three sample queries against the built-in data set,
# letting the 3 nearest neighbours vote each time.
group, labels = createDataSet()
for prompt, query in (
    ("输入:[1.2,0]", [1.2, 0]),
    ("输入:[1.5,1]", [1.5, 1]),
    ("输入:[0,0.5]", [0, 0.5]),
):
    print(prompt)
    print("输出:" + classify0(query, group, labels, 3))