from numpy import *
import operator
def create_datas():
    """Build the fixed toy training set used by the demo.

    Returns:
        A ``(datasets, labels)`` pair: ``datasets`` is a 5x2 integer array
        with one sample per row, and ``labels`` is a list holding the class
        label ('A' or 'B') of the sample at the same index.
    """
    # Keep each point next to its class so the pairing is visible at a glance.
    labelled_points = (
        ([1, 0], 'A'),
        ([0, 1], 'A'),
        ([0, 0], 'A'),
        ([1, 1], 'B'),
        ([2, 2], 'B'),
    )
    features = array([point for point, _ in labelled_points])
    classes = [tag for _, tag in labelled_points]
    return features, classes
def KNN_algorithm(x, datas, labels, k, verbose=True):
    """Rank class labels by votes among the ``k`` samples nearest to ``x``.

    Nearness is the Euclidean distance between ``x`` and each row of
    ``datas``.

    Args:
        x: Query point; a sequence or 1-D array with one value per column
            of ``datas``.
        datas: Training samples as a 2-D array or nested sequence, one row
            per sample.
        labels: Class labels indexed like the rows of ``datas``.
        k: Number of nearest neighbours that vote; must satisfy
            ``1 <= k <= number of rows in datas``.
        verbose: When true (the default) every intermediate result is
            printed; pass ``False`` to classify silently.

    Returns:
        A list of ``(label, votes)`` tuples sorted by ``votes`` in descending
        order. Labels with equal votes keep the order in which they were
        first met while walking the neighbours from nearest to farthest.
        The first tuple is the predicted class.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    datas = asarray(datas)  # also accept plain nested lists
    sample_count = datas.shape[0]
    if not 1 <= k <= sample_count:
        raise ValueError(
            'k must be between 1 and the number of samples (%d), got %r'
            % (sample_count, k)
        )

    def trace(*args):
        # Single switch for all the step-by-step output.
        if verbose:
            print(*args)

    trace('x=', x)
    # Repeat the query point once per training row so the subtraction below
    # is a plain element-wise operation.
    tmp = tile(x, (sample_count, 1))
    trace('res=\n', tmp)
    diffMax = datas - tmp
    trace('diffMax=\n', diffMax)
    powMax = diffMax ** 2
    trace('powMax=\n', powMax)
    sumPowMax = powMax.sum(axis=1)
    trace('sumPowMax=\n', sumPowMax)
    sqrtMax = sumPowMax ** 0.5
    trace('sqrtMax=\n', sqrtMax)
    # A stable sort keeps equidistant samples in their original row order,
    # which makes the choice of neighbours deterministic.
    sortMat = sqrtMax.argsort(kind='stable')
    trace('sortMat=\n', sortMat)

    countMat = {}
    for index in sortMat[:k]:
        label = labels[index]
        trace('i = ', index)
        trace('label = ', label)
        countMat[label] = countMat.get(label, 0) + 1

    # sorted() is stable, so equally frequent labels stay nearest-first.
    sortedResult = sorted(
        countMat.items(), key=operator.itemgetter(1), reverse=True
    )
    trace('sortedResult=\n', sortedResult)
    return sortedResult
def domain():
    """Run the demo: classify the point [1, 2] using its 3 nearest samples.

    Prints the training set, its labels, and finally the top-ranked
    ``(label, votes)`` entry returned by ``KNN_algorithm``.
    """
    samples, sample_labels = create_datas()
    print('datas=\n', samples)
    print('labels=', sample_labels)

    query_point, neighbour_count = [1, 2], 3
    ranking = KNN_algorithm(query_point, samples, sample_labels, neighbour_count)
    # The ranking is sorted by votes, so its first entry is the prediction.
    winner = ranking[0]
    print('res = ', winner)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    domain()
欧氏距离:$d(x, y) = \sqrt{\sum_{i=1}^{n} (x_i - y_i)^2}$
KNN算法的描述为:
1)计算测试数据与各个训练数据之间的距离;
2)按照距离的递增关系进行排序;
3)选取距离最小的K个点;
4)确定前K个点所在类别的出现频率;
5)返回前K个点中出现频率最高的类别作为测试数据的预测分类。