1.原理
利用欧氏距离衡量待分类样本与各个训练样本之间的相似度:欧氏距离越小,相似度越大。取距离最近的 k 个训练样本,以其中出现次数最多的类别作为分类结果。
2.代码:
from numpy import *
import operator
from os import listdir
def kNNClassify(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training samples.

    Nearness is the Euclidean distance between ``inX`` and each row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify (sequence or array of length d).
        dataSet: Training samples, one sample per row (n rows, d columns).
        labels: Sequence of labels; ``labels[i]`` is the class of row ``i``
            of ``dataSet``. Labels must be hashable.
        k: Number of nearest neighbours that vote; ``1 <= k <= n``.

    Returns:
        The label that occurs most often among the k nearest samples. When
        several labels share the top count, the one whose first vote came
        from the closest sample wins (relies on dicts preserving insertion
        order, i.e. Python 3.7+).

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    # Accept plain nested lists as well as numpy arrays.
    samples = asarray(dataSet, dtype=float)
    query = asarray(inX, dtype=float)

    # sample size
    dataSize = samples.shape[0]
    if k < 1 or k > dataSize:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (dataSize, k)
        )

    # Broadcasting subtracts the query from every row; the sign of the
    # difference is irrelevant because it is squared right away.
    diffMat = (samples - query) ** 2
    # get sum of each row so set axis = 1
    distances = diffMat.sum(axis=1) ** 0.5

    # A stable sort keeps equidistant samples in their original row order,
    # which makes the result deterministic.
    diffSortIndices = distances.argsort(kind="stable")

    # Count the votes of the k nearest samples, nearest first.
    resLabels = {}
    for idx in diffSortIndices[:k]:
        tempLabel = labels[idx]
        resLabels[tempLabel] = resLabels.get(tempLabel, 0) + 1

    # Strict '>' keeps the earliest-inserted label when counts are tied.
    res = None
    maxTimes = 0
    for label, times in resLabels.items():
        if times > maxTimes:
            maxTimes = times
            res = label
    return res