在简单实现KNN算法的时候遇到的问题
1、numpy.mat 与numpy.array
2、numpy.tile([a,b],(x,y)) 将[a,b] 在行方向(axis 0)上重复x次,在列方向(axis 1)上重复y次,结果的形状为(x, 2*y)。如tile([2,3],(3,2)) =
[ [2,3,2,3],
  [2,3,2,3],
  [2,3,2,3] ]
3、python 自带的sorted()函数参数设置
4、KNN简单代码实现及注释:
import operator

import numpy

# Toy training set: four 2-D points and the class label of each row.
group = numpy.array([[1.0, 1.1], [1.0, 1.0], [0.0, 0.0], [0.0, 0.1]])
labels = ['A', 'A', 'B', 'B']
print(group)
print(labels)


def classfy0(inx, dataset, labels, k):
    """Classify ``inx`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``inx`` and every row of
    ``dataset``.

    Args:
        inx: Feature vector of the point to classify (sequence or array).
        dataset: 2-D array-like of training points, one sample per row.
        labels: Sequence indexed like the rows of ``dataset``; ``labels[i]``
            is the class of ``dataset[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the ``k`` nearest rows. On a tie,
        the tied label that was met first while walking the neighbours from
        nearest to farthest wins (relies on dicts preserving insertion
        order, i.e. Python 3.7+).

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``dataset``.
    """
    dataset = numpy.asarray(dataset)
    sample_count = dataset.shape[0]  # number of rows (samples), not columns
    if not 1 <= k <= sample_count:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (sample_count, k)
        )

    # Broadcasting subtracts inx from every row, so no explicit tiling of
    # inx into a (sample_count, n_features) matrix is needed.
    diff = numpy.asarray(inx) - dataset
    # Sum the squared differences across each row, then take the root.
    distances = numpy.sqrt((diff ** 2).sum(axis=1))

    # Row indices ordered from the nearest sample to the farthest.
    nearest = distances.argsort()[:k]

    # Tally how many of the k nearest samples carry each label.
    class_count = {}
    for index in nearest:
        vote = labels[index]
        class_count[vote] = class_count.get(vote, 0) + 1

    # max() returns the first entry holding the highest count, which is the
    # same winner a stable descending sort by count would put in front.
    return max(class_count.items(), key=operator.itemgetter(1))[0]


print("predict:%s" % classfy0([0, 0.2], group, labels, 3))