# -*- coding: utf-8 -*-
"""Minimal k-nearest-neighbours (kNN) classifier built on NumPy."""
# The wildcard import is retained on purpose: other code may rely on the NumPy
# names it brings into this module's namespace.
from numpy import *
import operator


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Closeness is measured with the Euclidean distance between ``inX`` and
    every row of ``dataSet``.

    Args:
        inX: Feature vector to classify (sequence or array of numbers).
        dataSet: Training samples, one sample per row (2-D array or nested
            sequence of numbers).
        labels: Sequence giving the class label of each row of ``dataSet``.
        k: Number of nearest neighbours that vote; must satisfy
            ``1 <= k <= number of rows``.

    Returns:
        The label that received the most votes.  When several labels are
        tied, the one belonging to the closest neighbour among them wins,
        so the result is deterministic.

    Raises:
        ValueError: If ``k`` is out of range or ``labels`` has fewer entries
            than ``dataSet`` has rows.
    """
    # Accept plain (nested) lists as well as arrays; working in float also
    # avoids integer overflow when squaring the differences.
    samples = asarray(dataSet, dtype=float)
    query = asarray(inX, dtype=float)
    dataSetSize = samples.shape[0]

    if len(labels) < dataSetSize:
        raise ValueError(
            "labels has %d entries but dataSet has %d rows"
            % (len(labels), dataSetSize)
        )
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and %d (number of samples), got %r"
            % (dataSetSize, k)
        )

    # Broadcasting subtracts the query from every row; no need to tile it.
    diffMat = samples - query
    # axis=1 sums across each row -> one squared distance per sample.  The
    # square root is skipped because it does not change the ordering.
    sqDistances = (diffMat ** 2).sum(axis=1)
    # Indices of the k closest samples, nearest first.  A stable sort keeps
    # equidistant samples in their original row order.
    nearest = sqDistances.argsort(kind="mergesort")[:k]

    # Tally the votes of the k nearest neighbours.
    classCount = {}
    for index in nearest:
        voteIlabel = labels[index]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Walk the neighbours from nearest to farthest so that, on a tie, the
    # label seen first (i.e. the closest one) is kept.
    winner = labels[nearest[0]]
    for index in nearest[1:]:
        candidate = labels[index]
        if classCount[candidate] > classCount[winner]:
            winner = candidate
    return winner


if __name__ == "__main__":
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(classify0([0, 0], group, labels, 3))
# For a 2-D array: axis=0 sums down each column, axis=1 sums across each row.
# sum() on a numpy array:
# https://blog.csdn.net/wangxiao7474/article/details/78867390
# Here the distances are sorted in ascending order and the indices of the sorted elements are returned.
# On the usage of numpy's argsort():
# https://blog.csdn.net/maoersong/article/details/21875705
# -*- coding: UTF-8 -*-