KNN的原理就是取距离最近的k个点,然后将其中出现次数最多的类别作为预测类别。原理简单,代码也很简单,如下:
# coding=utf8
"""Minimal k-nearest-neighbours (kNN) classifier with a tiny demo data set."""
import os
import sys
from collections import Counter

import numpy as np
# Star import retained only so that names it used to provide stay available to
# other code; everything below uses the explicit ``np.`` prefix.  Because this
# import may shadow builtins such as ``max``/``sum``, none of them are used here.
from numpy import *  # noqa: F401,F403

if sys.version_info[0] < 3:
    # Python 2 only: keep the original default-encoding tweak.  On Python 3
    # there is no builtin reload() and no sys.setdefaultencoding().
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

# Folder the original script switched into on start-up.
_WORK_DIR = r'D:\Study\ML\MLAction'
if os.path.isdir(_WORK_DIR):
    # Only change directory where that folder exists, so the script no longer
    # crashes on machines without it.
    os.chdir(_WORK_DIR)


def euclidean_dist(x1, x2):
    """Return the Euclidean distance between two points.

    Both arguments may be lists, arrays or row/column matrices; they are
    flattened before comparison.

    Args:
        x1: first point.
        x2: second point, with the same number of elements as ``x1``.

    Returns:
        The distance as a Python float.

    Raises:
        ValueError: if the points do not have the same number of elements.
    """
    a = np.asarray(x1, dtype=float).ravel()
    b = np.asarray(x2, dtype=float).ravel()
    if a.shape != b.shape:
        raise ValueError(
            'points must have the same size, got {0} and {1}'.format(
                a.shape[0], b.shape[0]))
    return float(np.sqrt(np.sum((a - b) ** 2)))


def kNNClassify(X, label, predict_X, k):
    """Classify ``predict_X`` by majority vote among its ``k`` nearest samples.

    Args:
        X: training samples, one row per sample (2-D array-like or matrix).
        label: sequence of labels; ``label[i]`` belongs to row ``i`` of ``X``.
        predict_X: point to classify, one value per column of ``X``.
        k: number of neighbours that vote, ``1 <= k <= number of samples``.

    Returns:
        The most frequent label among the ``k`` nearest samples.  When several
        labels tie, the label of the nearest sample among them wins.

    Raises:
        ValueError: if ``X`` is not 2-D, if the number of labels or the size of
            ``predict_X`` does not match ``X``, or if ``k`` is out of range.
    """
    samples = np.asarray(X, dtype=float)
    query = np.asarray(predict_X, dtype=float).ravel()
    if samples.ndim != 2:
        raise ValueError('X must be 2-D (one row per sample)')
    n_samples, n_features = samples.shape
    if query.shape[0] != n_features:
        raise ValueError(
            'predict_X has {0} values but X has {1} columns'.format(
                query.shape[0], n_features))
    if len(label) != n_samples:
        raise ValueError(
            'got {0} labels for {1} samples'.format(len(label), n_samples))
    if not 1 <= k <= n_samples:
        raise ValueError(
            'k must be between 1 and {0}, got {1}'.format(n_samples, k))

    # Squared distances rank the samples exactly like true Euclidean
    # distances, so the square root is skipped.
    sq_dist = np.sum((samples - query) ** 2, axis=1)
    # mergesort is stable: equally distant samples keep their input order.
    # The sorted positions ARE the sample indices, nearest first.
    nearest = np.argsort(sq_dist, kind='mergesort')[:k].tolist()

    votes = Counter(label[i] for i in nearest)
    # Walk the neighbours nearest-first and switch only on a strictly higher
    # vote count, so ties resolve to the nearer neighbour's label.
    winner = label[nearest[0]]
    for i in nearest[1:]:
        if votes[label[i]] > votes[winner]:
            winner = label[i]
    return winner


def createDataSet():
    """Return the demo training set as ``(group, labels)``.

    ``group`` is a 4x2 ``numpy.matrix`` of sample coordinates and ``labels``
    is the list of class names, one per row of ``group``.
    """
    # np.asmatrix replaces the ``mat`` alias, which NumPy 2 no longer provides.
    group = np.asmatrix([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


group, labels = createDataSet()
# The samples have two coordinates, so the query needs two as well.  The
# original passed [0,0,1], which cannot be subtracted from 2-column data;
# presumably a typo for [0, 0.1] -- TODO confirm the intended point.
result = kNNClassify(group, labels, [0, 0.1], 3)
print(result)