python实现KNN:
1.KNN.py
# KNN method
'''
step1: calculate the distance between the input and every sample in the given dataset
step2: sort the distances and find the k nearest samples
step3: find the class that occurs most often among those k samples
Notice: if KNN is revised, reload it, then run KNN_test
'''
from numpy import *
import operator
def CreateDataset():
    """Build the small toy training set used to demonstrate KNN.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a 4x2 numpy array holding
        one two-feature sample per row; ``labels`` is a list whose i-th
        entry ('A' or 'B') is the class of the i-th row of ``group``.
    """
    group = array([[1.0, 0.9], [1.0, 1.0], [0.1, 0.2], [0.0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
# KNN_classify
def classify(Input, Dataset, labels, k):
    """Classify ``Input`` by majority vote among its k nearest neighbours.

    Args:
        Input: feature vector to classify; array-like with one value per
            column of ``Dataset``.
        Dataset: 2-D array-like of training samples, one sample per row.
        labels: sequence of class labels; ``labels[i]`` is the class of
            row ``i`` of ``Dataset``.
        k: number of nearest neighbours that take part in the vote; must
            satisfy ``1 <= k <= number of rows in Dataset``.

    Returns:
        The label that received the most votes. When several labels tie,
        the first one in dict iteration order wins (on Python 3.7+ that is
        the tied label whose neighbour was encountered first).

    Raises:
        ValueError: if ``k`` is smaller than 1 or larger than the number
            of rows in ``Dataset``.
    """
    Dataset = asarray(Dataset)
    row_number = Dataset.shape[0]
    if not 1 <= k <= row_number:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (row_number, k)
        )

    # step1: Euclidean distance from Input to every row; broadcasting
    # subtracts Input from each row without building a tiled copy.
    diffValue = Dataset - Input
    distance = (diffValue ** 2).sum(axis=1) ** 0.5

    # step2: row indices ordered from the smallest distance to the largest
    sorted_distance = distance.argsort()

    # step3: count the labels of the k nearest rows
    classCount = {}
    for index in sorted_distance[:k]:
        votelabel = labels[index]
        classCount[votelabel] = classCount.get(votelabel, 0) + 1

    # max() returns the first key holding the highest count
    return max(classCount, key=classCount.get)
from numpy import *
import KNN
# Driver script (presumably KNN_test.py): classifies two sample points with
# the KNN module and prints the predicted labels.
test_A = array([1.5, 1.2])
test_B = array([0.2, 0.5])
Dataset, labels = KNN.CreateDataset()
k = 3  # number of nearest neighbours that vote
output1_label = KNN.classify(test_A, Dataset, labels, k)
output2_label = KNN.classify(test_B, Dataset, labels, k)
# Formatting the line before printing gives the same space-separated output
# whether print is a statement (Python 2) or a function (Python 3).
print("%s %s" % (output1_label, output2_label))
# Output: A B