from collections import Counter
from operator import itemgetter

import numpy as np
from numpy import *
def dataset():
    """Return the small hand-made training set used by the k-NN demo.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 float ``ndarray``
        holding one sample point per row and ``labels`` is a list with the
        class label of each row, in the same order.
    """
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0.0, 0.0], [0.0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
# Build the sample points and their labels once, at module load time.
group, labels = dataset()
def classsify(array, dataset, labels, k, *, verbose=True):
    """Classify a point by majority vote among its k nearest neighbours.

    Distances are Euclidean. Samples at equal distance keep their order in
    ``dataset`` (stable sort). When several labels collect the same number of
    votes, the label that received its first vote from the nearer sample wins.

    Args:
        array: The query point; a sequence of coordinates with one entry per
            column of ``dataset``.
        dataset: 2-D array-like of training samples, one row per sample.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataset``.
        k: Number of nearest neighbours that vote. Must satisfy
            ``1 <= k <= number of samples``.
        verbose: When true (the default, which keeps the output this function
            has always produced), print the squared differences, the
            distances, the neighbour order and the vote tally.

    Returns:
        The label with the most votes among the ``k`` nearest samples.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            samples.
    """
    samples = np.asarray(dataset)
    sample_count = samples.shape[0]
    if not 1 <= k <= sample_count:
        raise ValueError(
            "k must be between 1 and the number of samples ({}), got {!r}"
            .format(sample_count, k)
        )

    # Broadcasting subtracts every sample row from the query point, so the
    # query does not have to be tiled into a matrix of the same shape first.
    squared_diff = (np.asarray(array) - samples) ** 2
    if verbose:
        print(squared_diff)

    # Sum the squared differences of each row, then take the square root.
    distances = squared_diff.sum(axis=1) ** 0.5
    if verbose:
        print(distances)

    # argsort yields the sample indices ordered from nearest to farthest;
    # a stable sort keeps equally distant samples in dataset order.
    nearest_first = distances.argsort(kind="stable")
    if verbose:
        print(nearest_first)

    # Tally one vote per neighbour; Counter remembers first-insertion order,
    # which is what breaks ties between equally voted labels.
    votes = Counter(labels[index] for index in nearest_first[:k])
    ranked_votes = votes.most_common()
    if verbose:
        print(ranked_votes)
    return ranked_votes[0][0]
# Demo run: classify the point (0, 0) against the sample data with k = 3.
# The returned label is not used here.
classsify([0, 0], group, labels, 3)
# Note: all explanatory comments are written inline in the code above.