import numpy as np
import operator
def createDataSet():
    """Return a tiny hand-made sample set for trying out the classifier.

    Returns:
        A ``(group, labels)`` pair: ``group`` is a 4x2 float array with one
        sample per row, and ``labels`` is the list of class labels where
        ``labels[i]`` belongs to ``group[i]``.
    """
    # Two points near (1, 1) belong to class 'A', two near (0, 0) to 'B'.
    labels = list('AABB')
    samples = [
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ]
    group = np.array(samples)
    return group, labels
def classify0(inX, dataset, labels, k):
    """Classify ``inX`` by majority vote among its k nearest neighbours.

    Distances are Euclidean and are measured between ``inX`` and every row
    of ``dataset``.

    Args:
        inX: Input vector to classify (array-like, one value per feature).
        dataset: Training samples, one row per sample (array-like).
        labels: Labels of the training samples; ``labels[i]`` is the label
            of ``dataset[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label occurring most often among the k nearest samples. On a
        tie, the label met first when walking the neighbours from nearest
        to farthest wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            samples in ``dataset``.
    """
    # Accept plain nested lists as well as ndarrays.
    dataset = np.asarray(dataset)
    size = dataset.shape[0]
    if k < 1 or k > size:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (size, k)
        )

    # Euclidean distance to every sample; broadcasting subtracts inX from
    # each row without building a tiled copy of it.
    diff = np.asarray(inX) - dataset
    distance = (diff ** 2).sum(axis=1) ** 0.5

    # A stable sort keeps equally distant samples in dataset order, so the
    # result is deterministic.
    nearest = distance.argsort(kind="stable")[:k]

    # Count the votes of the k nearest samples.
    class_count = {}
    for index in nearest:
        label = labels[index]
        class_count[label] = class_count.get(label, 0) + 1

    # max() returns the first label with the highest count; dicts keep
    # insertion order, so ties go to the label seen first (nearest first).
    return max(class_count, key=class_count.get)
numpy 的 tile 函数：
np.tile(x, (row, col))
x：要重复的数据
row: