import operator  # operator module

import numpy as np
from numpy import *
# Training sample set
def createDataSet():
    """Build the small hard-coded training set used by the k-NN demo.

    Returns:
        tuple: ``(group, labels)``. ``group`` is a 4x2 NumPy array holding
        one two-feature sample per row; ``labels`` is a list in which
        ``labels[i]`` is the class label ('A' or 'B') of ``group[i]``.
    """
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])  # samples
    labels = ['A', 'A', 'B', 'B']  # labels[i] is the class of group[i]
    return group, labels
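# Classifier: inX is the input sample to classify, dataSet is the training data, labels holds the label of each training sample, and k is the k of the k-nearest-neighbours algorithm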
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    The distance between ``inX`` and every row of ``dataSet`` is the
    Euclidean distance. The ``k`` closest rows each cast one vote for their
    label and the label with the most votes is returned.

    Args:
        inX: Feature vector to classify, with one value per column of
            ``dataSet``.
        dataSet: Two-dimensional training data, one sample per row. A NumPy
            array or a nested sequence convertible by ``np.asarray``.
        labels: Sequence in which ``labels[i]`` is the label of row ``i`` of
            ``dataSet``. Labels must be hashable.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The winning label. When several labels collect the same number of
        votes, the one whose first vote came from the nearest sample wins.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            samples in ``dataSet``.
    """
    samples = np.asarray(dataSet)
    n_samples = samples.shape[0]

    # Fail early with a clear message. IndexError (rather than ValueError) is
    # kept for backward compatibility with callers of the unchecked version.
    if not 1 <= k <= n_samples:
        raise IndexError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (n_samples, k)
        )

    # Broadcasting subtracts inX from every row, so no tiled copy is needed.
    diff = samples - np.asarray(inX)
    distances = (diff ** 2).sum(axis=1) ** 0.5

    # A stable sort keeps equally distant samples in their dataSet order,
    # which makes the chosen neighbours deterministic.
    nearest = distances.argsort(kind="mergesort")[:k]

    votes = {}
    first_seen = []  # labels in the order they first received a vote
    for idx in nearest:
        label = labels[idx]
        if label not in votes:
            votes[label] = 0
            first_seen.append(label)
        votes[label] += 1

    # sorted() is stable even with reverse=True, so ties between labels are
    # resolved in favour of the label voted for by the nearer sample.
    return sorted(first_seen, key=votes.get, reverse=True)[0]
# Demo: classify two query points against the sample training set.
group, labels = createDataSet()
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(classify0([0, 0], group, labels, 3))  # expected result: B
print(classify0([1, 2], group, labels, 3))  # expected result: A
# Reposted from: https://my.oschina.net/wolfoxliu/blog/653713