# Python environment: 3.7
from numpy import *
import operator
# Build the sample data set and the class label that belongs to each sample.
def createDataSet():
    """Return the toy training set used by the k-NN example.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a 5x2 NumPy array with one
        2-D sample per row, and ``labels`` is a list of five class names
        (``'A'`` or ``'B'``) where ``labels[i]`` is the class of ``group[i]``.
    """
    # Rows and labels are index-aligned: the first two samples are class 'A',
    # the remaining three are class 'B'.
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1], [0.1, 0]])
    labels = ['A', 'A', 'B', 'B', 'B']
    return group, labels
# Materialize the sample data once at module level so the rest of the script
# can refer to `group` (the samples) and `labels` (their classes) directly.
group, labels = createDataSet()
# group:
# [[1.  1.1]
#  [1.  1. ]
#  [0.  0. ]
#  [0.  0.1]
#  [0.1 0. ]]
def classify(inX, dataSet, labels, k) :
#获取行数
dataSetSize = dataSet.shape[0]
# 生成n个与训练样本数量对应的测试样本矩阵,计算测试样本与训练样本之间的距离
diffMat = tile(inX, (dataSetSize, 1)) - dataSet
# tile(inX, (dataSetSize, 1)) : 对inX,在列方向上重复dataSetSize次,在行方向上重复1次,生成dataSetSize * 1 的矩阵
# 即
# [[0, 0],
# [0, 0],
# [0, 0],
# [0, 0]]
#
# diffMat:
# [[-1. - 1.1]
# [-1. - 1.]
# [0. 0.]
# [0. - 0.1]]
# diffMat距离矩阵的每个距离值平方 (不平方也不太影响)
sqDiffMat = power(diffMat, 2)
# sqDiffMat = diffMat
# [[1., 1.21],