# 本博文主要参考机器学习中 KNN 实现的第一个例程,如有不当之处,请指正!
import numpy as np
import operator
from os import listdir
def createDataSet():
    """Create the toy training set used by the KNN demo.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a 4x2 NumPy array with one
        two-feature sample per row; ``labels`` is a list holding the class
        label (``'A'`` or ``'B'``) of the row at the same index.
    """
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training samples.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector of the sample to classify.
        dataSet: Training samples, one sample per row (array or nested
            sequence convertible with ``np.asarray``).
        labels: Labels of the training samples; ``labels[i]`` is the label
            of row ``i`` of ``dataSet``.
        k: Number of nearest neighbours that vote. A value larger than the
            number of training samples is capped at that number.

    Returns:
        The label that received the most votes. On a tie, the winner is the
        first tied label in the vote dictionary's iteration order (on
        insertion-ordered dicts: the tied label that was voted for first,
        i.e. by the closest neighbour).

    Raises:
        ValueError: If ``dataSet`` has no rows or ``k`` is smaller than 1.
    """
    dataSet = np.asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if dataSetSize == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))

    # Repeat inX once per training row so it can be subtracted from the
    # whole training matrix in a single operation.
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
    # Euclidean distance: square, sum across each row, take the root.
    sqDistance = (diffMat ** 2).sum(axis=1)
    distances = sqDistance ** 0.5
    # Indices of the training rows ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k nearest rows; the slice silently stops at
    # the end of the array, which is what caps an oversized k.
    classCount = {}
    for index in sortedDistIndicies[:k]:
        voteLabel = labels[index]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # max() keeps the first maximal entry, matching the tie-breaking of a
    # stable descending sort without sorting every label.
    return max(classCount.items(), key=operator.itemgetter(1))[0]
def test1():
    """Run the KNN demo: print the toy data set and one prediction.

    Prints the training samples, their labels, and the label that
    ``classify0`` assigns to the point ``[0.1, 0.1]`` using the 3 nearest
    neighbours.
    """
    group, labels = createDataSet()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(str(group))
    print(str(labels))
    print(classify0([0.1, 0.1], group, labels, 3))
if __name__ == '__main__':
    # Run the demo only when this file is executed as a script, not when it
    # is imported as a module.
    test1()