kNN(k-nearest neighbor,k 近邻)算法是一个简单而经典的机器学习分类算法:通过度量“待分类数据”与“类别已知的样本”之间的距离,找出距离最近的 k 个样本,并以其中出现次数最多的类别作为待分类数据的类别。
from numpy import *
import operator
# Build the demo data set.
def createDataSet():
    """Return the toy training set used to demonstrate kNN.

    Returns:
        A ``(groups, labels)`` tuple. ``groups`` is a 4x2 NumPy array with
        one sample point per row, and ``labels`` is a list of class labels
        ('A' or 'B') where ``labels[i]`` belongs to ``groups[i]``.
    """
    groups = array([[1.0, 1.0], [1.0, 1.1], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return groups, labels
# Classification.
def classify0(intX, dataSet, labels, k):
    """Classify a point by majority vote among its k nearest samples.

    Args:
        intX: Feature vector of the point to classify (array-like).
        dataSet: 2-D array-like of training samples, one row per sample.
        labels: Sequence of class labels; ``labels[i]`` belongs to
            ``dataSet[i]``.
        k: Number of nearest neighbours that vote. A value larger than the
            number of samples is clamped to the number of samples.

    Returns:
        The label that received the most votes among the k nearest samples.

    Raises:
        ValueError: If ``dataSet`` has no rows or ``k`` is less than 1.
    """
    samples = asarray(dataSet, dtype=float)
    sampleCount = samples.shape[0]
    if sampleCount == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    # Explicit comparison instead of min(): ``from numpy import *`` may
    # shadow the builtin depending on the NumPy version.
    if k > sampleCount:
        k = sampleCount

    # 1. Squared Euclidean distance to every sample. Broadcasting subtracts
    #    the query from each row; the square root is skipped because it
    #    does not change the ordering of the distances.
    diffMat = samples - asarray(intX, dtype=float)
    sqDistances = (diffMat ** 2).sum(axis=1)

    # 2. Indices of the samples ordered from nearest to farthest.
    nearestIndices = sqDistances.argsort()

    # 3. Count the labels of the k nearest samples and pick the most common.
    classCount = {}
    for sampleIndex in nearestIndices[:k]:
        voteLabel = labels[sampleIndex]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    # sorted() is stable, so tied labels keep the dict's iteration order.
    sortedCount = sorted(classCount.items(),
                         key=operator.itemgetter(1), reverse=True)
    return sortedCount[0][0]
def main():
    """Classify the point [0, 0] against the demo data set and print its label."""
    groups, labels = createDataSet()
    result = classify0([0, 0], groups, labels, 3)
    # The parenthesised form works both as the Python 2 print statement
    # (single argument) and as the Python 3 print function.
    print(result)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
以上代码来自《机器学习实战》一书。刚刚开始看,目前感觉还比较友好,继续学习中。