代码来自Peter Harrington《机器学习实战》
相关数据集见www.manning.com/MachineLearninginAction
kNN.py
from numpy import *
import operator
class k_algrithom:
def __init__(self):
self.inx = None
self.dataset = None
self.labels = None
self.k = None
# def createDataset(self):
# group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
# labels = ['A', 'A', 'B', 'B']
# return group, labels
def classify0(self,inx, dataset, labels, k):
# inx:输入向量,dataset:输入样本,labels:标签向量,k:选择最邻近的数目
dataSetSize = dataset.shape[0]
diffMat = tile(inx, (dataSetSize, 1)) - dataset
sqDiffMat = diffMat ** 2
sqDistance = sqDiffMat.sum(axis=1)
distance = sqDistance ** 0.5
sortedDistIndices = distance.argsort()
classcount = {
}
for i in range(k):
votelabel = labels[sortedDistIndices[i]]
classcount[votelabel] = classcount.get(votelabel, 0) + 1
sortedclasscount = sorted(classcount.items(), key=operator.itemgetter(1),
reverse