Python版本为:3.2.3
代码为:
from numpy import*
import operator
# k-nearest-neighbours classifier
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its nearest training samples.

    The Euclidean distance from ``inX`` to every row of ``dataSet`` is
    computed, the rows are ranked by that distance, and the labels of the
    closest ``k`` rows are counted.

    Args:
        inX: The point to classify; any sequence or array that broadcasts
            against one row of ``dataSet`` (e.g. a list of feature values).
        dataSet: 2-D NumPy array holding one training sample per row.
        labels: Sequence where ``labels[i]`` is the label of ``dataSet[i]``.
        k: Number of nearest neighbours allowed to vote. A value larger
            than the number of rows is clamped to the number of rows.

    Returns:
        The label that received the most votes. Equal vote counts keep the
        order in which the vote dictionary yields them (the sort is stable).

    Raises:
        IndexError: If no neighbour can vote, i.e. ``k`` is smaller than 1
            or ``dataSet`` has no rows.
    """
    dataSetSize = dataSet.shape[0]  # number of training samples (rows)

    # Broadcasting subtracts inX from every row; no explicit tiling needed.
    diffMat = dataSet - inX
    # Array methods are used so the code does not depend on the wildcard
    # numpy import shadowing the builtin sum().
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    sortedDistIndicies = distances.argsort()

    # Never ask for more neighbours than there are samples.
    neighbourCount = min(k, dataSetSize)

    classCount = {}
    for i in range(neighbourCount):
        voteLabel = labels[sortedDistIndicies[i]]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    if not classCount:
        raise IndexError(
            "classify0 needs k >= 1 and a non-empty dataSet to take a vote")

    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]
# Build a small labelled training set
def createDataSet():
    """Return a tiny hard-coded training set for trying out the classifier.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 NumPy array with one
        two-feature sample per row, and ``labels`` is a list of four labels
        where ``labels[i]`` belongs to ``group[i]``.
    """
    group = array([[1, 1.1], [1, 1], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
# Quick check: classify the point (0, 0) using its 3 nearest training samples
data, label = createDataSet()
test_data = [0, 0]
classify0(test_data, data, label, k=3)
将代码复制到Python的IDLE工具中时,不能把整段代码一次性复制粘贴,而要按语句(每个函数定义作为一个整体)逐段粘贴并回车执行,否则有些语句无法运行。
实际粘贴运行过程为:
>>> from numpy import*
>>> import operator
>>> def classify0(inX,dataSet,labels,k):
dataSetSize = dataSet.shape[0] #数据矩阵的行
diffMat = tile(inX,(dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sum(sqDiffMat,axis=1)
distances = sqDistances**0.5
sortedDistIndicies = argsort(distances)
classCount = {}
for i in range(k):
voteLabel = labels[sortedDistIndicies[i]]
classCount[voteLabel] = classCount.get(voteLabel,0)+1
sortedClassCount = sorted(classCount.items(),key = operator.itemgetter(1),reverse=True)
return sortedClassCount[0][0]
>>> def createDataSet():
group = array([[1,1.1],[1,1],[0,0],[0,0.1]])
labels = ['A','A','B','B']
return group,labels
>>> data,label = createDataSet()
>>> data
array([[ 1. , 1.1],
[ 1. , 1. ],
[ 0. , 0. ],
[ 0. , 0.1]])
>>> label
['A', 'A', 'B', 'B']
>>> test_data = [0,0]
>>> classify0(test_data,data,label,3)
'B'