《机器学习实战》:k-近邻算法
# -*- coding: utf-8 -*-
import numpy as np
#导入运算符模块
import operator
def createDataSet():
    """Build the small toy training set used to demonstrate kNN.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 NumPy array with one
        2-D sample per row, and ``labels`` is a list holding the class label
        ('A' or 'B') of the row at the same position.
    """
    group = np.array([
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Distances are Euclidean. When several labels receive the same number of
    votes, the label whose first vote came from the closest sample wins.

    Args:
        inX: The point to classify (array-like with one value per feature).
        dataSet: Training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the ``k`` nearest samples.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    samples = np.asarray(dataSet)
    sample_count = samples.shape[0]  # number of rows == number of samples
    if not 1 <= k <= sample_count:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (sample_count, k)
        )

    # Broadcasting subtracts inX from every row; no need to tile it first.
    diffs = np.asarray(inX) - samples
    distances = np.sqrt((diffs ** 2).sum(axis=1))

    # A stable sort keeps equidistant samples in their original row order,
    # which makes the result deterministic.
    nearest = distances.argsort(kind="stable")[:k]

    votes = {}
    for idx in nearest:
        label = labels[idx]
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first label reaching the highest count, i.e. the
    # same winner a stable descending sort of the vote counts would give.
    return max(votes, key=votes.get)
Python numpy函数:shape用法
shape函数是numpy.