# Code implementation:
# -*- coding: utf-8 -*-
import numpy
#from numpy import *
import operator
# Purpose: convert a data file into matrix form
def FileToMartix(filename):
    """Load a tab-separated data file into a feature matrix and a label list.

    Each non-blank line is stripped and split on tab characters. Every field
    except the last is stored as a float feature value; the last field is
    parsed as an integer class label. Blank lines are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabeVector)``. ``returnMat`` is a float
        numpy array with one row per sample and one column per feature, and
        ``classLabeVector`` is the list of integer labels in file order.
        An empty file yields a ``(0, 3)`` matrix and an empty list.

    Raises:
        ValueError: If a field cannot be converted to a number, or if a row
            does not have the same number of features as the first row.
    """
    # The context manager guarantees the file is closed, even when a later
    # parsing step raises.
    with open(filename) as f:
        rows = [line.strip().split('\t') for line in f if line.strip()]

    # The feature count comes from the first row; the historical width of 3
    # is kept for an empty file so the returned shape stays predictable.
    numberOfFeatures = len(rows[0]) - 1 if rows else 3
    returnMat = numpy.zeros((len(rows), numberOfFeatures))
    classLabeVector = []
    for index, fields in enumerate(rows):
        returnMat[index, :] = [float(value) for value in fields[:-1]]
        classLabeVector.append(int(fields[-1]))
    return returnMat, classLabeVector
# Min-max normalization of the data
# newvalue = (oldvalue - min) / (max - min)
def normlized(dataset):
    """Rescale every column of ``dataset`` to the range [0, 1].

    Applies ``newvalue = (oldvalue - min) / (max - min)`` column by column.

    Args:
        dataset: Numpy array with one row per sample and one column per
            feature (the function is written for 2-D input).

    Returns:
        A tuple ``(norDataSet, ranges, minval)``: the normalized array, the
        per-column ``max - min`` values, and the per-column minimums.
    """
    # Per-column minimum and maximum (reduction along axis 0).
    minval = dataset.min(0)
    maxval = dataset.max(0)
    ranges = maxval - minval
    # A constant column has a zero range. Dividing by 1 there maps the whole
    # column to 0 instead of producing NaN values; the returned ``ranges``
    # still reports the true (zero) range.
    safeRanges = numpy.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column values to every row.
    norDataSet = (dataset - minval) / safeRanges
    return norDataSet, ranges, minval
# KNN classifier
# Parameters: test sample: vect, training set: dataSet, sample labels: lables, number of neighbours: k
def classified(vect, dataSet, lables, k):
    """Classify ``vect`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``vect`` and every row of
    ``dataSet``.

    Args:
        vect: Feature vector of the sample to classify.
        dataSet: Numpy array of training samples, one row per sample.
        lables: Sequence of class labels; ``lables[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the ``k`` nearest samples. On a
        tie, the tied label that was seen first (i.e. closest) wins, given
        insertion-ordered dicts and a stable sort.

    Raises:
        IndexError: If ``k`` is less than 1 or larger than the number of
            training samples.
    """
    # Broadcasting subtracts every training row from the query vector.
    diffMat = numpy.asarray(vect) - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    # Training-sample indices ordered from nearest to farthest.
    sortedIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteLable = lables[sortedIndicies[i]]
        classCount[voteLable] = classCount.get(voteLable, 0) + 1
    # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]