# 算法步骤:

1. step.1—初始化距离为最大值
2. step.2—计算未知样本和每个训练样本的距离dist
3. step.3—得到目前K个最近邻样本中的最大距离maxdist
4. step.4—如果dist小于maxdist，则将该训练样本作为K-最近邻样本
5. step.5—重复步骤2、3、4，直到未知样本和所有训练样本的距离都算完
6. step.6—统计K-最近邻样本中每个类标号出现的次数
7. step.7—选择出现频率最大的类标号作为未知样本的类标号

# 案例分析

from numpy import *
import operator

def createDataSet():
    """Build the small demonstration data set used to try out the classifier.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 array with one sample
        per row, and ``labels`` is the list of class labels for those rows,
        in the same order.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training samples.

    Distance is Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: feature vector to classify; either a flat sequence/array with
            one value per feature or a single-row 2-D array.
        dataSet: training samples, one row per sample (array or nested
            sequence).
        labels: class label of each row of ``dataSet``, in the same order.
        k: number of nearest neighbours that take part in the vote. When it
            exceeds the number of samples, every sample votes.

    Returns:
        The label that received the most votes.

    Raises:
        ValueError: if ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    dataSet = asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if dataSetSize == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")

    # Broadcasting subtracts inX from every row, so no tiled copy is needed.
    diffMat = asarray(inX) - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    sortedDistIndicies = distances.argsort()

    classCount = {}
    for idx in sortedDistIndicies[:k]:
        voteIlabel = labels[idx]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    # max() keeps the first entry among equal counts, which is the same entry
    # a stable descending sort would place first.
    return max(classCount.items(), key=operator.itemgetter(1))[0]

# 测试算法:

#! /usr/bin/env python
# -*- coding: utf-8 -*-
from numpy import *
from os import listdir
import KNN
from numpy.core import multiarray

def img2vector(filename):
    """Convert a 32x32 text image file into a 1x1024 row vector.

    The first 32 characters of each of the first 32 lines are parsed as
    integers and stored row after row in the result.

    Args:
        filename: path of the text file to read.

    Returns:
        An array of shape (1, 1024) holding the parsed values.

    Raises:
        IndexError: if a line has fewer than 32 characters.
        ValueError: if a character is not a digit.
    """
    returnVect = zeros((1, 1024))
    # The context manager closes the file even if parsing fails.
    with open(filename) as fr:
        for i in range(32):             # flatten the 32 rows into one row
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect                   # one sample becomes a 1x1024 vector

def _digitFromFileName(fileNameStr):
    """Return the integer that precedes the first '_' in the file's base name."""
    return int(fileNameStr.split('.')[0].split('_')[0])


def handwritingClassTest():
    """Run the handwriting-recognition test using the KNN module's classifier.

    Every file in ``trainingDigits`` is loaded as one training row, labelled
    with the integer at the start of its file name. Each file in
    ``testDigits`` is then classified with ``KNN.classify0`` (k = 3) and the
    prediction is compared with the label taken from its file name. The
    per-file results, the total number of errors and the error rate are
    printed.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        hwLabels.append(_digitFromFileName(fileNameStr))
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for i, fileNameStr in enumerate(testFileList):
        classNumStr = _digitFromFileName(fileNameStr)
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = KNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # print(...) with a single pre-formatted string behaves the same on
        # Python 2 and Python 3.
        print("in #%d, the classifier came back with: %d, the real answer is: %d" % (i, classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    # With no test files there is nothing to divide by; report a rate of 0.
    errorRate = errorCount / float(mTest) if mTest else 0.0
    print("\nthe total error rate is: %f" % errorRate)

# Run the handwriting-recognition test as soon as this file is executed.
handwritingClassTest()


## 源码下载地址

举报原因： 您举报文章：[机器学习案例1]基于KNN手写数字识别 色情 政治 抄袭 广告 招聘 骂人 其他 (最多只允许输入30个字)