# python knnmatch -- KNN classification algorithm, Python implementation

##########################################
# kNN: k Nearest Neighbors
#
# Input:  newInput: vector to compare to existing dataset (1xN)
#         dataSet:  data set of M known vectors, one per row (MxN)
#         labels:   data set labels (1xM vector)
#         k:        number of neighbors to use for comparison
#
# Output: the most popular class label
##########################################

import operator
import os

from numpy import *

try:
    # ``Canvas`` is a legacy Tkinter helper module that only ships with
    # Python 2; nothing visible in this file uses ``Line``, so a missing
    # module must not prevent the classifier from being imported.
    from Canvas import Line
except ImportError:
    pass

# classify using kNN

def kNNClassify(newInput, dataSet, labels, k):
    """Classify *newInput* by majority vote among its k nearest neighbors.

    Args:
        newInput: feature vector to classify; a sequence or array of N
            values (a 1xN array works as well).
        dataSet: known samples, one per row (M rows of N features).
        labels: sequence of M labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``. Labels must be hashable.
        k: number of neighbors that vote. Values larger than the number
            of samples are clamped to the number of samples.

    Returns:
        The label that occurs most often among the k nearest samples
        (Euclidean distance).

    Raises:
        ValueError: if ``dataSet`` has no rows or ``k`` is smaller than 1.
    """
    # Work in floating point so that unsigned integer inputs (e.g. uint8
    # pixel data) cannot wrap around when subtracted or squared.
    samples = asarray(dataSet, dtype=float)
    numSamples = samples.shape[0]  # shape[0] is the number of rows
    if numSamples == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    k = min(k, numSamples)

    ## step 1: calculate Euclidean distance
    # Broadcasting subtracts newInput from every row, so no explicit
    # tile() copy of the input is needed.
    diff = samples - asarray(newInput, dtype=float)
    distance = ((diff ** 2).sum(axis=1)) ** 0.5

    ## step 2: sort the distance
    # argsort() returns the indices that sort the array in ascending
    # order; a stable sort keeps equal distances in data set order.
    sortedDistIndices = argsort(distance, kind="mergesort")

    ## step 3 + 4: take the k closest samples and count their labels
    classCount = {}
    for sampleIndex in sortedDistIndices[:k]:
        voteLabel = labels[sampleIndex]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    ## step 5: the label with the most votes wins
    # NOTE: on a tie, max() returns the first tied key in dict iteration
    # order (the nearest neighbor's label where dicts keep insertion order).
    return max(classCount, key=classCount.get)


# convert image to vector

def img2vector(filename):
    """Read a 32x32 text image of digit characters into a 1x1024 vector.

    The file is read line by line; the first 32 characters of each of the
    first 32 lines are converted with ``int()`` and stored row after row.

    Args:
        filename: path of the text file to read.

    Returns:
        A numpy array of shape (1, 1024) holding the converted values.

    Raises:
        IndexError: if a line has fewer than 32 characters (or the file
            has fewer than 32 lines).
        ValueError: if a character cannot be converted by ``int()``.
    """
    rows = 32
    cols = 32
    imgVector = zeros((1, rows * cols))
    # The context manager closes the file even if a conversion fails.
    with open(filename) as fileIn:
        for row in range(rows):
            lineStr = fileIn.readline()
            for col in range(cols):
                imgVector[0, row * cols + col] = int(lineStr[col])
    return imgVector


# load dataSet

def _loadDigitDir(dirPath):
    """Load every digit file in *dirPath* into a sample matrix and labels.

    Returns a ``(data, labelList)`` pair: ``data`` has one 1024-value row
    per file (as produced by ``img2vector``) and ``labelList[i]`` is the
    integer label parsed from the i-th file name.
    """
    # os.listdir() returns names in arbitrary order; sort them so the
    # sample order is reproducible between runs and platforms.
    fileList = sorted(os.listdir(dirPath))
    data = zeros((len(fileList), 1024))
    labelList = []
    for i, filename in enumerate(fileList):
        data[i, :] = img2vector(os.path.join(dirPath, filename))
        # The label is the part of the file name before the first "_",
        # e.g. "1_18.txt" -> 1.
        labelList.append(int(filename.split('_')[0]))
    return data, labelList


def loadDataSet(dataSetDir='F:/eclipse/workspace/KnnTest/'):
    """Load the training and testing digit sets from *dataSetDir*.

    Args:
        dataSetDir: directory containing the ``trainingDigits`` and
            ``testDigits`` sub-directories. Defaults to the location the
            original script used.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)`` where the ``*_x``
        values are arrays with one 1024-value row per file and the
        ``*_y`` values are lists of integer labels in the same order.
    """
    ## step 1: Getting training set
    print("---Getting training set...")
    train_x, train_y = _loadDigitDir(os.path.join(dataSetDir, 'trainingDigits'))

    ## step 2: Getting testing set
    print("---Getting testing set...")
    test_x, test_y = _loadDigitDir(os.path.join(dataSetDir, 'testDigits'))

    return train_x, train_y, test_x, test_y


# test hand writing class

def testHandWritingClass(k=3):
    """Run the kNN digit classifier on the test set and report accuracy.

    Loads the data with ``loadDataSet()``, classifies every test sample
    with ``kNNClassify`` and prints the share of correct predictions.

    Args:
        k: number of neighbors passed to ``kNNClassify`` (default 3, the
            value the original script used).

    Returns:
        The accuracy as a float between 0 and 1.

    Raises:
        ValueError: if the test set contains no samples.
    """
    ## step 1: load data
    print("step 1: load data...")
    train_x, train_y, test_x, test_y = loadDataSet()

    ## step 2: training...
    # Nothing to do here: classification works directly on the
    # training samples.
    print("step 2: training...")

    ## step 3: testing
    print("step 3: testing...")
    numTestSamples = test_x.shape[0]
    if numTestSamples == 0:
        # Without this guard the accuracy computation divides by zero.
        raise ValueError("test set is empty; cannot compute accuracy")

    # An explicit counter is used on purpose: ``sum`` is shadowed by
    # numpy's version through ``from numpy import *``.
    matchCount = 0
    for i in range(numTestSamples):
        if kNNClassify(test_x[i], train_x, train_y, k) == test_y[i]:
            matchCount += 1
    accuracy = float(matchCount) / numTestSamples

    ## step 4: show the result
    print("step 4: show the result...")
    print('The classify accuracy is: %.2f%%' % (accuracy * 100))
    return accuracy

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值