最近选修了计算机学院的模式识别课,其中讲到了 Parzen 窗和 kNN 算法这两种对总体分布的非参数估计方法。之前在《机器学习实战》中用 kNN 算法进行了手写数字识别,今天继续用基于 Parzen 窗原理的 PNN(概率神经网络)算法写出 Python 程序,比较一下两者的优劣。
PNN算法将是我写的第一个需要训练的机器学习程序。
2016.3.15
———分割线,以下为之前内容,kNN算法—————————————————————
kNN.py
# -*- coding: utf-8 -*-
###########
#KNN: the k nearest neighbours
###########
from numpy import *
import operator
import os
def kNNClassify(new_input, dataset, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Parameters:
        new_input: feature vector of the sample to classify; either a flat
            vector or a single-row matrix whose length equals the number of
            columns of ``dataset``.
        dataset: training samples, one row per sample.
        labels: class label of each row of ``dataset``, indexable by row
            number.
        k: number of nearest neighbours that vote; must satisfy
            1 <= k <= number of rows in ``dataset``.

    Returns:
        The label that collected the most votes among the k nearest rows.

    Raises:
        ValueError: if k is smaller than 1 or larger than the number of
            training rows.
    """
    dataset = asarray(dataset)
    num = dataset.shape[0]
    if k < 1 or k > num:
        raise ValueError('k must be between 1 and %d, got %r' % (num, k))

    # Broadcasting subtracts new_input from every training row at once,
    # so no explicit tiled copy of the input is needed.
    diff = dataset - asarray(new_input)

    # Rank by squared Euclidean distance: the square root is monotonic,
    # so taking it would not change the neighbour order.
    distance = (diff ** 2).sum(axis=1)
    sortedDistIndice = argsort(distance)

    # Count the votes of the k closest training samples.
    voteCount = {}
    for idx in sortedDistIndice[:k]:
        voteLabel = labels[idx]
        voteCount[voteLabel] = voteCount.get(voteLabel, 0) + 1

    # max() keeps the first key with the highest count, so ties go to
    # whichever tied label the dict yields first.
    return max(voteCount, key=voteCount.get)
def img2Vector(filename, rows=32, cols=32):
    """Read a text-encoded digit image into a single-row feature matrix.

    The file is read line by line; the first ``cols`` characters of each of
    the first ``rows`` lines are converted with ``int()`` and stored in
    row-major order.

    Parameters:
        filename: path of the text file to read.
        rows: number of lines to read (default 32).
        cols: number of characters to read from each line (default 32).

    Returns:
        A matrix of shape [1, rows*cols] holding the converted values.

    Raises:
        IndexError: if a line has fewer than ``cols`` characters (this
            includes the file having fewer than ``rows`` lines).
        ValueError: if a character cannot be converted by ``int()``.
    """
    imgVector = zeros([1, rows * cols])
    # The with-block guarantees the file is closed even if parsing fails.
    with open(filename) as fileIn:
        for row in range(rows):
            lineStr = fileIn.readline()
            for col in range(cols):
                imgVector[0, row * cols + col] = int(lineStr[col])
    return imgVector
def _loadDigitDir(dirName):
    """Load every file of one directory as a 1024-feature sample plus label."""
    rows = 32
    cols = 32
    fileNames = os.listdir(dirName)
    samples = zeros((len(fileNames), rows * cols))
    sampleLabels = []
    for i, name in enumerate(fileNames):
        samples[i, :] = img2Vector(os.path.join(dirName, name))
        # The label is the integer before the first '_' in the file name.
        sampleLabels.append(int(name.split('_')[0]))
    return samples, sampleLabels


def loadDataSet():
    """Load the training set from './1/' and the test set from './2/'.

    Every directory entry is read with img2Vector, and its label is parsed
    from the part of the file name before the first underscore.

    Returns:
        A tuple (train_x, train_y, test_x, test_y): train_x and test_x are
        matrices with one 1024-value row per file, train_y and test_y are
        lists holding the matching integer labels.

    Raises:
        ValueError: if a file name does not start with an integer followed
            by '_' (raised by the int() conversion of the label).
    """
    # print() with a single argument prints the same text on Python 2 and 3.
    print('getting training set')
    train_x, train_y = _loadDigitDir('./1/')
    print('getting test set')
    test_x, test_y = _loadDigitDir('./2/')
    return train_x, train_y, test_x, test_y
def testHandWriting():
    """Evaluate the 3-nearest-neighbour classifier on the test set.

    Loads the data with loadDataSet(), classifies every test sample against
    the training samples with kNNClassify (k = 3), and prints the share of
    correctly classified samples as a percentage with two decimals.

    Raises:
        ValueError: if the test set contains no samples, because the
            accuracy would be undefined.
    """
    # print() with a single argument prints the same text on Python 2 and 3.
    print('loading data')
    train_x, train_y, test_x, test_y = loadDataSet()
    # There is no training step: each prediction compares the test sample
    # with the stored training samples directly.
    print('traning')
    print('testing')
    numTestSamples = test_x.shape[0]
    if numTestSamples == 0:
        raise ValueError('test set is empty; cannot compute accuracy')
    matchCount = 0
    for i in range(numTestSamples):
        if kNNClassify(test_x[i], train_x, train_y, 3) == test_y[i]:
            matchCount += 1
    accuracy = float(matchCount) / numTestSamples
    print('show the result...\n')
    print('%.2f%%' % (accuracy * 100))
test_kNN.py
#test-kNN
# Driver script: imports the kNN module and calls its testHandWriting()
# entry point, which loads the data, classifies the test samples and prints
# the resulting accuracy.
import kNN
kNN.testHandWriting()