下面的例子来源于《机器学习实战》,该例子只能识别数字 0-9。
首先需要将 32x32 的图像二进制数据转化为 1x1024 的向量(训练样本和测试样本都要经过这一步):
def imgTransformVector(filename):
    """Convert a 32x32 text-encoded binary image into a 1x1024 row vector.

    The first 32 characters of each of the first 32 lines are read as
    digits; the value at row ``i``, column ``j`` is stored at index
    ``32 * i + j`` of the result.

    Args:
        filename: Path of the text file holding the image.

    Returns:
        A NumPy array of shape (1, 1024) holding the pixel values.

    Raises:
        IndexError: If the file has fewer than 32 lines or a line is too short.
        ValueError: If one of the read characters is not a digit.
    """
    returnVector = np.zeros((1, 1024))
    # A context manager guarantees the file handle is closed, even when a
    # malformed line raises part-way through the conversion.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVector[0, 32 * i + j] = int(lineStr[j])
    return returnVector
接着是算法的实现代码:
def handWritingTextTest(trainingDir='/Users/Desktop/trainingDigits',
                        testDir='/Users/Desktop/testDigits', k=3):
    """Run the digit classifier over a test directory and print its error rate.

    Every ``.txt`` file in ``trainingDir`` is loaded into a training matrix
    (one 1024-element row per sample) and labelled with the integer that
    precedes the first ``_`` in its file name. Each ``.txt`` file in
    ``testDir`` is then classified with ``classifyPerson`` and compared with
    the label taken from its own file name.

    Args:
        trainingDir: Directory holding the training samples.
        testDir: Directory holding the test samples.
        k: Fourth argument forwarded to ``classifyPerson`` (presumably the
            number of nearest neighbours -- confirm against its definition).

    Returns:
        None. Results are written to standard output.
    """
    # os.listdir returns the names of all entries in the directory, in
    # arbitrary order. Keep only the '.txt' samples instead of blindly
    # dropping the first entry: that dropped a real sample whenever no stray
    # file (e.g. a hidden metadata file) happened to be listed first, and
    # left an unlabelled all-zero row at the end of the training matrix.
    trainingFileList = [name for name in os.listdir(trainingDir)
                        if name.endswith('.txt')]
    trainingDataLen = len(trainingFileList)  # size of the training set
    handWritingLabels = []
    trainingMatrix = np.zeros((trainingDataLen, 1024))
    for i, fileNameString in enumerate(trainingFileList):
        fileString = fileNameString.split('.')[0]  # strip the '.txt' part
        classNumber = int(fileString.split('_')[0])  # digit class of the sample
        handWritingLabels.append(classNumber)
        trainingMatrix[i, :] = imgTransformVector(
            os.path.join(trainingDir, fileNameString))

    testFileList = [name for name in os.listdir(testDir)
                    if name.endswith('.txt')]
    testDataLen = len(testFileList)
    errorCount = 0
    for fileNameString in testFileList:
        fileString = fileNameString.split('.')[0]
        classNumber = int(fileString.split('_')[0])
        testDataVector = imgTransformVector(
            os.path.join(testDir, fileNameString))
        # classifyPerson is defined elsewhere in the file.
        classifierResult = classifyPerson(
            testDataVector, trainingMatrix, handWritingLabels, k)
        if classifierResult != classNumber:
            errorCount += 1
        # NOTE(review): the original indentation was lost; this print is
        # assumed to run for every test sample, not only for errors -- confirm.
        print('the classifier:%d, the real answer:%d'
              % (classifierResult, classNumber))
    print('\nthe total errorCount:%d' % errorCount)
    # Divide by the number of samples actually classified, and avoid a
    # ZeroDivisionError when the test directory holds no samples.
    errorRate = errorCount / float(testDataLen) if testDataLen else 0.0
    # '%f' replaces the original '%.d', which truncated the rate to an integer.
    print('\nthe total errorRate:%f' % errorRate)