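这里使用的手写数字数据集并不是大家熟知的 MNIST,而是土耳其伊斯坦布尔海峡大学(Boğaziçi University)计算机工程系的 E. Alpaydin 与 C. Kaynak 提供的数据集,地址:https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits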
每一个数字为 32×32 的黑白图像(以 32 行、每行 32 个 0/1 字符的文本文件保存),如下图所示:
程序如下:
import numpy as np
import os
import operator
def img2vector(filename):
    """Read a 32x32 text image of digit characters into a 1x1024 row vector.

    The first 32 characters of each of the first 32 lines of the file are
    converted with ``int`` and stored row by row, so the character at
    line ``i``, column ``j`` ends up at index ``32 * i + j``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``numpy.ndarray`` of shape ``(1, 1024)`` created by ``np.zeros``.

    Raises:
        IndexError: If one of the first 32 lines has fewer than 32 characters.
        ValueError: If a character cannot be converted with ``int``.
    """
    return_vec = np.zeros((1, 1024))
    # The context manager closes the file even when parsing raises; the
    # previous version never closed the handle.
    with open(filename) as fr:
        for i in range(32):
            line_str = fr.readline()
            for j in range(32):
                return_vec[0, 32 * i + j] = int(line_str[j])
    return return_vec
def classify0(inX, data_set, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training rows.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``data_set``.

    Args:
        inX: Feature vector to classify; anything that broadcasts against
            the rows of ``data_set`` (e.g. shape ``(n,)`` or ``(1, n)``).
        data_set: 2-D ``numpy.ndarray`` with one training sample per row.
        labels: Sequence where ``labels[i]`` is the class of ``data_set[i]``.
        k: Number of nearest neighbours that vote. Values larger than the
            number of training rows are clamped to that number.

    Returns:
        The label with the most votes. On a tie, the tied label that
        received its first vote from the nearer neighbour wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``data_set`` has no rows.
    """
    data_set_size = data_set.shape[0]
    if k < 1 or data_set_size == 0:
        raise ValueError("classify0 needs k >= 1 and a non-empty data_set")

    # Broadcasting subtracts every training row from inX without building
    # the tiled (data_set_size x n) copy that np.tile would allocate.
    diff_mat = np.asarray(inX) - data_set
    distances = (diff_mat ** 2).sum(axis=1) ** 0.5
    sorted_dist_indicies = distances.argsort()

    class_count = {}
    # Clamp k so asking for more neighbours than exist uses all of them
    # instead of indexing past the end.
    for neighbour in sorted_dist_indicies[:min(k, data_set_size)]:
        vote_ilabel = labels[neighbour]
        class_count[vote_ilabel] = class_count.get(vote_ilabel, 0) + 1

    # max() returns the first maximal key in insertion order, which is the
    # same winner a stable descending sort by count would put first.
    return max(class_count, key=class_count.get)
def _digit_label(file_name):
    """Return the integer found before the first '_' of a sample file name."""
    return int(file_name.split('.')[0].split('_')[0])


def handwriting_class_test(training_dir='digits/trainingDigits',
                           test_dir='digits/testDigits', k=3):
    """Evaluate the kNN digit classifier and print the running error rate.

    Every file in ``training_dir`` is loaded with ``img2vector`` into one row
    of a training matrix; its label is the integer before the first ``_`` in
    the file name. Every file in ``test_dir`` is then classified with
    ``classify0`` and compared against the label taken from its own name.
    One line is printed per test file, followed by the total error rate.

    Args:
        training_dir: Directory holding the training sample files.
        test_dir: Directory holding the test sample files.
        k: Number of neighbours passed to ``classify0``.

    Returns:
        The final error rate as a fraction in ``[0, 1]`` (``0.0`` when the
        test directory is empty).
    """
    # os.listdir returns entries in arbitrary order; sorting makes the
    # per-file output and the running error rate reproducible.
    training_file_list = sorted(os.listdir(training_dir))
    hw_labels = [_digit_label(name) for name in training_file_list]
    training_mat = np.zeros((len(training_file_list), 1024))
    for row, file_name_string in enumerate(training_file_list):
        training_mat[row, :] = img2vector(
            os.path.join(training_dir, file_name_string))

    error_count = 0.0
    error_rate = 0.0
    for i, file_name_string in enumerate(sorted(os.listdir(test_dir))):
        test_digit_class = _digit_label(file_name_string)
        test_vector = img2vector(os.path.join(test_dir, file_name_string))
        classify_result = classify0(test_vector, training_mat, hw_labels, k)
        if classify_result != test_digit_class:
            error_count += 1
        # Error rate over the files processed so far, not over the whole set.
        error_rate = error_count / (i + 1)
        print("classify result: %d, real result: %d, current error rate: %f%%" % (classify_result, test_digit_class, error_rate*100))
    print("=====> end of the program <=====")
    print("total error rate: %f%%" % (error_rate*100))
    return error_rate
# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    handwriting_class_test()
结果如下:
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
classify result: 0, real result: 0, current error rate: 0.000000%
...
classify result: 9, real result: 9, current error rate: 1.084599%
classify result: 9, real result: 9, current error rate: 1.083424%
classify result: 9, real result: 9, current error rate: 1.082251%
classify result: 9, real result: 9, current error rate: 1.081081%
classify result: 9, real result: 9, current error rate: 1.079914%
classify result: 9, real result: 9, current error rate: 1.078749%
classify result: 9, real result: 9, current error rate: 1.077586%
classify result: 9, real result: 9, current error rate: 1.076426%
classify result: 9, real result: 9, current error rate: 1.075269%
classify result: 9, real result: 9, current error rate: 1.074114%
classify result: 9, real result: 9, current error rate: 1.072961%
classify result: 9, real result: 9, current error rate: 1.071811%
classify result: 9, real result: 9, current error rate: 1.070664%
classify result: 9, real result: 9, current error rate: 1.069519%
classify result: 9, real result: 9, current error rate: 1.068376%
classify result: 9, real result: 9, current error rate: 1.067236%
classify result: 9, real result: 9, current error rate: 1.066098%
classify result: 9, real result: 9, current error rate: 1.064963%
classify result: 9, real result: 9, current error rate: 1.063830%
classify result: 9, real result: 9, current error rate: 1.062699%
classify result: 9, real result: 9, current error rate: 1.061571%
classify result: 9, real result: 9, current error rate: 1.060445%
classify result: 9, real result: 9, current error rate: 1.059322%
classify result: 9, real result: 9, current error rate: 1.058201%
classify result: 9, real result: 9, current error rate: 1.057082%
=====> end of the program <=====
total error rate: 1.057082%