经典的数字识别问题,分别调用 KNN、随机森林(random forest)、SVM+PCA 这 3 种方法。
主要利用的是sklearn库,pandas库, numpy库
1. KNN 是看了别人的博客之后自己动手重复了一遍;后来发现这种提取数据的方法太冗长了,后续会贴出更精炼的代码。
import csv
import operator

from numpy import *


def loadTrainData():
    """Load the training set from ``train.csv`` in the working directory.

    The first CSV row is treated as a header and discarded. In every
    remaining row the first column is the label and the other columns are
    the feature values.

    Returns:
        A ``(data, label)`` tuple, where ``data`` is
        ``nomalizing(toInt(features))`` and ``label`` is ``toInt(labels)``.
        Per the original author's note, data is 42000 x 784 and label is
        1 x 42000 for the digit-recognition training file.
    """
    with open('train.csv') as csv_file:
        rows = list(csv.reader(csv_file))
    # rows[0] is the header line; keep only the sample rows.
    samples = array(rows[1:])
    label = samples[:, 0]
    data = samples[:, 1:]
    return nomalizing(toInt(data)), toInt(label)


def toInt(array):
    """Convert an array of integer-like values to a 2-D float array.

    Each element is converted to an integer (so strings such as ``'12'``
    become ``12``) and stored in a new float array, matching what the
    original ``zeros``-based loop produced.

    Args:
        array: Scalar, 1-D or 2-D array-like of values convertible to
            int. 1-D input is treated as a single row.

    Returns:
        A new 2-D float ndarray; 1-D input of length N yields shape (1, N).

    Raises:
        ValueError: If the input has more than two dimensions or an
            element cannot be converted to an integer.
    """
    # atleast_2d replaces mat(), which is no longer exported by NumPy 2.0.
    values = atleast_2d(asarray(array))
    # Unpacking rejects inputs with more than two dimensions, as mat() did.
    m, n = shape(values)
    # Cast through an integer dtype first so non-integer text is rejected,
    # then to float because the original filled a zeros() (float) array.
    return values.astype(int).astype(float)


def nomalizing(array): m