sklearn的简单使用
import numpy as np
from sklearn import neighbors
from sklearn.model_selection import train_test_split
import joblib
import os
def image2vector(filename):
    """Read a 32x32 grid of digit characters into a flat 1024-element vector.

    The first 32 lines of the text file are read; the first 32 characters of
    each line are converted with ``int`` and stored row by row, so the
    character at row ``i``, column ``j`` lands at index ``i * 32 + j``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A NumPy array of length 1024 (created with ``np.zeros``, so values
        are stored as floats).

    Raises:
        IndexError: If one of the first 32 lines has fewer than 32 characters.
        ValueError: If a character in the grid cannot be parsed by ``int``.
    """
    returnVect = np.zeros(1024)
    # Use a context manager so the file handle is closed even if parsing
    # fails part-way through (the handle used to be left open).
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[i * 32 + j] = int(lineStr[j])
    return returnVect
def getSourceDatas(filepath):
    """Load every file in a directory as a feature vector plus a label.

    Each directory entry is converted with ``image2vector``; its label is the
    part of the (stripped) file name before the first underscore.

    Args:
        filepath: Directory containing the sample files.

    Returns:
        A ``(datas, labels)`` tuple of two parallel lists: the vectors
        returned by ``image2vector`` and the label strings.
    """
    datas = []
    labels = []
    # NOTE: os.listdir returns entries in arbitrary order; datas and labels
    # stay aligned with each other because they are appended together.
    for entry in os.listdir(filepath):
        # os.path.join instead of manual "/" concatenation keeps the path
        # correct regardless of platform or a trailing separator.
        datas.append(image2vector(os.path.join(filepath, entry)))
        labels.append(entry.strip().split("_")[0])
    return datas, labels
if __name__ == "__main__":
    # Train a k-nearest-neighbours classifier on the samples under
    # ./trainingDigits, score it on ./testDigits, then round-trip the fitted
    # model through joblib and predict the first test sample.
    dir_path = os.getcwd()
    train_path = os.path.join(dir_path, "trainingDigits")
    test_path = os.path.join(dir_path, "testDigits")

    train_datas, train_labels = getSourceDatas(train_path)
    test_datas, test_labels = getSourceDatas(test_path)

    # The previous train_test_split call was removed: its outputs were never
    # used. The model is fitted on the full training directory and evaluated
    # on the separate test directory.
    clf = neighbors.KNeighborsClassifier()
    clf.fit(train_datas, train_labels)
    score = clf.score(test_datas, test_labels)
    print("正确率:" + str(score))

    # Persist the fitted model and load it back to show it can be reused.
    # NOTE: joblib.load is pickle-based; only load files you trust (here the
    # file was written by the line directly above).
    joblib.dump(clf, 'clf.pkl')
    clf3 = joblib.load('clf.pkl')

    print("预测结果:")
    print(clf3.predict(test_datas[0:1]))
    print("test结果:")
    print(test_labels[0:1])
测试和训练数据以及源码