# 调用sklearn中的KNN算法库;图片数据集为matlab文件,使用scipy.io.loadmat读取,并对图片进行一维化处理
from scipy.io import loadmat as load
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier as kNN
import numpy as np
# Load the MATLAB-format train/test files, then pull the sample array
# (key 'X') and the label array (key 'y') out of each loaded mapping.
train = load('train_32x32.mat')
test = load('test_32x32.mat')
train_samples, train_labels = train['X'], train['y']
test_samples, test_labels = test['X'], test['y']
def reformat(samples, labels):
    """Reorder the image tensor and one-hot encode the labels.

    Args:
        samples: 4-D array laid out as (height, width, channels, n_images).
        labels: sequence of rows whose first element is the class label.
            The label 10 is mapped to class 0; every other label is used
            directly as the class index (presumably 1..9 -- TODO confirm
            against the data set).

    Returns:
        A tuple ``(samples, labels)`` where ``samples`` is laid out as
        (n_images, height, width, channels) and ``labels`` is a float32
        array of shape (n_images, 10) holding one one-hot row per image.

    Raises:
        IndexError: if a label falls outside the valid index range for
            10 classes.
    """
    # Move the image-index axis to the front: (H, W, C, N) -> (N, H, W, C).
    samples = np.transpose(samples, (3, 0, 1, 2))

    # Take the first element of every label row to get a flat 1-D array.
    digits = np.array([row[0] for row in labels])
    if digits.size == 0:
        # Keep the (n, 10) layout even when there is nothing to encode.
        return samples, np.zeros((0, 10), dtype=np.float32)

    # The label 10 is stored in slot 0 of the one-hot vector.
    digits = np.where(digits == 10, 0, digits)

    # Row k of the identity matrix is the one-hot vector for class k, so
    # indexing with the label array encodes every label at once.
    one_hot_labels = np.eye(10, dtype=np.float32)[digits]
    return samples, one_hot_labels
def normalize(samples):
    """Collapse the colour channels to one and rescale the pixel values.

    The channel axis (axis 3) is averaged into a single channel, then each
    value ``v`` is mapped linearly to ``v / 128 - 1``. For inputs in
    0..255 the result therefore lies in [-1.0, 0.9921875].

    Args:
        samples: array whose axis 3 is the colour-channel axis
            (presumably (n_images, height, width, channels) -- TODO
            confirm against the caller).

    Returns:
        A floating-point array with the same shape as ``samples`` except
        that axis 3 has length 1.
    """
    # Average the channels rather than summing them: a plain sum of three
    # 0..255 channels spans 0..765 and would push the rescaled values far
    # outside the intended [-1, 1] range.
    gray = np.mean(samples, axis=3, keepdims=True)
    return gray / 128 - 1
def inspect(data_set, labels, i):
    """Print the label at index ``i`` and display the matching image."""
    label = labels[i]
    print(label)
    image = data_set[i]
    plt.imshow(image)
    plt.show()
if __name__ == '__main__':
    # Training data: reorder axes, collapse channels and rescale, then
    # flatten every image into a single feature vector.
    train_samples, train_labels = reformat(train_samples, train_labels)
    train_samples = normalize(train_samples)
    train_samples = train_samples.reshape(train_samples.shape[0], -1)

    # Test data goes through exactly the same preprocessing.
    test_samples, test_labels = reformat(test_samples, test_labels)
    test_samples = normalize(test_samples)
    test_samples = test_samples.reshape(test_samples.shape[0], -1)

    # reformat() returns one-hot label rows. Fitting on a 2-D label matrix
    # makes scikit-learn treat every column as a separate output, and an
    # element-wise comparison of one-hot rows overstates accuracy because a
    # wrong prediction still agrees on most of its entries. Collapse the
    # rows to class indices so this is an ordinary multi-class problem.
    train_classes = np.argmax(train_labels, axis=1)
    test_classes = np.argmax(test_labels, axis=1)

    # Evaluate on the first 1000 test images only.
    eval_count = 1000
    eval_samples = test_samples[:eval_count]
    eval_classes = test_classes[:eval_count]

    knn_clf = kNN(n_neighbors=5)
    print(knn_clf.fit(train_samples, train_classes))
    predicted = knn_clf.predict(eval_samples)

    # Fraction of images whose predicted class equals the true class.
    rate = np.mean(predicted == eval_classes)
    print("正确率为%f" % rate)