前言
这是CS231n 图像分类课时的作业,自己编写KNN算法,实现图像分类,该任务涉及L1,L2距离,以及K值超参数的选择
1. KNN算法
KNN算法思想就是将输入的每一个测试样本与所有的训练样本计算距离值,然后选择K个距离最小的候选者,采用少数服从多数的投票方式选择类别
2. 实战部分
2.1 数据集载入
这里选用的数据集是 cifar-10 数据集 http://www.cs.toronto.edu/~kriz/cifar.html
The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.
import numpy as np
def load_cifar10_batch(filename):
    """Load a single pickled CIFAR-10 batch file.

    Args:
        filename: path to one batch file (e.g. ``data_batch_1`` or ``test_batch``).

    Returns:
        (x, y): ``x`` is a float ndarray of shape (N, 32, 32, 3) in HWC order,
        ``y`` is an int ndarray of shape (N,) with the class labels.
    """
    import pickle
    with open(filename, 'rb') as f:
        # CIFAR-10 batches were pickled under Python 2, so dict keys are bytes.
        data = pickle.load(f, encoding='bytes')
    x = data[b'data']
    y = data[b'labels']
    # Each row is a flat 3072-vector laid out channel-first (3x32x32).
    # Infer N with -1 instead of hard-coding 10000 so smaller batches work too.
    x = x.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    y = np.array(y)
    return x, y
def load_cifar10(filename):
    """Load the full CIFAR-10 dataset from a directory.

    Reads the five training batches (``data_batch_1`` .. ``data_batch_5``)
    plus the separate ``test_batch``.

    Args:
        filename: path to the unpacked ``cifar-10-batches-py`` directory.

    Returns:
        X_train, Y_train, X_test, Y_test as ndarrays
        (50000 training and 10000 test samples for the standard dataset).
    """
    import os
    X = []
    Y = []
    for i in range(1, 6):
        # os.path.join is portable; manual '/' concatenation is not.
        x, y = load_cifar10_batch(os.path.join(filename, 'data_batch_%s' % i))
        X.append(x)
        Y.append(y)
    X_train = np.concatenate(X)
    Y_train = np.concatenate(Y)
    X_test, Y_test = load_cifar10_batch(os.path.join(filename, 'test_batch'))
    return X_train, Y_train, X_test, Y_test
# Load CIFAR-10 from a local download.
# NOTE(review): hard-coded absolute path — adjust for your own machine.
X_train,Y_train,X_test,Y_test=load_cifar10("/Users/yugui/Downloads/cifar-10-batches-py")
print('训练集大小:',X_train.shape)
print('测试集大小:',X_test.shape)
# Display a few random training images for every class.
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0)  # default figure size
plt.rcParams['image.interpolation'] = 'nearest'  # interpolation style
plt.rcParams['image.cmap'] = 'gray'  # default colormap
'''enumerate函数说明:
函数原型:enumerate(sequence, [start=0]) #第二个参数为指定索引
功能:将可循环序列sequence以start开始分别列出序列数据和数据下标
即对一个可遍历的数据对象(如列表、元组或字符串),enumerate会将该数据对象组合为一个索引序列,同时列出数据和数据下标'''
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7  # random examples drawn per class
for y, cls in enumerate(classes):
    # flatnonzero returns the flat indices of non-zero entries, i.e. the
    # positions of all training images whose label equals y.
    idxs = np.flatnonzero(Y_train == y)
    idxs = np.random.choice(idxs, samples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        # Subplot grid: samples_per_class rows x num_classes columns,
        # one column per class; subplot indices are 1-based.
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            plt.title(cls)
plt.show()
# Flatten every image into one row vector, and keep only the first 500
# test images so the experiments below finish quickly.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test[:500]
X_test = X_test.reshape(X_test.shape[0], -1)
print (X_train.shape, X_test.shape)
2.2 train部分
KNN 算法的 train 部分不涉及任何计算,只需把训练数据保存(赋值)下来即可
def train(self, X_tr, Y_tr):
    """Memorize the training data — KNN has no real fitting step."""
    self.X_train, self.Y_train = X_tr, Y_tr
2.2 距离计算
这里只列举欧式距离(即 L2 距离),其计算共有三种脚本写法:
- compute_distances_two_loops
- compute_distances_one_loop
- compute_distances_no_loop
2.2.1 compute_distances_two_loops
使用两层循环来计算测试样本与训练样本的L2距离(欧式距离)
def compute_distances_two_loops(self, X):
    """Euclidean (L2) distances via an explicit double loop.

    Slow reference implementation: one distance computation per
    (test sample, train sample) pair.
    """
    n_test = X.shape[0]
    n_train = self.X_train.shape[0]
    dists = np.zeros((n_test, n_train))
    for i in range(n_test):
        for j in range(n_train):
            diff = self.X_train[j] - X[i]
            dists[i, j] = np.sqrt((diff * diff).sum())
    return dists
2.2.2 compute_distances_one_loop
使用一层for循环实现:
def compute_distances_one_loop(self, X):
    """Euclidean (L2) distances with a single loop over test samples.

    Each iteration broadcasts one test row against the whole training
    matrix and reduces along the feature axis.
    """
    n_test = X.shape[0]
    n_train = self.X_train.shape[0]
    dists = np.zeros((n_test, n_train))
    for i in range(n_test):
        dists[i, :] = np.sqrt(((self.X_train - X[i]) ** 2).sum(axis=1))
    return dists
2.2.3 compute_distances_no_loop
使用numpy的矩阵运算方式
def compute_distances_no_loop(self, X):
    """Fully vectorized Euclidean (L2) distances.

    Expands ||a - b||^2 = ||a||^2 + ||b||^2 - 2*a.b, so the whole distance
    matrix is one matrix product plus two broadcast norm matrices.

    Args:
        X: (num_test, D) test samples.

    Returns:
        (num_test, num_train) distance matrix.
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    # Fixes vs. the original snippet: the helper is a method, so it must be
    # invoked through self, and the computed matrix must be returned.
    dists = np.sqrt(self.getNormMatrix(X, num_train).T
                    + self.getNormMatrix(self.X_train, num_test)
                    - 2 * np.dot(X, self.X_train.T))
    return dists

def getNormMatrix(self, x, lines_num):
    """Tile the per-row squared norms of x into a (lines_num, len(x)) matrix."""
    return np.ones((lines_num, 1)) * np.sum(np.square(x), axis=1)
2.3 predict 部分
#根据计算得到的距离关系, 挑选 K 个数据组成选民, 进行党派选举
def predict_lables(self, dists, k):
    """Vote among the k nearest training labels for each test sample.

    Args:
        dists: (num_test, num_train) distance matrix.
        k: number of nearest neighbours to consult.

    Returns:
        (num_test,) array holding the majority label of each test sample
        (ties broken toward the larger label value).
    """
    num_test = dists.shape[0]
    y_pred = np.zeros(num_test)
    for i in range(num_test):  # xrange is Python 2 only
        sorted_index = np.argsort(dists[i])
        # train() stores the labels as self.Y_train (not self.y_train).
        closest_y = self.Y_train[sorted_index[:k]]
        # Build (count, label) pairs; the lexicographically largest wins.
        timeLabel = sorted([(np.sum(closest_y == y_), y_) for y_ in set(closest_y)])[-1]
        y_pred[i] = timeLabel[1]
    return y_pred  # the original snippet forgot to return the result
def predict(self, X, k, loop_num):
    """Predict labels for the samples in X.

    Args:
        X: (num_test, D) samples.
        k: number of neighbours used for voting.
        loop_num: distance implementation selector —
            0 = vectorized, 1 = one loop, 2 = two loops.

    Returns:
        (num_test,) array of predicted labels.

    Raises:
        ValueError: if loop_num is not 0, 1 or 2.
    """
    # The distance routines are methods, so they must be called through
    # self (the original snippet referenced them as bare names).
    if loop_num == 1:
        dists = self.compute_distances_one_loop(X)
    elif loop_num == 0:
        dists = self.compute_distances_no_loop(X)
    elif loop_num == 2:
        dists = self.compute_distances_two_loops(X)
    else:
        raise ValueError("invalid loop_num %d" % loop_num)
    return self.predict_lables(dists, k)
2.4 评估准确率
# Accuracy = fraction of matching predictions.
# NOTE(review): y_test_pred / y_test are defined elsewhere in the notebook —
# presumably predict() output and the ground-truth labels; verify there.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / y_test.shape[0]
print ('Got %d / %d correct => accuracy: %f' % (num_correct, y_test.shape[0], accuracy))
3. KNN类的代码
class KNN():
    """K-nearest-neighbour classifier for flattened sample vectors.

    ``train`` memorizes the data; ``predict`` computes L2 distances with
    one of three implementations and majority-votes among the k nearest
    training labels.
    """

    def __init__(self):
        pass

    def train(self, X_tr, Y_tr):
        """Store the training data (KNN has no real fitting step)."""
        self.X_train = X_tr
        self.Y_train = Y_tr

    def compute_distances_two_loops(self, X):
        """L2 distances via an explicit double loop (slow reference)."""
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                dists[i, j] = np.sqrt(np.sum(np.square(self.X_train[j] - X[i])))
        return dists

    def compute_distances_one_loop(self, X):
        """L2 distances with one loop over test rows plus broadcasting."""
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcast X[i] against all training rows, reduce per row.
            dists[i, :] = np.sqrt(np.sum(np.square(X[i] - self.X_train), axis=1))
        return dists

    def getNormMatrix(self, x, lines_num):
        """Tile the per-row squared norms of x into a (lines_num, len(x)) matrix."""
        return np.ones((lines_num, 1)) * np.sum(np.square(x), axis=1)

    def compute_distances_no_loop(self, X):
        """Vectorized L2 distances via ||a-b||^2 = ||a||^2 + ||b||^2 - 2*a.b."""
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.sqrt(self.getNormMatrix(X, num_train).T
                        + self.getNormMatrix(self.X_train, num_test)
                        - 2 * np.dot(X, self.X_train.T))
        return dists

    def predict_lables(self, dists, k):
        """Majority vote among the k nearest labels for every test row.

        Ties are broken toward the larger label value.
        """
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            sorted_index = np.argsort(dists[i])
            closest_y = np.array(self.Y_train[sorted_index[:k]])
            # (count, label) pairs; the lexicographically largest pair wins.
            timeLabel = sorted([(np.sum(closest_y == y_), y_) for y_ in set(closest_y)])[-1]
            # BUG FIX: store the label (index 1), not the vote count (index 0).
            y_pred[i] = timeLabel[1]
        return y_pred

    def predict(self, X, k, loop_num):
        """Predict labels for X using k neighbours.

        loop_num selects the distance routine: 0 = vectorized,
        1 = one loop, 2 = two loops.

        Raises:
            ValueError: if loop_num is not 0, 1 or 2.
        """
        if loop_num == 1:
            dists = self.compute_distances_one_loop(X)
        elif loop_num == 0:
            dists = self.compute_distances_no_loop(X)
        elif loop_num == 2:
            dists = self.compute_distances_two_loops(X)
        else:
            raise ValueError("invalid loop_num %d" % loop_num)
        return self.predict_lables(dists, k)
# Build the classifier and memorize the full training set.
classifier=KNN()
# NOTE(review): after the flattening above, X_train is (50000, 3072),
# not (50000, 32, 32, 3) as the original comment claimed.
classifier.train(X_train,Y_train)
def time_function(f, *args):
    """Return the wall-clock seconds taken to execute f(*args)."""
    import time
    start = time.time()
    f(*args)
    return time.time() - start
# Benchmark the three distance implementations on the 500-sample test set.
two_loop_time = time_function(classifier.compute_distances_two_loops, X_test)
print ('Two loop version took %f seconds' % two_loop_time)
one_loop_time = time_function(classifier.compute_distances_one_loop, X_test)
print ('One loop version took %f seconds' % one_loop_time)
no_loop_time = time_function(classifier.compute_distances_no_loop, X_test)
print ('No loop version took %f seconds' % no_loop_time)
结果如下:
Two loop version took 656.721672 seconds
One loop version took 2033.967067 seconds
No loop version took 10.213138 seconds
从这里可以看出使用numpy矩阵运算明显快很多。
4. 交叉验证
# 5-fold cross-validation to choose the hyper-parameter k.
num_folds = 5
k_choices = [1, 3, 5]
k_to_accuracies = {}
# Split the training data into num_folds equal parts.
# BUG FIX: the original referenced an undefined name `num_fold` here.
X_train_folds = np.array_split(X_train, num_folds)
Y_train_folds = np.array_split(Y_train, num_folds)
# Sanity check: shape of four stacked folds (the training portion).
print(np.array(X_train_folds[:1] + X_train_folds[2:]).shape)
for k in k_choices:
    # One accuracy value per fold for this k.
    k_to_accuracies[k] = np.zeros(num_folds)
    for i in range(num_folds):
        # Fold i is the validation split; the remaining folds train.
        Xtr = np.array(X_train_folds[:i] + X_train_folds[i + 1:])
        ytr = np.array(Y_train_folds[:i] + Y_train_folds[i + 1:])
        Xte = np.array(X_train_folds[i])
        yte = np.array(Y_train_folds[i])
        # Collapse the stacked folds back into 2-D samples / 1-D labels.
        # BUG FIX: labels are kept 1-D — reshaping them to (N, 1) made
        # set(closest_y) in predict_lables fail on unhashable rows.
        Xtr = np.reshape(Xtr, (int(X_train.shape[0] * 4 / 5), -1))
        ytr = np.reshape(ytr, (int(Y_train.shape[0] * 4 / 5),))
        Xte = np.reshape(Xte, (int(X_train.shape[0] / 5), -1))
        yte = np.reshape(yte, (int(Y_train.shape[0] / 5),))
        print(Xte.shape)
        print(Xtr.shape)
        classifier = KNN()
        classifier.train(Xtr, ytr)
        yte_pred = classifier.predict(Xte, k, 0)
        num_correct = np.sum(yte_pred == yte)
        accuracy = float(num_correct) / len(yte)
        k_to_accuracies[k][i] = accuracy
# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))