一、KNN
1.读取数据
# Load the raw CIFAR-10 dataset and report the array shapes.
import numpy as np
import random
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
import os

# Figure defaults: larger canvas, no pixel interpolation, grayscale colormap.
plt.rcParams['figure.figsize'] = (10.0, 8.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load the raw CIFAR-10 data from the assignment's dataset folder.
os.chdir('E://Python//deep learning CS231n//assignment1')
cifar10_dir = 'E://Python//deep learning CS231n//assignment1//cs231n//datasets'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# Sanity-check what came back.
print('Training data shape:', X_train.shape)
print("Training labels shape:", y_train.shape)
print('Test data shape:', X_test.shape)
print('Test labels shape:', y_test.shape)
结果为
Training data shape: (50000, 32, 32, 3)
Training labels shape: (50000,)
Test data shape: (10000, 32, 32, 3)
Test labels shape: (10000,)
2.显示一些样本
enumerate()返回一个可迭代对象的枚举形式,如下例
返回0 plane/ 1 car/ 2 bird/......
# Display a few random examples from each of the ten CIFAR-10 classes,
# laid out as a grid: one column per class, samples_per_class rows.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
    # Indices of every training image whose label equals y.
    idxs = np.flatnonzero(y_train == y)
    # Draw 7 distinct images of this class at random.
    idxs = np.random.choice(idxs, samples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        # Subplot positions are 1-based and filled row-major,
        # so sample i of class y lands in row i, column y.
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            # Title the top image of each column with its class name.
            plt.title(cls)
plt.show()
3.调整数据集大小
# Subsample the dataset so the exercise runs quickly, then flatten each
# image into a single row vector.
# Fix: the original built `mask = range(n)` and fancy-indexed with it,
# which materializes an index array; a plain slice `[:n]` is the
# idiomatic equivalent and avoids the throwaway `mask` variable.
num_training = 5000
X_train = X_train[:num_training]  # keep the first 5000 training images
y_train = y_train[:num_training]

num_test = 500
X_test = X_test[:num_test]        # keep the first 500 test images
y_test = y_test[:num_test]

# Reshape each (32, 32, 3) image into a flat 3072-dimensional row:
# X_train becomes (5000, 3072), X_test becomes (500, 3072).
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)
现在X_train变成5000*3072
X_test变成了500*3072
4.KNN类的实现
计算距离用的是L2距离
- np.argsort()函数返回的是数组值从小到大的索引值,我们需要将距离最小的k个图片挑出来,然后数它们所属的类的个数
- np.bincount(x)函数返回一个数组,其第i个元素是整数i在x中出现的次数,如a=np.array([1,1,2,3,4,6]), np.bincount(a)=[0(0的个数),2(1的个数),1,1,1,0,1]
class KNearestNeighbor: # 首先是定义一个处理KNN的类 """ a kNN classifier with L2 distance """ def __init__(self): pass def train(self, X, y): """ Train the classifier. For k-nearest neighbors this is just memorizing the training data. Inputs: - X: A numpy array of shape (num_train, D) containing the training data consisting of num_train samples each of dimension D. - y: A numpy array of shape (N,) containing the training labels, where y[i] is the label for X[i]. """ self.X_train = X self.y_train = y def predict(self,