cs231n knn作业-解释所用函数

作业的代码借鉴了 https://zhuanlan.zhihu.com/p/28204173 的代码,并借此认真复习了很多函数的用法。这篇文章用来解释用到的那些函数。

# Print the shapes of the training/test data arrays and of their label arrays.
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
Training data shape:  (50000, 32, 32, 3)
Training labels shape:  (50000,)
Test data shape:  (10000, 32, 32, 3)
Test labels shape:  (10000,)

先注意输出是四维数组和一维数组。

in[3]的函数

# enumerate
lis = ["C", "S", "D", "N"]  
for index, value in enumerate(lis):    # yields (index, value) pairs
    print(index, value)
0 C
1 S
2 D
3 N
# np.arange
import numpy as np 
print(np.arange(-2, 4, 2)) # start at -2, stop before 4, step 2
print(np.arange(5)) # 0 through 4; the stop value 5 itself is excluded
[-2  0  2]
[0 1 2 3 4]
# np.flatnonzero
print(np.flatnonzero(np.array([1, 3, 2, 4 ,3]) == 3)) # indices of the elements equal to 3
[1 4]
# numpy.random.choice(a, size=None, replace=True, p=None)
# a: an int or an array to sample from; size: shape of the output;
# replace: False means no value is drawn twice; p: probability assigned to each entry of a
print(np.random.choice(5, 3)) # three random values drawn from 0..4
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
print(np.random.choice(classes, 7, replace=False)) # seven class names, none repeated
[4 3 4]
['ship' 'plane' 'dog' 'horse' 'cat' 'deer' 'frog']
subplot(numRows, numCols, plotNum)
# split the figure into a numRows x numCols grid and select cell number plotNum
matplotlib.pyplot.imshow(X, cmap=None)
# X: the image or array to draw; cmap: the colormap (by default the data is drawn in RGB(A) colour space)
plt.axis('off') 
# hide the axes and their tick labels
plt.title(cls) 
# add a title

in[5]的函数

# numpy.reshape(a, newshape, order='C')
# a: the array to reshape; newshape: the target shape, e.g. (3, 4, 5). One entry may be -1,
# in which case that dimension is inferred from the array length and the remaining dimensions.
a = np.arange(8)
print(np.reshape(a,(2,4)))
print(np.reshape(a,(4,-1)))
[[0 1 2 3]
 [4 5 6 7]]
[[0 1]
 [2 3]
 [4 5]
 [6 7]]

两次循环代码

def compute_distances_two_loops(self, X):
    """Compute Euclidean distances between test rows and training rows.

    Every (test row, training row) pair is visited explicitly with two
    nested loops.

    Args:
        X: array of test samples, one sample per row.

    Returns:
        Array of shape (X.shape[0], self.X_train.shape[0]); entry [i, j]
        is the L2 distance between X[i] and self.X_train[j].
    """
    train = self.X_train
    n_test, n_train = X.shape[0], train.shape[0]
    dists = np.zeros((n_test, n_train))
    for row, test_vec in enumerate(X):
        for col, train_vec in enumerate(train):
            # Square root of the summed squared element-wise differences.
            diff = test_vec - train_vec
            dists[row, col] = np.sqrt(np.sum(diff ** 2))
    return dists

一次循环代码

def compute_distances_one_loop(self, X):
    """Compute Euclidean distances using a single loop over the test rows.

    Args:
        X: array of test samples, one sample per row.

    Returns:
        Array of shape (X.shape[0], self.X_train.shape[0]); entry [i, j]
        is the L2 distance between X[i] and self.X_train[j].
    """
    train = self.X_train
    dists = np.zeros((X.shape[0], train.shape[0]))
    for row, test_vec in enumerate(X):
        # Broadcasting subtracts this one test row from every training row;
        # the norm along axis 1 then reduces each difference row to a scalar.
        dists[row, :] = np.linalg.norm(test_vec - train, axis=1)
    return dists

没有循环代码

def compute_distances_no_loops(self, X):
    """Compute Euclidean distances with no explicit Python loop.

    Uses the expansion |x - y|^2 = |x|^2 - 2 * x.y + |y|^2, evaluated for
    all pairs at once with a matrix product and broadcasting.

    Args:
        X: array of test samples, one sample per row.

    Returns:
        Array of shape (X.shape[0], self.X_train.shape[0]); entry [i, j]
        is the L2 distance between X[i] and self.X_train[j].
    """
    # Per-row squared norms; summing over axis 1 leaves 1-D arrays of
    # length num_test and num_train respectively.
    test_sq = np.sum(np.square(X), axis=1)
    train_sq = np.sum(np.square(self.X_train), axis=1)
    # Pairwise dot products, shape (num_test, num_train).
    cross = np.dot(X, self.X_train.T)
    # test_sq becomes a column so it broadcasts down the rows, while
    # train_sq (1-D) broadcasts across the columns.
    sq_dists = -2 * cross + test_sq.reshape(-1, 1) + train_sq
    # Rounding in the expanded form can leave a tiny negative value where the
    # true distance is (nearly) zero; clamp so the square root is never NaN.
    return np.sqrt(np.maximum(sq_dists, 0))
这里复习广播和sum函数

# Broadcasting
a = np.array([
    [1,2,3],
    [1,2,3]
])
b = np.array([1,2,3])
# Shapes are aligned from the trailing dimension. A dimension that is missing or of
# size 1 is stretched to match the other operand, so (2, 3) combined with (3,) or
# with (1, 3) both give a (2, 3) result.
print('a.shape:',a.shape,'b.shape:', b.shape)
print('a + b:',a + b)
# sum
# Summing along an axis removes that axis, so both results here are 1-D arrays.
print('sum:',np.sum(a, axis = 0).shape, np.sum(a, axis = 1).shape)
a.shape: (2, 3) b.shape: (3,)
a + b: [[2 4 6]
 [2 4 6]]
sum: (3,) (2,)

  def predict_labels(self, dists, k=1):
    """Predict a label for each test row by majority vote of its k nearest neighbours.

    Args:
        dists: distance matrix; row i holds the distances from test sample i
            to every training sample.
        k: number of nearest training samples that vote.

    Returns:
        1-D float array of length dists.shape[0] with the winning label per row.
        On a tie the smallest label wins, because argmax returns the first maximum.
    """
    n_test = dists.shape[0]
    y_pred = np.zeros(n_test)
    for row in range(n_test):
        # Training indices ordered from nearest to farthest for this test row.
        order = np.argsort(dists[row])
        nearest_labels = self.y_train[order[:k]]
        # bincount counts how often each label value occurs, e.g.
        # [0, 1, 1, 3, 2, 1, 7] -> [1, 3, 1, 1, 0, 0, 0, 1]; argmax of the
        # counts is then the most frequent label (1 in that example).
        votes = np.bincount(nearest_labels)
        y_pred[row] = votes.argmax()
    return y_pred
分成5组
# Split the training data and the training labels into num_folds pieces each;
# fold i of the data corresponds to fold i of the labels.
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

5次k投票的值存放

# Map each candidate k to an (initially empty) list that will collect one
# accuracy value per cross-validation fold.
k_to_accuracies = {k: [] for k in k_choices}

交叉验证,之前都是用库,实在不太会写

for i in range(num_folds):  # one round per held-out fold
    classifier = KNearestNeighbor()
    # Train on every fold except fold i; fold i serves as the validation set.
    x_val_train = np.vstack(X_train_folds[0:i] + X_train_folds[i+1:])
    # The labels must be 1-D: predict_labels indexes y_train and passes the
    # result to np.bincount, which only accepts a 1-D array of non-negative ints.
    # concatenate + ravel yields a flat label vector whether the folds are 1-D
    # or (n, 1) columns. (np.vstack on 1-D folds would instead stack them as
    # rows, and [:, 0] would then keep only the first label of each fold.)
    y_val_train = np.concatenate(y_train_folds[0:i] + y_train_folds[i+1:]).ravel()
    y_val_true = y_train_folds[i].ravel()
    classifier.train(x_val_train, y_val_train)
    for k in k_choices:  # evaluate each candidate number of voting neighbours
        y_val_pred = classifier.predict(X_train_folds[i], k=k)
        num_correct = np.sum(y_val_pred == y_val_true)
        accuracy = float(num_correct) / len(y_val_pred)
        # Collect one accuracy per fold under each k.
        k_to_accuracies[k].append(accuracy)
np.vstack()  竖直叠
np.hstack()  水平叠
#matlab会更方便
总之,矩阵一定要注意维度














  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值