CS231N Assignment 1 KNN: Code + Comments

Part 1: k_nearest_neighbor.py

1. Two loops

def compute_distances_two_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a nested loop over both the training data and the
        test data.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data.

        Returns:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          is the Euclidean distance between the ith test point and the jth training
          point.
        """
        num_test = X.shape[0]        # X_train: (5000, 3072), X_test: (500, 3072)
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                #####################################################################
                # TODO:                                                             #
                # Compute the l2 distance between the ith test point and the jth    #
                # training point, and store the result in dists[i, j]. You should   #
                # not use a loop over dimension, nor use np.linalg.norm().          #
                #####################################################################
                # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

                dists[i, j] = np.sqrt(np.sum(np.square(self.X_train[j] - X[i])))
                # Equivalent, but np.linalg.norm() is disallowed by the TODO:
                # dists[i, j] = np.linalg.norm(X[i] - self.X_train[j])
        
                # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return dists
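This is the straightforward per-pair Euclidean distance. As a quick sanity check of the formula (the 2-D points below are made up purely for illustration), the same expression reproduces the familiar 3-4-5 triangle:

import numpy as np

a = np.array([0.0, 3.0])   # hypothetical test point
b = np.array([4.0, 0.0])   # hypothetical training point

# sqrt((0 - 4)^2 + (3 - 0)^2) = sqrt(16 + 9) = 5
print(np.sqrt(np.sum(np.square(a - b))))   # 5.0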

2. One loop

def compute_distances_one_loop(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a single loop over the test data.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]           # X_train: (5000, 3072), X_test: (500, 3072)
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            #######################################################################
            # TODO:                                                               #
            # Compute the l2 distance between the ith test point and all training #
            # points, and store the result in dists[i, :].                        #
            # Do not use np.linalg.norm().                                        #
            #######################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            # X[i] (shape (D,)) broadcasts against self.X_train (shape (num_train, D));
            # summing over axis=1 gives a 1-D array of shape (num_train,).
            dists[i, :] = np.sqrt(np.sum(np.square(X[i] - self.X_train), axis=1))

            # No transpose is needed: the axis=1 sum is already 1-D, so it assigns
            # directly into the row dists[i, :] (dists is 500 x 5000).

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return dists
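To see the broadcasting concretely, here is a minimal standalone sketch (shapes made up: 5 training points, 4 features) showing why the axis=1 sum drops straight into a row of dists:

import numpy as np

X_train = np.random.randn(5, 4)   # hypothetical: 5 training points, 4 features
x = np.random.randn(4)            # one hypothetical test point

diff = x - X_train                # x broadcasts against every row -> shape (5, 4)
row = np.sqrt(np.sum(np.square(diff), axis=1))
print(row.shape)                  # (5,): 1-D, so it assigns directly into dists[i, :]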

3. No loops

def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using no explicit loops.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        #########################################################################
        # TODO:                                                                 #
        # Compute the l2 distance between all test points and all training      #
        # points without using any explicit loops, and store the result in      #
        # dists.                                                                #
        #                                                                       #
        # You should implement this function using only basic array operations; #
        # in particular you should not use functions from scipy,                #
        # nor use np.linalg.norm().                                             #
        #                                                                       #
        # HINT: Try to formulate the l2 distance using matrix multiplication    #
        #       and two broadcast sums.                                         #
        #########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Expand ||x - y||^2 = ||x||^2 - 2*x.y + ||y||^2 for all pairs at once.
        # np.sum(..., axis=1) returns a 1-D array, so the test-point norms are
        # reshaped into a (num_test, 1) column; together with the 1-D (num_train,)
        # training-point norms they broadcast against the (num_test, num_train) product.
        dists = np.sqrt(-2 * X.dot(self.X_train.T)
                        + np.sum(np.square(X), axis=1).reshape(-1, 1)
                        + np.sum(np.square(self.X_train), axis=1))
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return dists
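The no-loop version relies on the expansion ||x - y||^2 = ||x||^2 - 2*x.y + ||y||^2 applied to every test/training pair at once. Below is a minimal standalone check, with random data and made-up shapes, that this vectorized formula agrees with a direct per-pair computation:

import numpy as np

X_test = np.random.randn(3, 6)    # hypothetical: 3 test points, 6 features
X_tr = np.random.randn(5, 6)      # hypothetical: 5 training points

# -2*X.Y^T plus the (3, 1) column of test norms and the (5,) row of training norms.
d_fast = np.sqrt(-2 * X_test.dot(X_tr.T)
                 + np.sum(np.square(X_test), axis=1).reshape(-1, 1)
                 + np.sum(np.square(X_tr), axis=1))

# Reference: explicit per-pair distances.
d_slow = np.array([[np.sqrt(np.sum(np.square(t - r))) for r in X_tr] for t in X_test])

print(np.allclose(d_fast, d_slow))   # True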

Part 2: knn.ipynb

Cross-validation

num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = []
y_train_folds = []
################################################################################
# TODO:                                                                        #
# Split up the training data into folds. After splitting, X_train_folds and    #
# y_train_folds should each be lists of length num_folds, where                #
# y_train_folds[i] is the label vector for the points in X_train_folds[i].     #
# Hint: Look up the numpy array_split function.                                #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
y_train = y_train.reshape(-1, 1)              # make the labels a column so each fold stays 2-D
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
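For reference, a minimal sketch (assuming the assignment's shapes: X_train is (5000, 3072) and num_folds is 5) of what np.array_split returns here:

import numpy as np

X = np.zeros((5000, 3072))          # same shape as X_train in the assignment
folds = np.array_split(X, 5)
print(len(folds), folds[0].shape)   # 5 (1000, 3072)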

# A dictionary holding the accuracies for different values of k that we find
# when running cross-validation. After running cross-validation,
# k_to_accuracies[k] should be a list of length num_folds giving the different
# accuracy values that we found when using that value of k.
k_to_accuracies = {}

for k in k_choices:
    k_to_accuracies.setdefault(k, [])


################################################################################
# TODO:                                                                        #
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

for i in range(num_folds):
    classifier = KNearestNeighbor()
    # Use folds [0, i-1] and [i+1, num_folds-1] as training data; fold i is the validation set.
    x_val_train = np.vstack(X_train_folds[0:i] + X_train_folds[i+1:])
    y_val_train = np.vstack(y_train_folds[0:i] + y_train_folds[i+1:])
    y_val_train = y_val_train[:, 0]               # flatten (4000, 1) to (4000,)
    classifier.train(x_val_train, y_val_train)
    dists = classifier.compute_distances_no_loops(X_train_folds[i])
    
    for k in k_choices:
        y_val_pred = classifier.predict_labels(dists, k=k)
        num_correct = np.sum(y_val_pred == y_train_folds[i][:, 0])   # y_train_folds[i] is (1000, 1); [:, 0] gives shape (1000,)
        accuracy = float(num_correct) / len(y_val_pred)
        k_to_accuracies[k].append(accuracy)    # record this fold's accuracy under k


# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
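A common follow-up, not part of the starter code shown above, is to average the num_folds accuracies for each k and pick the best one; a minimal sketch:

# Average the per-fold accuracies recorded for each k and pick the best value.
mean_accuracies = {k: np.mean(accs) for k, accs in k_to_accuracies.items()}
best_k = max(mean_accuracies, key=mean_accuracies.get)
print('best k = %d, mean cross-validation accuracy = %f' % (best_k, mean_accuracies[best_k]))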
