import numpy as np
class KNearestNeighbor(object):
    """A k-nearest-neighbour classifier using the L2 (Euclidean) distance.

    Training only memorises the data; all the work happens at prediction
    time, when each test point is labelled by a majority vote among the
    ``k`` training points closest to it.
    """

    def __init__(self):
        pass

    def train(self, X, y):
        """Memorise the training set.

        Args:
            X: Array of shape (num_train, D), one training point per row.
            y: Array of shape (num_train,) with the label of each row of
                ``X``. Labels must be non-negative integers because the
                vote in ``predict_labels`` is counted with ``np.bincount``.
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X, k=1, num_loops=0):
        """Predict a label for every row of ``X``.

        Args:
            X: Array of shape (num_test, D), one test point per row.
            k: Number of nearest neighbours that vote on each label.
            num_loops: Which distance implementation to use (0, 1 or 2
                explicit Python loops). All three compute the same matrix.

        Returns:
            Array of shape (num_test,) holding the predicted labels.

        Raises:
            ValueError: If ``num_loops`` is not 0, 1 or 2, or if ``k`` is
                smaller than 1.
        """
        if num_loops == 0:
            dists = self.compute_distances_no_loops(X)
        elif num_loops == 1:
            dists = self.compute_distances_one_loop(X)
        elif num_loops == 2:
            dists = self.compute_distances_two_loops(X)
        else:
            raise ValueError('Invalid value %d for num_loops' % num_loops)
        return self.predict_labels(dists, k=k)

    def compute_distances_two_loops(self, X):
        """Compute the L2 distance matrix with a nested loop over all pairs.

        Args:
            X: Array of shape (num_test, D).

        Returns:
            Array of shape (num_test, num_train) where entry ``[i, j]`` is
            the distance between test point ``i`` and training point ``j``.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                diff = self.X_train[j] - X[i]
                dists[i, j] = np.sqrt(np.sum(np.square(diff)))
        return dists

    def compute_distances_one_loop(self, X):
        """Compute the L2 distance matrix with one loop over the test points.

        Args:
            X: Array of shape (num_test, D).

        Returns:
            Array of shape (num_test, num_train) where entry ``[i, j]`` is
            the distance between test point ``i`` and training point ``j``.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcasting subtracts X[i] from every training row at once.
            diff = self.X_train - X[i]
            dists[i] = np.sqrt(np.sum(np.square(diff), axis=1))
        return dists

    def compute_distances_no_loops(self, X):
        """Compute the L2 distance matrix without any explicit Python loop.

        Uses the expansion ``||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b`` so
        that the whole matrix comes from one matrix product plus two
        broadcast additions.

        Args:
            X: Array of shape (num_test, D).

        Returns:
            Array of shape (num_test, num_train) where entry ``[i, j]`` is
            the distance between test point ``i`` and training point ``j``.
        """
        train_sq = np.sum(np.square(self.X_train), axis=1)   # (num_train,)
        test_sq = np.sum(np.square(X), axis=1, keepdims=True)  # (num_test, 1)
        cross = np.multiply(np.dot(X, self.X_train.T), -2)
        sq_dists = np.add(np.add(train_sq, cross), test_sq)
        # The expansion is not exact in floating point: for identical or
        # nearly identical points it can round to a tiny negative value,
        # which np.sqrt would turn into NaN. Clamp at zero first.
        sq_dists = np.maximum(sq_dists, 0)
        return np.sqrt(sq_dists)

    def predict_labels(self, dists, k=1):
        """Turn a distance matrix into predicted labels by majority vote.

        Args:
            dists: Array of shape (num_test, num_train) where entry
                ``[i, j]`` is the distance between test point ``i`` and
                training point ``j``.
            k: Number of nearest neighbours that vote. If ``k`` exceeds the
                number of training points, every training point votes.

        Returns:
            Array of shape (num_test,) holding the predicted labels. When
            several labels receive the same number of votes, the smallest
            label wins.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            # k == 0 leaves nothing to vote on, and a negative k would
            # silently slice "all but the last |k|" neighbours.
            raise ValueError('k must be a positive integer, got %r' % (k,))
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # A stable sort keeps equidistant neighbours in training order,
            # so the choice among ties is deterministic.
            nearest = np.argsort(dists[i], kind="stable")[:k]
            closest_y = self.y_train[nearest]
            # bincount tallies the votes per label; argmax picks the winner
            # and resolves vote ties in favour of the smaller label.
            y_pred[i] = np.argmax(np.bincount(closest_y))
        return y_pred