# KNN (k-nearest neighbors) algorithm implementation
# Reference: https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets
# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

# Keep only the first two features (columns) for simplicity, together with
# the class labels.
X, y = iris.data[:, 0:2], iris.target

# Report the shape of the full feature matrix and of the label vector.
print('data shape is {}'.format(iris.data.shape))
print('class shape is {}'.format(iris.target.shape))
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets
# Load the dataset again and build the two three-colour maps: a light
# palette and a bold palette (the bold one colours the scatter points below).
iris = datasets.load_iris()
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

# K is the neighbour count; x is the last row of X.
K = 3
x = X[-1]

# Scatter plot of the two selected features, one colour per class.
fig, ax = plt.subplots(figsize=(4, 4))
for i, iris_class in enumerate(('Iris Setosa', 'Iris Versicolour', 'Iris Virginica')):
    # Boolean mask selecting the samples whose label equals class index i.
    idx = (y == i)
    ax.scatter(
        X[idx, 0],
        X[idx, 1],
        s=20,
        c=cmap_bold.colors[i],
        edgecolor='k',
        label=iris_class,
    )
ax.set_xlabel('sepal length (cm)')
ax.set_ylabel('sepal width (cm)')
ax.legend()
# GRADED FUNCTION: DO NOT EDIT THIS LINE
def pairwise_distance_matrix(X, Y):
    """Compute the pairwise Euclidean distance between rows of X and rows of Y.

    Arguments
    ----------
    X: ndarray of size (N, D), or of size (D,) for a single point
    Y: ndarray of size (M, D), or of size (D,) for a single point

    Returns
    --------
    distance_matrix: ndarray of shape (N, M), each entry distance_matrix[i, j]
        is the Euclidean distance between the ith row of X and the jth row
        of Y. A 1-D input counts as a single row, so N (or M) is then 1.

    Raises
    ------
    ValueError: if X or Y is not 1-D or 2-D, or if the rows of X and Y do
        not have the same length D.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)

    # A 1-D input is a single point: promote it to a one-row matrix so both
    # arguments are handled symmetrically.
    if X.ndim == 1:
        X = X.reshape(1, -1)
    if Y.ndim == 1:
        Y = Y.reshape(1, -1)

    if X.ndim != 2 or Y.ndim != 2:
        raise ValueError(
            'X and Y must be 1-D or 2-D, got shapes {} and {}'.format(
                X.shape, Y.shape))
    # Without this check a feature axis of length 1 would silently broadcast
    # against a longer one and produce meaningless distances.
    if X.shape[1] != Y.shape[1]:
        raise ValueError(
            'X and Y must have the same number of columns, got {} and {}'.format(
                X.shape[1], Y.shape[1]))

    # Broadcast (N, D, 1) against (1, D, M) to get every row-to-row
    # difference, then reduce over the feature axis.
    xydiff = X[:, :, None] - Y.T[None, :, :]
    distance_matrix = np.sqrt(np.sum(xydiff ** 2, axis=1))
    return distance_matrix
# GRADED FUNCTION: DO NOT EDIT THIS LINE
def KNN(k, X, y, x):
"""K nearest neighbors
k: number of nearest neighbors
X: training input locations
y: training labels
x: test input
"""
N, D = X.shape
num_classes = len(np.unique(y))
#dist = np.zeros(N) # <-- EDIT THIS to compute the pairwise distance matrix
dist = pairwise_distance_matrix(X, x)# return Nx1 matrix and reshape to 1 dimension array
# Next we make the predictions
ypred = np.zeros(num_classes)
classes = y[np.argsort(dist, axis=0)][:k] # find the labels of the k nearest neighbors
for c in np.unique(classes):
ypred[c] = len(classes[classes == c]) # <-- EDIT THIS to compute the correct prediction