# Find the nearest neighbors of an observation using KNN (通过KNN查找离样本最近的邻居)
from sklearn import datasets
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
# Load the iris dataset and keep only its feature matrix.
iris = datasets.load_iris()
features = iris.data

# Standardize the features so they are on a comparable scale before any
# distances are computed.
standardizer = StandardScaler()
features_standardized = standardizer.fit_transform(features)

# Fit a model that returns the two nearest neighbors of a query point.
nearest_neighbors = NearestNeighbors(n_neighbors=2).fit(features_standardized)

# Query with one new observation; it is wrapped in a list because
# kneighbors() takes a 2D collection of query points.
new_observation = [1, 1, 1, 1]
distances, indices = nearest_neighbors.kneighbors([new_observation])

# Bare expressions: these only display a value in a notebook/REPL.
indices
distances

# Standardized feature rows of the neighbors found above.
features_standardized[indices]
# Recorded output (kept as a comment; `array` is not defined in this script):
# array([[[1.03800476, 0.55861082, 1.10378283, 1.18556721],
#         [0.79566902, 0.32841405, 0.76275827, 1.05393502]]])
# Fit a second two-neighbor model with the distance metric named explicitly.
nearestneighbors_euclidean = NearestNeighbors(
    n_neighbors=2, metric='euclidean').fit(features_standardized)

# NOTE: `distances` is not recomputed by this model; it still holds the
# result of the earlier kneighbors() query.
distances
# Recorded output (kept as a comment; `array` is not defined in this script):
# array([[0.49140089, 0.74294782]])
# Refit with three neighbors per observation.
nearestneighbors_euclidean = NearestNeighbors(
    n_neighbors=3, metric="euclidean").fit(features_standardized)
nearestneighbors_euclidean

# Build the neighbor graph as a dense matrix: entry [i, j] is nonzero when
# observation j is among the nearest neighbors of observation i. Because the
# training data itself is passed as the query, each row is expected to mark
# the observation itself as one of its neighbors (hence "with_self").
nearest_neighbors_with_self = nearestneighbors_euclidean.kneighbors_graph(
    features_standardized).toarray()
nearest_neighbors_with_self
list(enumerate(nearest_neighbors_with_self))

# Remove the self-links by zeroing the diagonal. Each `x` is a row of the
# matrix, and the assignment is relied on to modify the matrix in place.
for i, x in enumerate(nearest_neighbors_with_self):
    x[i] = 0

nearest_neighbors_with_self

# Neighbors of the first observation after removing the self-link.
nearest_neighbors_with_self[0]
# Recorded output (kept as a comment; `array` is not defined in this script):
# array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
#        1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
#        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
#        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
#        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
#        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
#        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
#        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
#        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])