def _labels_inertia_precompute_dense(X, x_squared_norms, centers, distances):
    """Compute labels and inertia using a full distance matrix.

    Every sample is assigned to the center with the smallest squared
    euclidean distance. When several centers are equally close, the one
    with the lowest index wins because the comparison below is strict.

    This will overwrite the 'distances' array in-place (see below for the
    condition under which that happens).

    Parameters
    ----------
    X : numpy array, shape (n_samples, n_features)
        Input data.

    x_squared_norms : numpy array, shape (n_samples,)
        Precomputed squared norms of X. Forwarded as the third positional
        argument of ``euclidean_distances``.

    centers : numpy array, shape (n_clusters, n_features)
        Cluster centers which data is assigned to.

    distances : numpy array, shape (n_samples,)
        Pre-allocated array in which distances are stored. It is only
        written to when its length equals the number of samples;
        otherwise it is left untouched.

    Returns
    -------
    labels : numpy array, dtype=np.int32, shape (n_samples,)
        Indices of clusters that samples are assigned to. An entry stays
        at -1 if no center was ever strictly closer than infinity for
        that sample (e.g. when there are no centers).

    inertia : float
        Sum of squared distances of samples to their closest cluster
        center (the distances are requested with ``squared=True``).
    """
    n_samples = X.shape[0]
    n_clusters = centers.shape[0]

    # Squared distances from every center to every sample. The result is
    # indexed by center below, i.e. it is expected to hold one row per
    # center with one entry per sample.
    all_distances = euclidean_distances(centers, X, x_squared_norms,
                                        squared=True)

    # -1 marks "not assigned yet"; +inf guarantees that any finite
    # distance replaces the initial best distance.
    # NOTE: np.inf is used instead of the np.infty alias, which no longer
    # exists in NumPy 2.0.
    labels = np.full(n_samples, -1, dtype=np.int32)
    mindist = np.full(n_samples, np.inf)

    for center_id in range(n_clusters):
        # Distances of all samples to the current center.
        dist = all_distances[center_id]
        # Relabel the samples for which this center beats the best so far.
        labels[dist < mindist] = center_id
        # Keep the running element-wise minimum.
        mindist = np.minimum(dist, mindist)

    # Only report the distances back when the caller's buffer has a
    # matching length.
    if n_samples == distances.shape[0]:
        # distances will be changed in-place
        distances[:] = mindist

    inertia = mindist.sum()
    return labels, inertia
# Reposted from: https://my.oschina.net/u/2336323/blog/479506