def _labels_inertia(X, x_squared_norms, centers,
                    precompute_distances=True, distances=None):
    """Assign each sample to a center and report the inertia (K-means E step).

    The work is delegated to one of three routines depending on the input:
    the CSR routine for sparse ``X``, the precomputed-distance routine for
    dense ``X`` when ``precompute_distances`` is true, and the dense array
    routine otherwise.

    Parameters
    ----------
    X : float64 array-like or CSR sparse matrix, shape (n_samples, n_features)
        Samples to be labelled.

    x_squared_norms : array, shape (n_samples,)
        Squared euclidean norm of each sample, computed ahead of time by the
        caller to speed up the distance computations.

    centers : float64 array, shape (k, n_features)
        Current cluster centers.

    precompute_distances : boolean, default: True
        Precompute distances (faster but takes more memory). Only consulted
        when ``X`` is dense; sparse input always uses the CSR routine.

    distances : float64 array, shape (n_samples,), optional
        Pre-allocated buffer handed to the assignment routine, to be filled
        in-place with each sample's distance to its closest center. When
        None, an empty float64 array is passed instead.

    Returns
    -------
    labels : int array, shape (n_samples,)
        Index of the center each sample was assigned to.

    inertia : float
        Sum of distances of samples to their closest cluster center, as
        reported by the assignment routine.
    """
    sample_count = X.shape[0]

    # Start every label at -1 so that a sample the assignment routine failed
    # to label is easy to spot afterwards.
    labels = np.empty(sample_count, dtype=np.int32)
    labels.fill(-1)

    # The routines below always receive an array; fall back to an empty one
    # when the caller did not supply a buffer.
    if distances is None:
        distances = np.zeros(0, dtype=np.float64)

    input_is_sparse = sp.issparse(X)

    # Dense input with precomputation enabled is handled entirely by the
    # dedicated helper; the labels array allocated above is not used here.
    if not input_is_sparse and precompute_distances:
        return _labels_inertia_precompute_dense(X, x_squared_norms,
                                                centers, distances)

    # Both remaining routines share a signature: they take `labels` and
    # `distances` as buffers and return the inertia.
    if input_is_sparse:
        assign_labels = _k_means._assign_labels_csr
    else:
        assign_labels = _k_means._assign_labels_array

    inertia = assign_labels(X, x_squared_norms, centers, labels,
                            distances=distances)
    return labels, inertia
函数 : _labels_inertia_precompute_dense()