cluster.k_means_._labels_inertia_precompute_dense

def _labels_inertia_precompute_dense(X, x_squared_norms, centers, distances):
    """Compute labels and inertia using a full distance matrix.

    Every sample is assigned to the center with the smallest squared
    euclidean distance; ties go to the center with the lowest index.
    This will overwrite the 'distances' array in-place.

    Parameters
    ----------
    X : numpy array, shape (n_samples, n_features)
        Input data.
    x_squared_norms : numpy array, shape (n_samples,)
        Precomputed squared norms of X. Forwarded to
        ``euclidean_distances`` as ``Y_norm_squared``.
    centers : numpy array, shape (n_clusters, n_features)
        Cluster centers which data is assigned to.
    distances : numpy array, shape (n_samples,)
        Pre-allocated array in which distances are stored. It is only
        written when its first dimension equals ``n_samples``; otherwise
        it is left untouched.

    Returns
    -------
    labels : numpy array, dtype=np.int32, shape (n_samples,)
        Indices of clusters that samples are assigned to. An entry stays
        at -1 if no center yields a distance below infinity for that
        sample (e.g. when there are no centers).
    inertia : float
        Sum of squared distances of samples to their closest cluster
        center.
    """
    # Number of samples.
    n_samples = X.shape[0]
    # Number of cluster centers.
    k = centers.shape[0]

    # Squared distances from every center to every sample; row i holds the
    # distances from center i. The norms are passed by keyword because
    # recent scikit-learn releases make that argument keyword-only.
    all_distances = euclidean_distances(centers, X,
                                        Y_norm_squared=x_squared_norms,
                                        squared=True)

    # Start with "unassigned" labels and an infinite best distance.
    labels = np.full(n_samples, -1, dtype=np.int32)
    mindist = np.full(n_samples, np.inf)

    for center_id in range(k):
        # Distances of all samples to the current center.
        dist = all_distances[center_id]
        # Strict '<' keeps the earliest center on ties.
        labels[dist < mindist] = center_id
        # Keep the running element-wise minimum.
        mindist = np.minimum(dist, mindist)

    # Callers may pass a differently sized array when they do not need the
    # per-sample distances; only fill it when the sizes agree.
    if n_samples == distances.shape[0]:
        # distances will be changed in-place
        distances[:] = mindist

    inertia = mindist.sum()
    return labels, inertia

函数:euclidean_distances()



转载于:https://my.oschina.net/u/2336323/blog/479506

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
import time import numpy as np import matplotlib.pyplot as plt from sklearn.cluster import MiniBatchKMeans, KMeans from sklearn.metrics.pairwise import pairwise_distances_argmin from sklearn.datasets import make_blobs # Generate sample data np.random.seed(0) batch_size = 45 centers = [[1, 1], [-1, -1], [1, -1]] n_clusters = len(centers) X, labels_true = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7) # Compute clustering with Means k_means = KMeans(init='k-means++', n_clusters=3, n_init=10) t0 = time.time() k_means.fit(X) t_batch = time.time() - t0 # Compute clustering with MiniBatchKMeans mbk = MiniBatchKMeans(init='k-means++', n_clusters=3, batch_size=batch_size, n_init=10, max_no_improvement=10, verbose=0) t0 = time.time() mbk.fit(X) t_mini_batch = time.time() - t0 # Plot result fig = plt.figure(figsize=(8, 3)) fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9) colors = ['#4EACC5', '#FF9C34', '#4E9A06'] # We want to have the same colors for the same cluster from the # MiniBatchKMeans and the KMeans algorithm. Let's pair the cluster centers per # closest one. k_means_cluster_centers = k_means.cluster_centers_ order = pairwise_distances_argmin(k_means.cluster_centers_, mbk.cluster_centers_) mbk_means_cluster_centers = mbk.cluster_centers_[order] k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers) mbk_means_labels = pairwise_distances_argmin(X, mbk_means_cluster_centers) # KMeans for k, col in zip(range(n_clusters), colors): my_members = k_means_labels == k cluster_center = k_means_cluster_centers[k] plt.plot(X[my_members, 0], X[my_members, 1], 'w', markerfacecolor=col, marker='.') plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=6) plt.title('KMeans') plt.xticks(()) plt.yticks(()) plt.show() 这段代码每一句在干什么
最新发布
06-01

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值