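# (A beginner's personal understanding; many points may be inaccurate. Corrections are welcome -- I am here to learn from everyone.)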
# -*- coding: utf-8 -*-
"""
@version:??
@author: xq
@contact:xiaoq_xiaoq@163.com
@file: k_means.py
@time: 2017/10/18 15:56
"""
import warnings
import numpy as np
import scipy.sparse as sp
from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.utils.extmath import row_norms, squared_norm, stable_cumsum
from sklearn.utils.sparsefuncs_fast import assign_rows_csr
from sklearn.utils import check_array
from sklearn.utils import check_random_state
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import FLOAT_DTYPES
from sklearn.cluster import k_means
class KMeans(BaseEstimator, ClusterMixin, TransformerMixin):
"""K-Means clustering
Read more in the :ref:`User Guide <k_means>`.
Examples
--------
>>> from sklearn.cluster import KMeans
>>> import numpy as np
>>> X = np.array([[1, 2], [1, 4], [1, 0],
... [4, 2], [4, 4], [4, 0]])
>>> kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
>>> kmeans.labels_
array([0, 0, 0, 1, 1, 1], dtype=int32)
>>> kmeans.predict([[0, 0], [4, 4]])
array([0, 1], dtype=int32)
>>> kmeans.cluster_centers_
array([[ 1., 2.],
[ 4., 2.]])
"""
def __init__(self, n_clusters=8, init='k-means++', n_init=10,
max_iter=300, tol=1e-4, precompute_distances='auto',
verbose=0, random_state=None, copy_x=True,
n_jobs=1, algorithm='auto'):
self.n_clusters = n_clusters#集群个数
self.init = init#选择中心点的方式
self.max_iter = max_iter#算法每次迭代的最大次数
self.tol = tol#迭代的总次数
self.precompute_distances = precompute_distances#是否提前计算距离
self.n_init = n_init#用不同的中心点初始化值运行算法的次数
self.verbose = verbose