K-Means 教程
- 引入依赖
import numpy as np
import matplotlib.pyplot as plt
# 从 sklearn 中直接生成聚类数据(注意:scikit-learn 0.24 起已移除 samples_generator 模块,新版本请改用 from sklearn.datasets import make_blobs)
from sklearn.datasets.samples_generator import make_blobs
- 数据加载
# Generate a reproducible toy data set: 100 samples spread around 6 blob
# centres, with the seed fixed so every run yields the same points.
x, y = make_blobs(
    n_samples=100,
    centers=6,
    cluster_std=0.6,
    random_state=1234,
)

# Plot the first two feature columns on a square canvas, colouring each
# point by y (the second value returned by make_blobs).
plt.figure(figsize=(6, 6))
plt.scatter(
    x[:, 0],
    x[:, 1],
    c=y,
)
plt.show()
- 算法实现
# 引入 scipy 中的距离函数 cdist,默认使用欧氏距离
from scipy.spatial.distance import cdist
class K_Means(object):
# 初始化,参数 n_clusters(K)、迭代次数max_iter、初始质心 centroids
def __init__(self, n_clusters=5, max_iter=300, centroids=[]):
self.n_clusters = n_clusters
self.max_iter = max_iter
self.centroids = np.array( centroids, dtype=np.f