import random
from kmeans_tools import Cluster, get_distance, gen_random_sample
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
def kmeans(samples, k, cutoff):
"""
K-means clustering function.
"""
# 随机选k个样本点作为初始聚类中心
init_samples = random.sample(samples, k)
# 创建k个聚类,聚类的中心分别为随机初始的样本点
clusters = [Cluster([sample]) for sample in init_samples]
# 迭代循环直到聚类划分稳定
n_loop = 0
while True:
# 初始化一组空列表用于存储每个聚类内的样本点
lists = [[] for _ in clusters]
# 开始迭代
n_loop += 1
# 遍历样本集中的每个样本
for sample in samples:
# 计算样本点sample和第一个聚类中心的距离
smallest_distance = get_distance(sample, clusters[0].centroid)
# 初始化属于聚类 0
cluster_index = 0
# 计算和其他聚类中心的距离
for i in range(k - 1):
# 计算样本点sample和聚类中心的距离
distance = get_distance(sample, clusters[i+1].centroid)
# 如果存在更小的距离,更新距离
if di