整体思路:
1、随机选择k个聚合点 A B
2、计算各点到各聚合点的距离,将每个点归入距离最近的聚合点所在的组,如 AC > BC,则 C 与 B 归为一组
3、根据每一组数据,计算平均值,作为该组新的聚合点
4、重复2,3直到所有的聚合点不再发生变化。
缺点:
聚合结果与初始聚合点的选择相关:图1是根据直线 y=x 进行分类聚合的结果,但同样的数据也可以像图2一样分类聚合成为两个圆。
# -*- encoding: utf-8 -*-
from typing import Sequence

from matplotlib import pyplot as plt
import numpy as np
def cal_distance(dot1: Sequence[float], dot2: Sequence[float]) -> float:
    """Return the Euclidean distance between two points.

    Args:
        dot1: Coordinates of the first point (list or tuple of numbers).
        dot2: Coordinates of the second point, with the same number of
            coordinates as ``dot1``.

    Returns:
        The square root of the summed squared coordinate differences.

    Raises:
        ValueError: If the two points do not have the same number of
            coordinates.
    """
    # Reject mismatched dimensions explicitly: iterating over len(dot1) only
    # would silently ignore trailing coordinates of a longer dot2 and fail
    # with an unrelated IndexError for a shorter one.
    if len(dot1) != len(dot2):
        raise ValueError(
            "points must have the same dimension, got {} and {}".format(
                len(dot1), len(dot2)
            )
        )
    squared = sum((a - b) ** 2 for a, b in zip(dot1, dot2))
    return squared ** 0.5
def cal_center(res: list) -> list:
    """Return the centroid (per-coordinate mean) of a group of points.

    Args:
        res: Non-empty list of points; each point is a sequence of numbers
            and all points are expected to have the same dimension.

    Returns:
        A list holding the mean of every coordinate, e.g. ``[x_mean, y_mean]``
        for 2-D points.

    Raises:
        ValueError: If ``res`` is empty, since an empty group has no mean.
    """
    if not res:
        raise ValueError("cannot compute the center of an empty group of points")
    count = len(res)
    # zip(*res) regroups the points by axis, so this works for any dimension
    # rather than only for the first two coordinates.
    return [sum(axis) / count for axis in zip(*res)]
def circle(x, y, r, color='r', count=100):
    """Draw a circle outline with ``plt.plot``.

    Args:
        x: X coordinate of the circle center.
        y: Y coordinate of the circle center.
        r: Radius of the circle.
        color: Line color, forwarded to ``plt.plot`` as ``c``.
        count: Number of straight segments used to approximate the circle.
    """
    # Sample count + 1 angles over the closed interval [0, 2*pi] so the last
    # vertex coincides with the first; sampling only ``count`` angles leaves
    # a gap between the final and the first point of the outline.
    angles = np.linspace(0.0, 2.0 * np.pi, count + 1)
    plt.plot(x + r * np.cos(angles), y + r * np.sin(angles), c=color)
def main(dots: list, k: int, kind: int = 1) -> list:
    """Cluster ``dots`` into ``k`` groups with k-means and plot the result.

    The centers are refined by repeatedly assigning every point to its
    nearest center and replacing each center with the mean of its group,
    until the centers stop changing. The centers computed in each round are
    printed. Finally all points are plotted together with one circle per
    group (centered on the group's center, reaching its farthest member) and
    the figure is shown.

    Args:
        dots: Points to cluster. Only the first two coordinates of each
            point are used for plotting.
        k: Number of groups; must satisfy ``1 <= k <= len(dots)``.
        kind: Seeding strategy. ``1`` takes the first ``k`` points as the
            initial centers; any other value takes the ``k`` points in the
            middle of ``dots``.

    Returns:
        The list of final group centers.

    Raises:
        ValueError: If ``k`` is not between 1 and ``len(dots)``.
    """
    n = len(dots)
    if not 1 <= k <= n:
        raise ValueError(
            "k must be between 1 and len(dots) ({}), got {}".format(n, k)
        )

    if kind == 1:
        seeds = dots[:k]
    else:
        # (n - k) // 2 + k == (n + k) // 2, i.e. the k middle points.
        start = (n - k) // 2
        seeds = dots[start:start + k]
    # Copy the seeds as lists so centers compare consistently with the lists
    # produced by cal_center and never alias the caller's points.
    centers = [list(seed) for seed in seeds]

    # Run at least one assignment round regardless of the seed values; a
    # sentinel such as [[0, 0], ...] would skip the loop entirely whenever
    # the seeds happened to equal it, leaving the groups undefined.
    while True:
        clusters = [[] for _ in range(k)]
        for dot in dots:
            distances = [cal_distance(dot, center) for center in centers]
            clusters[distances.index(min(distances))].append(dot)
        # A group can end up empty (e.g. duplicated seeds); keep its previous
        # center instead of averaging over zero points.
        centers_new = [
            cal_center(cluster) if cluster else center
            for cluster, center in zip(clusters, centers)
        ]
        print(centers_new)
        if centers_new == centers:
            break
        centers = centers_new

    plt.plot([dot[0] for dot in dots], [dot[1] for dot in dots], "ob")
    for center, cluster in zip(centers_new, clusters):
        # Radius reaches the farthest member; an empty group gets radius 0.
        radius = max((cal_distance(dot, center) for dot in cluster), default=0)
        circle(center[0], center[1], radius)
    plt.show()
    return centers_new
if __name__ == "__main__":
    # Cluster the same eight sample points into two groups twice, once per
    # seeding strategy (kind 1, then kind 2), printing the returned centers.
    for seeding in (1, 2):
        sample_dots = [
            [0, 1], [1, 0], [0, -1], [-1, 0],
            [5, 4], [5, 6], [4, 5], [6, 5],
        ]
        final_centers = main(sample_dots, 2, seeding)
        print(final_centers)