简单实现 k-means 算法和一种非标准的 k-means 方法(二分裂法 bin_split)。基本思想是:先选定一组中心(center),把每个数据点归入距离最近的中心,再用各簇的均值更新中心,如此反复,直到中心的相对变化(误差)不再明显为止。
def dis(a, b):
    """Return the Euclidean distance between the 2-D points *a* and *b*.

    Only the first two coordinates of each point are used; any further
    elements are ignored.

    Args:
        a: Indexable point whose items 0 and 1 are numbers.
        b: Indexable point whose items 0 and 1 are numbers.

    Returns:
        The distance as a float.
    """
    # Imported locally so this helper does not depend on module-level imports.
    from math import hypot

    # hypot avoids squaring the differences directly, so very large
    # coordinates do not raise OverflowError and very small ones do not
    # underflow to zero.
    return hypot(a[0] - b[0], a[1] - b[1])
def Error(x, y):
    """Return the relative displacement between two equally long point lists.

    The result is the mean distance between corresponding points of *x* and
    *y*, divided by the mean distance of those points from the origin
    (averaged over the two lists).

    Args:
        x: Sequence of 2-D points.
        y: Sequence of 2-D points with the same length as *x*.

    Returns:
        The ratio described above, or ``0.0`` when every point of both lists
        lies at the origin (nothing moved and there is no scale to
        normalise by).

    Raises:
        AssertionError: If *x* and *y* differ in length.
    """
    if len(x) != len(y):
        # Raised explicitly instead of via ``assert`` so the check still runs
        # under ``python -O``; otherwise zip() would silently truncate.
        raise AssertionError("x and y must contain the same number of points")

    origin = (0, 0)
    pairs = list(zip(x, y))
    # NOTE(review): ``mean`` and ``dis`` come from the enclosing module; empty
    # input is passed straight to ``mean`` and is not handled here.
    moved = mean(dis(a, b) for a, b in pairs)
    scale = mean(dis(a, origin) + dis(b, origin) for a, b in pairs) / 2
    if scale == 0:
        # All points sit on the origin, so ``moved`` is 0 as well; avoid 0/0.
        return 0.0
    return moved / scale
def k_means(x, k=2, initial=None, tol=0.001):
    """Cluster the 2-D points *x* with Lloyd-style k-means iterations.

    Each pass assigns every point to its nearest center (ties go to the
    lower center index) and then moves each center to the mean of the points
    assigned to it.  A center that receives no points keeps its position.
    Iteration stops once ``Error(previous_centers, centers)`` is at most
    *tol*.

    Args:
        x: Sequence of 2-D points; it is iterated once per pass and indexed
            when *initial* is not given.
        k: Number of centers; used only when *initial* is empty or ``None``,
            in which case the first *k* points of *x* are the starting
            centers.
        initial: Optional starting centers.  The list is copied, so the
            caller's object is left untouched.
        tol: Convergence threshold on the relative center displacement.

    Returns:
        A new list of ``(x, y)`` center tuples.
    """
    # Copy the seeds: centers are overwritten by index below and the caller's
    # ``initial`` list must not change under them.
    c = list(initial) if initial else [x[i] for i in range(k)]

    # Always run one full pass before testing convergence, so no sentinel
    # value is needed for the "previous" centers.
    while True:
        old_center = list(c)

        cluster = defaultdict(list)
        for point in x:
            # Compare (distance, index) pairs so ties never compare centers.
            _, index = min((dis(center, point), i) for i, center in enumerate(c))
            cluster[index].append(point)

        for index, points in cluster.items():
            c[index] = mean(p[0] for p in points), mean(p[1] for p in points)

        if Error(old_center, c) <= tol:
            return c
def bin_split(x, M, epsilon=0.05):
    """Grow a set of cluster centers for *x* by repeated binary splitting.

    Starts from the single centroid of *x*.  While fewer than *M* centers
    exist, every center is replaced by two perturbed copies and the doubled
    set is refined with ``k_means``.

    Args:
        x: Sequence of 2-D points.
        M: Desired number of centers.  The count doubles each round, so the
            result has the smallest power of two that is >= *M* (one center
            when ``M <= 1``).
        epsilon: Relative size of the perturbation applied when splitting.

    Returns:
        A list of ``(x, y)`` center tuples.
    """

    def split(centers):
        """Return two perturbed children for every center in *centers*."""
        children = []
        for center in centers:
            if center[0] == 0 and center[1] == 0:
                # Scaling the origin gives two identical children that could
                # never separate; offset them additively instead.
                children.append((epsilon, epsilon))
                children.append((-epsilon, -epsilon))
            else:
                children.append((center[0] * (1 + epsilon), center[1] * (1 + epsilon)))
                children.append((center[0] * (1 - epsilon), center[1] * (1 - epsilon)))
        return children

    centers = [(mean(a[0] for a in x), mean(a[1] for a in x))]
    m = 1
    while m < M:
        centers = k_means(x, initial=split(centers))
        m *= 2
    return centers