# 仿制 MATLAB 的 kmeans 聚类:http://blog.csdn.net/nbu2004/article/details/43084999
# 注意:data 是 list 类型,但二维 array 类型也没问题。比起 MATLAB,Python 只能更简单,即使没有 MATLAB 的矩阵表达式。
# -*- coding: utf-8 -*-
# import copy
import numpy as np
import numpy.linalg as LA
def kmeans(data, K, dist=lambda x, y: LA.norm(x - y), tol=1e-8):
    """Cluster ``data`` into ``K`` groups with Lloyd's k-means iteration.

    The first ``K`` samples seed the centroids.  Each pass assigns every
    sample to its nearest centroid (ties go to the lowest cluster index)
    and then moves each centroid to the mean of its members.  Iteration
    stops once the summed centroid movement is no larger than ``tol``.

    Args:
        data: Sliceable sequence of samples, e.g. a list of vectors or a
            2-D array with one sample per row.
        K: Number of clusters; must satisfy ``1 <= K <= len(data)``.
        dist: Callable ``dist(x, y)`` returning the distance between two
            points.  Defaults to the Euclidean norm of ``x - y``.
        tol: Convergence threshold on the total centroid displacement.

    Returns:
        A tuple ``(kls, ctr)``.  ``kls`` is a list of ``K`` lists holding
        the samples of each cluster (the original elements of ``data``).
        ``ctr`` is a list of ``K`` centroids, as 1-D float arrays, that the
        returned assignment was computed against.

    Raises:
        ValueError: If ``K`` is smaller than 1 or larger than ``len(data)``.

    Example:
        >>> kls, ctr = kmeans([[0, 0], [0, 1], [10, 0], [10, 1]], 2)
        >>> [len(c) for c in kls]
        [2, 2]
    """
    N = len(data)
    if not 1 <= K <= N:
        raise ValueError(
            "K must be between 1 and len(data) (%d), got %r" % (N, K)
        )

    # Copy the seeds into float arrays so the centroids never alias `data`
    # and so plain-list samples work with the default `x - y` distance.
    ctr = [np.array(seed, dtype=float) for seed in data[:K]]

    while True:
        # Assignment step: put every sample into the nearest centroid's cluster.
        kls = [[] for _ in range(K)]
        for dt in data:
            distances = [dist(dt, ck) for ck in ctr]
            # min() returns the first minimum, so ties keep the lowest index.
            ind = min(range(K), key=distances.__getitem__)
            kls[ind].append(dt)

        # Update step: move each centroid to the mean of its members.
        S = 0  # total displacement of all centroids in this pass
        newctr = []
        for members, old in zip(kls, ctr):
            if members:
                ck = np.mean(np.array(members), axis=0)
                S += dist(ck, old)
            else:
                # An empty cluster has no mean (NumPy would yield NaN, and
                # NaN never satisfies `S <= tol`, looping forever).  Keep the
                # previous centroid so it can pick up samples later.
                ck = old
            newctr.append(ck)

        if S <= tol:
            break
        ctr = newctr

    return kls, ctr
# Demo: cluster 100 random 2-D points into 4 groups and plot the result.
import matplotlib.pyplot as plt

K = 4
data = np.random.rand(100, 2)
kls, ctr = kmeans(data, K)
clr = ['r', 'k', 'g', 'b']  # one matplotlib colour code per cluster
for colour, members, centre in zip(clr, kls, ctr):
    # Cluster members are drawn as filled circles ...
    for point in members:
        plt.plot(point[0], point[1], colour + 'o')
    # ... and the cluster's centroid as a plus sign in the same colour.
    plt.plot(centre[0], centre[1], colour + '+')
plt.show()