2018.1.10
def my_k_means(data_set, k):
    """Cluster the rows of ``data_set`` into ``k`` groups using k-means.

    Each pass assigns every row to its nearest centroid (as measured by
    ``distEclud``) and then moves each centroid to the mean of the rows
    assigned to it. Passes repeat until no row changes cluster.

    Args:
        data_set: 2-D array/matrix with one sample per row.
            NOTE(review): presumably an ``np.matrix``, as in the rest of
            this module -- confirm against callers.
        k: Number of clusters; initial centroids come from
            ``randCent(data_set, k)`` (defined elsewhere in this file).

    Returns:
        A ``(centroids, clusterAssment)`` tuple. ``centroids`` is the object
        returned by ``randCent`` updated in place, one row per cluster.
        ``clusterAssment`` is an ``m x 2`` ``np.matrix`` whose first column
        is the assigned cluster index and whose second column is the square
        of the distance to that centroid. A row keeps index ``-1`` only if
        no centroid produced a distance below ``inf`` (e.g. NaN distances).
    """
    m = np.shape(data_set)[0]
    # np.asmatrix rather than np.mat: the `mat` alias was removed from the
    # main namespace in NumPy 2.0, while asmatrix exists in 1.x and 2.x.
    clusterAssment = np.asmatrix(np.zeros((m, 2)))
    # -1 means "not assigned yet". Starting at 0 would make rows whose first
    # nearest centroid is index 0 look unchanged, which can end the loop
    # before the centroids have actually converged.
    clusterAssment[:, 0] = -1
    centroids = randCent(data_set, k)
    cluster_changed = True
    while cluster_changed:
        cluster_changed = False
        # Assignment step: find the nearest centroid for every row.
        for i in range(m):
            mindist = np.inf
            # Reset per row so a stale index from the previous row (or an
            # unbound name) is never used when nothing beats `inf`.
            minj = -1
            for j in range(k):
                distj = distEclud(data_set[i, :], centroids[j, :])
                # Strict '<' keeps the lowest-index centroid on ties.
                if distj < mindist:
                    mindist = distj
                    minj = j
            if clusterAssment[i, 0] != minj:
                cluster_changed = True
            clusterAssment[i, :] = minj, mindist**2
        # Update step: move each centroid to the mean of its members.
        for cent in range(k):
            data_cent = data_set[np.nonzero(clusterAssment[:, 0].A == cent)[0]]
            if np.shape(data_cent)[0] == 0:
                # Empty cluster: np.mean would yield NaN and poison this
                # centroid for every later pass, so keep its old position.
                continue
            centroids[cent, :] = np.mean(data_cent, axis=0)
    return centroids, clusterAssment