基于 Python 实现 k-means 聚类算法——矩阵运算
说明：本文仅用代码实现自己对 k-means 聚类算法的理解。本次实现采用矩阵运算的方式，算是对《基于 Python 实现 k-means 聚类算法》一文的补充。
import numpy as np
import matplotlib.pyplot as plt
# Euclidean distance between two vectors or two batches of vectors.
def eucdistance(v1, v2):
    """Return the Euclidean distance between ``v1`` and ``v2``.

    A 1-D input is first reshaped to a single-row 2-D array; inputs of any
    other rank are used as given. The difference ``v1 - v2`` follows NumPy
    broadcasting, and the squared differences are summed over the last axis.

    Args:
        v1: NumPy array (anything exposing ``ndim`` and ``reshape``).
        v2: NumPy array compatible with ``v1`` under broadcasting.

    Returns:
        Array of distances, one entry per row of the broadcast difference.
    """
    if v1.ndim == 1:
        v1 = v1.reshape(1, -1)
    if v2.ndim == 1:
        v2 = v2.reshape(1, -1)

    squared_gap = np.power(v1 - v2, 2)
    return np.sqrt(squared_gap.sum(axis=-1))
def init_k_centers(x, k):
    """Pick the initial cluster centers: the first ``k`` rows of ``x``.

    Args:
        x: 2-D array of samples, one sample per row.
        k: Number of centers to take.

    Returns:
        A new array holding a copy of ``x[0:k]``.

    Raises:
        AssertionError: If ``x`` is not 2-dimensional.
    """
    assert x.ndim == 2, "x's dim not is 2"
    # Return a copy rather than a slice view, so that code updating the
    # centers in place can never write into the caller's sample array.
    return x[:k].copy()


def k_means_v2(x_data, k=2, ITERS=20, showfig=True, colors=["g", "y", "b"]):
    """Cluster the rows of ``x_data`` into ``k`` groups with k-means.

    Distances from every sample to every center are computed in a single
    broadcast expression instead of a Python loop over samples. The centers
    start as the first ``k`` samples and are then alternately used to label
    the samples and recomputed as the mean of each labelled group, until the
    labels stop changing or ``ITERS`` iterations have run.

    ``x_data`` is never modified.

    Args:
        x_data: 2-D array of shape (n_samples, n_features).
        k: Number of clusters; must satisfy ``1 <= k <= n_samples``.
        ITERS: Maximum number of assignment/update iterations.
        showfig: If true, draw the first two features of every cluster and
            its center after each iteration using the module-level ``plt``,
            and keep the final figure open when clustering finishes.
        colors: Matplotlib colors, indexed by cluster id. Reused cyclically
            when there are fewer colors than clusters. Only read, never
            mutated.

    Returns:
        Dict mapping each cluster id ``0..k-1`` to the array of samples
        assigned to it in the last iteration (empty dict if ``ITERS <= 0``).

    Raises:
        ValueError: If ``k`` is outside ``1..n_samples``.
    """
    n, _ = x_data.shape
    if not 1 <= k <= n:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r" % (n, k)
        )

    # Float copy: the centers become means, which must not be truncated for
    # integer input and must not alias the caller's data.
    centers = init_k_centers(x_data, k=k).astype(float)
    last_labels = None  # None = "no previous assignment yet"
    results = {}

    for step in range(ITERS):
        # (1, n, f) - (k, 1, f) broadcasts to (k, n, f); summing the squares
        # over the feature axis gives a (k, n) center-to-sample distance table.
        diff = x_data[None, :, :] - centers[:, None, :]
        dist = np.sqrt(np.power(diff, 2).sum(axis=2))
        labels = dist.argmin(axis=0)

        # Update the cluster centers.
        for i in range(k):
            members = x_data[labels == i]
            # An empty cluster keeps its previous center; the mean of zero
            # rows would be NaN and poison every later distance.
            if len(members) > 0:
                centers[i] = members.mean(axis=0)
            results[i] = members

        converged = last_labels is not None and np.array_equal(labels, last_labels)
        if converged or step == ITERS - 1:
            break
        last_labels = labels

        if showfig:
            plt.ion()
            plt.show()
            plt.cla()
            for key in np.unique(labels):
                k_samples = x_data[labels == key]
                color = colors[key % len(colors)]
                plt.scatter(k_samples[:, 0], k_samples[:, 1], color=color, marker=".", s=8)
                plt.scatter(centers[key][0], centers[key][1], color=color, marker="*", s=15)
            plt.text(1.5, 0.5, "iter=%.2i" % step)
            plt.pause(0.1)

    if showfig:
        # Leave interactive mode so the last figure stays on screen.
        plt.ioff()
        plt.show()
    return results
if __name__ == '__main__':
    # Small hand-written 2-D sample set for the demo run.
    # (np.random.randn(100, 2) can be substituted to try random data.)
    samples = [
        [3, 10], [2, 9], [1, 9], [3, 7], [4, 8], [3.5, 6], [9, 0.5],
        [8, 3], [9, 2], [8, 1], [10, 0.5], [6.5, 2], [6.5, 4],
    ]
    x_data = np.array(samples)

    # Run the matrix-based k-means with two clusters and live plotting.
    result = k_means_v2(x_data, k=2, ITERS=20, showfig=True, colors=["g", "y", "b"])

    banner = "*." * 20
    print(banner + " k-means done " + banner)