代码段:
import numpy as np
import matplotlib.pyplot as plt
# Upper bound on the number of assignment/update passes.
max_iter = 200
# Number of clusters to fit.
k = 4


def load_points(path):
    """Read 2-D points from a whitespace-separated text file.

    Each non-blank line must start with two numeric fields (x then y);
    any further fields on the line are ignored. Blank lines are skipped.

    Args:
        path: Path of the text file to read.

    Returns:
        A float array of shape (n, 2), one row per point.

    Raises:
        ValueError: If a line has fewer than two fields, a field is not a
            number, or the file contains no points at all.
    """
    rows = []
    # `with` guarantees the handle is closed even if parsing fails.
    with open(path) as fp:
        for line_no, line in enumerate(fp, start=1):
            # split() with no argument tolerates tabs, repeated spaces and
            # trailing newlines, none of which split(' ') handles.
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 2:
                raise ValueError(
                    f"{path}: line {line_no} needs at least two values"
                )
            rows.append([float(fields[0]), float(fields[1])])
    if not rows:
        raise ValueError(f"{path}: no data points found")
    return np.array(rows, dtype=float)


def kmeans(points, n_clusters=k, n_iter=max_iter):
    """Cluster points with Lloyd's k-means algorithm.

    Centroids are initialised from the first ``n_clusters`` points. Each
    pass assigns every point to its nearest centroid (Euclidean distance,
    ties go to the lowest cluster index) and then moves each centroid to
    the mean of its members. Iteration stops early once the assignment no
    longer changes.

    Args:
        points: Array-like of shape (n, d) holding the points to cluster.
        n_clusters: Number of clusters; must not exceed the point count.
        n_iter: Maximum number of passes; must be at least 1.

    Returns:
        A ``(center, labels)`` tuple: ``center`` is a float array of shape
        (n_clusters, d) and ``labels`` an integer array of shape (n,) with
        the cluster index of each point.

    Raises:
        ValueError: If ``points`` is not 2-D, there are fewer points than
            clusters, or ``n_clusters`` / ``n_iter`` is smaller than 1.
    """
    points = np.asarray(points, dtype=float)
    if points.ndim != 2:
        raise ValueError("points must be a 2-D array of shape (n, d)")
    if n_clusters < 1 or n_iter < 1:
        raise ValueError("n_clusters and n_iter must both be at least 1")
    if len(points) < n_clusters:
        raise ValueError(
            f"need at least {n_clusters} points, got {len(points)}"
        )

    # Copy so that updating the centroids never writes into the input data.
    center = points[:n_clusters].copy()
    labels = None
    for _ in range(n_iter):
        # Broadcasting yields the full (n, n_clusters) distance matrix in
        # one step instead of one Python-level iteration per point.
        dist = np.linalg.norm(points[:, None, :] - center[None, :, :], axis=2)
        new_labels = dist.argmin(axis=1)
        # Unchanged assignment means the centroids are already the member
        # means, so further passes would not change anything.
        if labels is not None and np.array_equal(new_labels, labels):
            break
        labels = new_labels
        for c in range(n_clusters):
            members = points[labels == c]
            # An empty cluster keeps its previous centroid; taking the mean
            # of zero points would produce NaN and poison later passes.
            if len(members):
                center[c] = members.mean(axis=0)
    return center, labels


def plot_clusters(points, labels, n_clusters=k):
    """Show a scatter plot of the points, one colour per cluster.

    Args:
        points: Array of shape (n, 2) with the x and y coordinates.
        labels: Integer array of shape (n,) with each point's cluster index.
        n_clusters: Number of clusters to draw.
    """
    # Clusters 0-3 use red, blue, green and yellow; the palette repeats
    # for any additional clusters.
    colors = ['r', 'b', 'g', 'y']
    for c in range(n_clusters):
        members = points[labels == c]
        plt.scatter(members[:, 0], members[:, 1], color=colors[c % len(colors)])
    plt.show()


def main():
    """Load ``exercise.txt``, cluster its points and plot the result."""
    points = load_points("exercise.txt")
    _, labels = kmeans(points, k, max_iter)
    plot_clusters(points, labels, k)


if __name__ == "__main__":
    main()
效果: