理论部分 https://jkchen.blog.csdn.net/article/details/103334497
这个比神经网络简单多了,一下午就KO了
顺便一提,可视化过程很有意思,也学到了很多matplotlib.pyplot的知识
代码:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat, savemat
# ---- Run configuration ----
K = 6        # number of clusters
times = 10   # number of independent runs
Show = True  # whether to visualise the clustering
# Display colour for each cluster, indexed by cluster number.
color = [
    'red',
    'yellow',
    'black',
    'pink',
    'orange',
    'purple',
]

# ---- Input data ----
# To experiment without the .mat file, random points can be substituted:
#   X = np.random.rand(1000).reshape(500, 2)
X = loadmat('data_K_Means_2.mat')['X']
# m rows (one per point), n columns (one per coordinate).
(m, n) = X.shape
# Quick look at the raw points: plt.scatter(X[:, 0], X[:, 1]); plt.show()
def Dis(i, j):
    """Return the squared Euclidean distance between two points.

    Args:
        i: First point; any array-like of numbers (list, tuple, ndarray,
            or a single-row matrix).
        j: Second point; must be broadcast-compatible with ``i``.

    Returns:
        The sum of the squared element-wise differences, as a NumPy scalar.
    """
    # np.asarray lets plain sequences through and turns matrix inputs into
    # ordinary arrays, so the squaring below is always element-wise.
    diff = np.asarray(i) - np.asarray(j)
    return np.sum(np.square(diff))
def _assign_clusters(points, centers):
    """Return, for every row of *points*, the index of its nearest center."""
    # Broadcast to an (m, K, n) array of differences, then reduce over the
    # last axis to get an (m, K) table of squared distances.
    diff = points[:, np.newaxis, :] - centers[np.newaxis, :, :]
    sq_dist = np.sum(diff * diff, axis=2)
    # argmin returns the first minimum, i.e. ties go to the lowest index.
    return np.argmin(sq_dist, axis=1)


def _update_centers(points, labels, centers):
    """Move each center, in place, to the mean of the points assigned to it."""
    for k in range(centers.shape[0]):
        members = points[labels == k]
        # A cluster with no members keeps its previous center.
        if len(members) > 0:
            centers[k] = members.mean(axis=0)


def _draw_clusters(points, labels, centers):
    """Plot the points coloured by cluster and the centers as green 'X' marks."""
    for k in range(centers.shape[0]):
        members = points[labels == k]
        # One plot call per cluster instead of one per point.  The palette
        # index wraps so that more clusters than colours cannot raise.
        plt.plot(members[:, 0], members[:, 1], 'o',
                 color=color[k % len(color)])
    plt.plot(centers[:, 0], centers[:, 1], 'X', color='green')


miJ = 1e18  # lowest distortion J found over all runs
ansU = []   # cluster centers of the best run
ansC = []   # cluster index of every point in the best run
data = np.asarray(X, dtype=float)

if Show:
    plt.ion()

for T in range(times):
    # Use K *distinct* sample points as the initial centers.  Sampling with
    # replacement could pick the same point twice and leave a cluster empty
    # for the whole run.  Raises ValueError if K > m.
    subindex = np.random.choice(m, K, replace=False)
    u = data[subindex].copy()
    # Assignment from the previous iteration; None means "no previous pass",
    # so the first pass can never be mistaken for convergence.
    cP = None
    while True:
        c = _assign_clusters(data, u)
        _update_centers(data, c, u)
        if Show:
            plt.cla()
            _draw_clusters(data, c, u)

        # Converged once the assignment is identical to the previous one.
        converged = cP is not None and np.array_equal(cP, c)
        cP = c
        if not converged:
            if Show:
                plt.pause(0.01)
            continue

        # Distortion: total squared distance from each point to its center.
        J = 0
        for i in range(m):
            J += Dis(data[i], u[c[i]])
        if J < miJ:
            ansC = c
            ansU = u
            miJ = J
        if Show:
            name = 'Result_' + str(T) + '.png'
            plt.title('Finished! J is ' + str(J))
            print('save as :' + name)
            plt.savefig(name)
            plt.pause(1)
        else:
            print("Over!")
        break

savemat('K_Means_Answer_2.mat', mdict={'U': ansU, 'C': ansC, 'J': miJ})

if Show:
    plt.ioff()
    # Clear what the last run left on the axes; otherwise its points and
    # centers would show through underneath the best result.
    plt.cla()
    _draw_clusters(data, ansC, ansU)
    plt.title('Result : J is ' + str(miJ))
    name = 'Result Answer.png'
    plt.savefig(name)
    plt.show()
可视化效果: