# K均值聚类前半部分代码 (K-means clustering: first half of the code)
import random
import numpy
import matplotlib.pyplot as plt
# For ease of testing the sample data is embedded directly below; a data set
# can also be loaded from a file instead:
# DataMatrix = numpy.loadtxt('path/to/your/data')
# Each row is one two-dimensional sample (28 samples in total).
DataMatrix = numpy.array([
    [-0.5200, 1.8539],
    [2.5849, 2.2481],
    [0.9919, 1.9234],
    [2.9443, 3.7382],
    [-0.4240, 3.6220],
    [1.7762, 2.6264],
    [2.0581, 2.0918],
    [1.5754, 1.1924],
    [1.7971, 1.5387],
    [0.4869, 0.5940],
    [7.8736, 7.6255],
    [8.1850, 7.5291],
    [9.3666, 9.7513],
    [8.4139, 8.7532],
    [10.5374, 8.0650],
    [9.1401, 7.7072],
    [7.1372, 8.0828],
    [8.5458, 8.7662],
    [8.3479, 10.2368],
    [9.1033, 8.3269],
    [3.7794, 4.8633],
    [3.7210, 4.6794],
    [3.2663, 4.5548],
    [3.9355, 5.0016],
    [2.5560, 5.2594],
    [4.6123, 4.0442],
    [2.6765, 3.6859],
    [3.3384, 4.2267],
])
def EucDis(vec1, vec2):
    """Return the Euclidean distance between two vectors.

    Args:
        vec1: First vector; a numpy array or any array-like (list, tuple).
        vec2: Second vector; must be broadcast-compatible with ``vec1``.

    Returns:
        The square root of the sum of the squared element-wise differences,
        as a numpy scalar.
    """
    # asarray leaves ndarrays untouched and lets plain sequences be
    # subtracted element-wise (list - list would raise TypeError).
    diff = numpy.asarray(vec1) - numpy.asarray(vec2)
    return numpy.sqrt(numpy.sum(diff ** 2))
def Scatter_2D(num, K, CenterMatrix, DataMatrix):
    """Draw a scatter plot of the clustering result.

    Every sample is drawn as a filled circle coloured by its cluster, and
    each cluster centre is drawn as an 'x' marker with a legend entry.
    Only two-dimensional samples can be plotted; for any other input a
    message is printed and nothing is drawn.

    Args:
        num: Number of samples (rows of ``DataMatrix``) to plot.
        K: Number of cluster centres (rows of ``CenterMatrix``) to plot.
        CenterMatrix: Array indexed as ``[j, 0]`` / ``[j, 1]`` holding the
            coordinates of centre ``j``.
        DataMatrix: Array indexed as ``[i, 0]`` / ``[i, 1]`` holding the
            coordinates of sample ``i``.

    Returns:
        None.

    Note:
        The colour of sample ``i`` is looked up through the module-level
        name ``Clu_list``, which is not passed in as a parameter.
        NOTE(review): ``Clu_list`` is not defined in the visible part of the
        file; presumably it holds the integer cluster index of each sample
        and is set before this function is called - confirm.
    """
    # Also reject non-2-D arrays here, so a 1-D input yields the message
    # instead of an IndexError on shape[1].
    if DataMatrix.ndim != 2 or DataMatrix.shape[1] != 2:
        print("数据必须为二维情况")
        return
    # The call parentheses are required: a bare ``plt.figure`` is only an
    # attribute access and creates nothing.
    plt.figure()
    plt.xlabel('x1', size=20)
    plt.ylabel('x2', size=20)
    Color_cycle = ['g', 'r', 'b', 'c', 'm', 'y', 'k', 'w']
    # Wrap around the palette so more clusters than colours does not raise
    # IndexError; indices below the palette size are unaffected.
    n_colors = len(Color_cycle)
    for i in range(num):
        plt.scatter(DataMatrix[i, 0], DataMatrix[i, 1],
                    s=50, c=Color_cycle[Clu_list[i] % n_colors])
    for j in range(K):
        plt.scatter(CenterMatrix[j, 0], CenterMatrix[j, 1], s=100, marker='x',
                    c=Color_cycle[j % n_colors], label='Center' + str(j + 1))
    plt.legend()
    plt.show()