K-means 聚类的 Python 实现

选择txt文件作为数据集进行测试

import numpy as np
import operator
import matplotlib.pyplot as plt
def mindistance(X, Y):
    """Return the index of the entry of ``Y`` that lies closest to ``X``.

    Closeness is measured with ``np.linalg.norm(X - Y[i])``. The earliest
    index wins ties, and 0 is returned when ``Y`` is empty or when no
    distance compares below infinity.
    """
    nearest = 0
    shortest = float('inf')
    for position, centre in enumerate(Y):
        gap = np.linalg.norm(X - centre)
        if not gap < shortest:
            continue
        nearest, shortest = position, gap
    return nearest

def k_means(k, Cluster, examples, C=None):
    """Run one k-means step: assign every sample, then recompute the centres.

    Args:
        k: Number of clusters; must equal ``len(Cluster)``.
        Cluster: List of the ``k`` current centre points. It is updated in
            place: each entry is rebound to the mean of the samples assigned
            to it. A centre that attracts no samples is left unchanged
            instead of being replaced by a NaN from a division by zero.
        examples: Iterable of sample points (numpy arrays of equal shape).
        C: Ignored. Kept only so existing four-argument calls keep working;
            the assignment is always rebuilt from scratch.

    Returns:
        A list of ``k`` lists. Entry ``i`` holds the samples that were
        nearest to centre ``i`` as it was when the function was called.

    Raises:
        ValueError: If ``Cluster`` does not contain exactly ``k`` centres.
    """
    if len(Cluster) != k:
        raise ValueError(
            "expected %d centres, got %d" % (k, len(Cluster)))

    # Assignment step: bucket each sample under its nearest centre.
    groups = [[] for _ in range(k)]
    for sample in examples:
        groups[mindistance(sample, Cluster)].append(sample)

    # Update step: move each centre to the mean of its members. The mean is
    # taken along axis 0, so points of any dimensionality are supported.
    for i, members in enumerate(groups):
        if members:
            Cluster[i] = np.mean(members, axis=0)
    return groups
def dataload(filename):
    """Load two-dimensional sample points from a tab-separated text file.

    Every non-blank line must hold at least two tab-separated numeric
    fields; only the first two are used. Blank or whitespace-only lines
    (for example a trailing empty line) are skipped rather than crashing
    the float conversion.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of numpy arrays, one per data line in file order, each
        holding the two parsed floats.

    Raises:
        ValueError: If one of the first two fields is not a valid float.
        IndexError: If a non-blank line has fewer than two fields.
    """
    X = []
    with open(filename) as f:
        # Iterate the file lazily instead of materialising every line first.
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            X.append(np.array([float(fields[0]), float(fields[1])]))
    return X

def _same_partition(first, second):
    """Return True when two clusterings hold equal points in the same order."""
    if len(first) != len(second):
        return False
    for left, right in zip(first, second):
        if len(left) != len(right):
            return False
        if not all(np.array_equal(p, q) for p, q in zip(left, right)):
            return False
    return True


K = 3  # number of clusters; the plot styling below also assumes three
dataSet = dataload('testSet.txt')
if len(dataSet) < K:
    raise ValueError('need at least %d samples to seed the centres' % K)

C = [[] for _ in range(K)]
# Use the first K samples as the initial centre points.
Cluster = [dataSet[i] for i in range(K)]

# Repeat the k-means step until the partition stops changing. Plain `==` on
# nested lists of numpy arrays is not usable here: comparing two different
# arrays yields an array, whose truth value raises ValueError. Each pass
# calls k_means exactly once.
C1 = k_means(K, Cluster, dataSet, C)
while True:
    updated = k_means(K, Cluster, dataSet, C1)
    if _same_partition(C1, updated):
        break
    C1 = updated

for i, members in enumerate(C1):
    print('第' + str(i+1) +'类有' + str(len(members)) + "个样本,分别是:")
    for point in members:
        print(point)

# Draw each cluster directly from its member list, one colour per cluster.
colors = ('r', 'y', 'b')
labels = ('first kind', 'second kind', 'third kind')
for members, color, label in zip(C1, colors, labels):
    xs = [point[0] for point in members]
    ys = [point[1] for point in members]
    plt.scatter(xs, ys, color=color, label=label)
plt.legend()
plt.show()


数据集可以自行准备(每行为两个以制表符分隔的数值);本文示例将数据聚成三类。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值