【python】手动实现k-means算法

要求:不使用 scikit-learn 等机器学习框架。

数据使用的是 UCI 机器学习数据库中的 iris(鸢尾花)数据集。
iris数据

# -coding: utf-8
import random
import matplotlib.pyplot as plt

# Load the data file and flatten it into one list of string fields.
# Each line is split on whitespace first and then on commas, so blank lines
# and surrounding whitespace contribute no fields.
# The `with` block guarantees the file is closed even if reading fails.
with open(r"C:\Users\65465\Documents\data\iris.txt") as f:
    data_value = [
        field
        for line in f
        for token in line.split()
        for field in token.split(",")
    ]

# Records are consumed as consecutive groups of five fields: four numeric
# values followed by one label.
# NOTE(review): presumably the UCI iris layout (4 measurements + species
# name) -- confirm against the actual data file.
data1 = []  # 1st numeric field of every record
data2 = []  # 2nd numeric field of every record
data3 = []  # 3rd numeric field of every record
data4 = []  # 4th numeric field of every record
name = []   # 5th field of every record, kept as a string

# Ignore trailing fields that do not form a complete 5-field record.
usable = len(data_value) - len(data_value) % 5
for start in range(0, usable, 5):
    first, second, third, fourth, label = data_value[start:start + 5]
    data1.append(float(first))
    data2.append(float(second))
    data3.append(float(third))
    data4.append(float(fourth))
    name.append(label)

def ran(data1, k):
    """Pick ``k`` random indices into ``data1`` to seed the centroids.

    Args:
        data1: Sequence whose length defines the valid index range.
        k: Number of indices to draw.

    Returns:
        A list of ``k`` integers, each in ``range(len(data1))``.  The indices
        are distinct whenever ``k <= len(data1)``; otherwise repeats are
        unavoidable and indices are drawn with replacement.

    Raises:
        ValueError: If ``data1`` is empty while ``k`` is positive.
    """
    size = len(data1)
    if k > 0 and size == 0:
        raise ValueError("cannot pick indices from empty data")
    if k <= size:
        # Distinct indices: duplicated seeds would yield identical centroids
        # and therefore empty clusters.
        return random.sample(range(size), k)
    # randrange excludes its upper bound, so every index is valid
    # (random.randint(0, size) could return the out-of-range value `size`).
    return [random.randrange(size) for _ in range(k)]

def kmeans(k, data1, data2):
    """Cluster 2-D points into ``k`` groups with k-means and plot the result.

    The initial centroids are the points at the indices returned by
    ``ran(data1, k)``.  Each pass assigns every point to its nearest centroid
    (Euclidean distance) and then moves each centroid to the mean of its
    points; iteration stops once a pass leaves every centroid unchanged.
    The final clusters are drawn as a matplotlib scatter plot.

    Args:
        k: Number of clusters (must be at least 1).
        data1: x coordinates of the points.
        data2: y coordinates of the points, parallel to ``data1``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    seeds = ran(data1, k)
    centers_x = [data1[s] for s in seeds]
    centers_y = [data2[s] for s in seeds]

    while True:
        # mean[c] collects the x values of cluster c, mean[c + k] its y values.
        mean = [[] for _ in range(2 * k)]
        for x, y in zip(data1, data2):
            # Squared distance gives the same ordering as the true distance;
            # min() keeps the first centroid on ties.
            nearest = min(
                range(k),
                key=lambda c: (x - centers_x[c]) ** 2 + (y - centers_y[c]) ** 2,
            )
            mean[nearest].append(x)
            mean[nearest + k].append(y)

        moved = False
        for c in range(k):
            xs = mean[c]
            ys = mean[c + k]
            if not xs:
                # Empty cluster: keep its previous centroid instead of
                # dividing by zero.
                continue
            new_x = sum(xs) / len(xs)
            new_y = sum(ys) / len(ys)
            if new_x != centers_x[c] or new_y != centers_y[c]:
                centers_x[c] = new_x
                centers_y[c] = new_y
                moved = True

        # Converged when no centroid moved during this pass.
        if not moved:
            break

    # Plot one scatter series per cluster.  The first three clusters use
    # red/o, green/*, blue/+; later ones cycle colours with a shifted marker
    # so combinations stay distinct for up to nine clusters.
    colors = ['red', 'green', 'blue']
    markers = ['o', '*', '+']
    for c in range(k):
        plt.scatter(
            mean[c],
            mean[c + k],
            c=colors[c % len(colors)],
            marker=markers[(c + c // len(colors)) % len(markers)],
            label='Cluster%d' % (c + 1),
        )
    plt.xlabel('data1')
    plt.ylabel('data2')
    plt.legend()
    plt.show()



# Run the clustering on the data1/data2 columns.
k = 3  # number of clusters
kmeans(k, data1, data2)

运行结果如下图所示(k-means 聚类结果):

可以看出聚类效果较好。

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值