K-means Algorithm Implementation

This post shows how to implement the K-means clustering algorithm and how to refine it with a bisecting (binary-split) K-means procedure. It walks through computing the Euclidean distance, generating random initial centers, and iteratively updating the cluster centers. Finally, the algorithms are applied to the 'BuddyMove.csv' dataset and the clustering result is printed.


# -*- coding: utf-8 -*-
"""
Created on Tue Apr 27 21:20:07 2021

@author: ***
"""

from numpy import *
import csv

def dist(A, B):  # Euclidean distance: square root of the summed squared differences
    return sqrt(sum(power(A - B, 2)))

def createR(dataMat, k):  # generate k random initial centers inside the data's range
    n = shape(dataMat)[1]
    center = mat(zeros((k, n)))
    for j in range(n):
        minj = min(dataMat[:, j])
        maxj = max(dataMat[:, j])
        rangej = float(maxj - minj)
        # draw k random values in [minj, maxj) for this column
        center[:, j] = mat(minj + rangej * random.rand(k, 1))
    return center
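
A quick way to sanity-check createR is to feed it random 2-D data and confirm that every generated center stays inside the per-column range of the data. The snippet below is my own illustration (synthetic data, meant to be run separately), not part of the original post.

# Illustrative sanity check for createR (assumed synthetic data)
demo = mat(random.rand(50, 2) * 10)   # 50 random 2-D points in [0, 10)
c = createR(demo, 3)
print(c)  # every value should lie within the corresponding column's min/max range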

def Kmeans(dataMat, k, distF=dist, createF=createR):
    m = shape(dataMat)[0]         # number of rows (samples)
    cluster = mat(zeros((m, 2)))  # column 0: assigned cluster index, column 1: squared distance
    center = createF(dataMat, k)
    flag = True
    while flag:
        flag = False
        for i in range(m):
            mind = float('inf')
            minj = -1
            for j in range(k):
                d = distF(dataMat[i, :], center[j, :])  # distance to each center
                if d < mind:
                    mind = d
                    minj = j
            if cluster[i, 0] != minj:  # has the assignment changed?
                flag = True
            cluster[i, :] = minj, mind**2
        for centj in range(k):  # recompute each center as the mean of its points
            ss = dataMat[nonzero(cluster[:, 0] == centj)[0]]
            if ss.size > 0:     # skip empty clusters to avoid a NaN mean
                center[centj, :] = mean(ss, axis=0)
    return center, cluster
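
As a quick smoke test of Kmeans, it can be run on a small synthetic 2-D dataset. This is my own sketch, not from the original post, and it assumes the random initialization leaves no cluster permanently empty. The function returns a k x n matrix of centers and an m x 2 matrix holding, per row, the assigned cluster index and the squared distance to its center.

# Illustrative call on synthetic data (assumed example)
demo = mat(random.rand(100, 2))
centers, assign = Kmeans(demo, 3)
print(centers)               # 3 x 2 matrix of cluster centers
print(set(assign[:, 0].A1))  # cluster indices in use, e.g. {0.0, 1.0, 2.0}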

# Bisecting K-means clustering algorithm
def bitKmeans(dataSet, k, distF=dist):
    m = shape(dataSet)[0]
    cluster = mat(zeros((m, 2)))
    center0 = mean(dataSet, axis=0).tolist()[0]  # start with a single cluster: the global mean
    centerL = [center0]
    for j in range(m):  # squared distance of every point to the initial center
        cluster[j, 1] = distF(mat(center0), dataSet[j, :])**2
    while len(centerL) < k:  # keep splitting until we have k clusters
        lowEss = float('inf')
        for i in range(len(centerL)):  # try splitting each existing cluster in turn
            indata = dataSet[nonzero(cluster[:, 0] == i)[0], :]
            if indata.size == 0:
                continue
            centerM, clusterM = Kmeans(indata, 2)  # split this cluster with 2-means
            inlowEss = sum(clusterM[:, 1])         # SSE of the split cluster
            outlowEss = sum(cluster[nonzero(cluster[:, 0] != i)[0], 1])  # SSE of the rest
            if (inlowEss + outlowEss) < lowEss:    # keep the split with the lowest total SSE
                lowEss = inlowEss + outlowEss
                bestCenter = i
                bestcluster = clusterM.copy()
                bestCenters = centerM

        # relabel the two halves: label 1 becomes a new cluster, label 0 keeps the old index
        # (the 1-labelled points must be relabelled first to avoid clobbering them)
        bestcluster[nonzero(bestcluster[:, 0] == 1)[0], 0] = len(centerL)
        bestcluster[nonzero(bestcluster[:, 0] == 0)[0], 0] = bestCenter
        # update centerL and cluster with the accepted split
        centerL[bestCenter] = bestCenters[0, :].tolist()[0]
        centerL.append(bestCenters[1, :].tolist()[0])
        cluster[nonzero(cluster[:, 0] == bestCenter)[0], :] = bestcluster
    return mat(centerL), cluster
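
A minimal check of bitKmeans on synthetic data (again my own sketch, not from the post, and assuming each 2-means split succeeds): the number of returned centers should equal k and every point should end up with a cluster index in [0, k).

# Illustrative call on synthetic data (assumed example)
demo = mat(random.rand(200, 3))
cts, asg = bitKmeans(demo, 4)
print(shape(cts))            # expected: (4, 3)
print(set(asg[:, 0].A1))     # expected indices: 0.0 .. 3.0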
        
            
    
datas = []
with open('BuddyMove.csv') as f:  # load the data
    data = csv.reader(f)
    for row in data:
        arow = list(map(float, row))
        datas.append(arow)

dat = mat(datas)
center, cluster = bitKmeans(dat[:, 1:3], 6)
print(transpose(cluster)[0, :])
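
To get a quick feel for the clustering result, one could also count how many records land in each of the 6 clusters. This snippet is my own addition and assumes the script above ran through.

# Count the records assigned to each cluster (illustrative addition)
for c in range(6):
    size = len(nonzero(cluster[:, 0] == c)[0])
    print('cluster %d: %d records' % (c, size))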