Python:获取距离K-Means聚类中心最近的样本

import numpy as np
import pandas as pd
from sklearn import datasets as DS
import matplotlib.pyplot as plt


def euclideanDist(A, B):
    """Return the Euclidean distance between two points.

    Args:
        A: Array-like of numeric coordinates.
        B: Array-like of numeric coordinates, broadcastable against ``A``.

    Returns:
        For 1-D inputs, a scalar distance. For higher-dimensional inputs the
        squared differences are reduced along axis 0 only, which is what the
        previous built-in ``sum`` based implementation did.
    """
    # Cast to float first: unsigned integer inputs would otherwise wrap
    # around on subtraction, and plain Python lists do not support ``-``.
    diff = np.asarray(A, dtype=float) - np.asarray(B, dtype=float)
    # np.sum runs in native code, unlike the built-in sum, which iterates the
    # array element by element in Python.
    return np.sqrt(np.sum(diff ** 2, axis=0))
def RandomCenters(dataSet, k):
    """Pick ``k`` distinct rows of ``dataSet`` to serve as initial centers.

    Args:
        dataSet: Array whose first axis indexes the samples.
        k: Number of rows to draw; drawing is without replacement.

    Returns:
        An array holding the ``k`` selected rows. Integer-array indexing is
        used, so the result is a copy rather than a view of ``dataSet``.
    """
    num_samples = dataSet.shape[0]
    # Passing an int to np.random.choice samples from 0..num_samples-1.
    chosen_rows = np.random.choice(num_samples, size=k, replace=False)
    return dataSet[chosen_rows]
def KMeans(dataSet, k):
    """Cluster the rows of ``dataSet`` into ``k`` groups.

    Initial centers are ``k`` distinct rows chosen by ``RandomCenters``. Each
    pass assigns every sample to its nearest center (Euclidean distance; on a
    tie the lowest cluster index wins) and then moves each center to the mean
    of its members. Iteration stops after the first pass in which no sample
    changes cluster.

    Args:
        dataSet: 2-D array-like of numeric values, one sample per row.
        k: Number of clusters.

    Returns:
        A tuple ``(Centers, DistMatrix)``. ``Centers`` is a float array with
        one row per cluster. ``DistMatrix`` is a float array with one row per
        sample: column 0 holds the assigned cluster index and column 1 the
        distance from the sample to that cluster's center.
    """
    # Work in floating point so that integer input does not truncate the
    # cluster means when they are written back into ``Centers``.
    dataSet = np.asarray(dataSet, dtype=float)
    # np.array copies, so updating the centers can never write into dataSet.
    Centers = np.array(RandomCenters(dataSet, k), dtype=float)
    n = dataSet.shape[0]
    DistMatrix = np.zeros((n, 2))
    # Start every label at -1, which is never a valid cluster index. With the
    # labels starting at 0, a first pass that puts every sample in cluster 0
    # (always the case for k == 1) would be mistaken for "no change" and the
    # centers would never be updated.
    DistMatrix[:, 0] = -1
    centerChanged = True
    while centerChanged:
        centerChanged = False
        for i in range(n):
            minDist = np.inf
            minIndex = -1
            for j in range(k):
                dist = euclideanDist(dataSet[i, :], Centers[j, :])
                # Track the nearest center seen so far for this sample.
                if dist < minDist:
                    minDist = dist
                    minIndex = j
            if DistMatrix[i, 0] != minIndex:
                centerChanged = True
            DistMatrix[i, 0] = minIndex   # cluster index
            DistMatrix[i, 1] = minDist    # distance to that cluster's center
        if centerChanged:
            # Assignments changed, so recompute every center from its members.
            for i in range(k):
                members = dataSet[DistMatrix[:, 0] == i]
                # An empty cluster keeps its previous center; taking the mean
                # of zero rows would turn the center into NaN.
                if len(members) > 0:
                    Centers[i] = np.mean(members, axis=0)
    return Centers, DistMatrix

def PointSelection(DistMatrix, k, n):
    """Find, for each cluster, the row index of the sample nearest its center.

    Args:
        DistMatrix: 2-D array; column 0 is each sample's cluster index and
            column 1 is its distance to that cluster's center.
        k: Number of clusters; cluster indices 0..k-1 are searched.
        n: Number of leading rows of ``DistMatrix`` to scan.

    Returns:
        A list of ``k`` row indices, one per cluster. On equal distances the
        earliest row wins. A cluster with no row whose distance is below
        infinity yields ``-1``.
    """
    nearest_rows = []
    for cluster in range(k):
        best_row, best_dist = -1, np.inf
        for row in range(n):
            # Skip samples that belong to a different cluster.
            if DistMatrix[row, 0] != cluster:
                continue
            candidate = DistMatrix[row, 1]
            if candidate < best_dist:
                best_row, best_dist = row, candidate
        nearest_rows.append(best_row)
    return nearest_rows

if __name__ == "__main__":
    # Load the CSV (no header row) and keep the first two columns as features.
    path = r"D:\dataset\clusterData\bolbs_1.csv"
    raw = pd.read_csv(path, header=None)
    samples = np.array(raw)[:, :2]
    num_clusters = 2

    # Cluster, then pick the sample nearest to each cluster center.
    centers, assignments = KMeans(samples, num_clusters)
    nearest = PointSelection(assignments, num_clusters, len(samples))

    # Plot all samples colored by cluster index, then overlay the selected
    # samples as large stars.
    xs, ys = samples[:, 0], samples[:, 1]
    plt.scatter(xs, ys, c=assignments[:, 0])
    plt.scatter(xs[nearest], ys[nearest], marker="*", s=200)
    plt.show()

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

DeniuHe

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值