# k-Means算法实现

import numpy as np

# Euclidean distance between two vectors
def euclDistance(vector1, vector2):
    """Return the Euclidean distance between ``vector1`` and ``vector2``.

    Args:
        vector1: NumPy array or array-like (list, tuple) of numbers.
        vector2: NumPy array or array-like with the same length as
            ``vector1``.

    Returns:
        The square root of the sum of squared element-wise differences.
    """
    # np.asarray lets plain Python sequences be subtracted element-wise;
    # arrays are passed through without copying.
    diff = np.asarray(vector2) - np.asarray(vector1)
    # np.sum reduces in native code instead of iterating in Python;
    # axis=0 collapses a 1-D vector to a scalar.
    return np.sqrt(np.sum(diff ** 2, axis=0))


# Initialise the centroids
def initCentroids(dataSet, k):
    """Pick ``k`` rows of ``dataSet`` at random as the initial centroids.

    Args:
        dataSet: 2-D NumPy array with one sample per row.
        k: number of centroids to select.

    Returns:
        A new float array of shape ``(k, n_features)`` holding copies of the
        selected rows.
    """
    n_samples = dataSet.shape[0]
    # Draw distinct row indices: picking the same sample twice would create
    # duplicate centroids, which leaves one of the clusters empty.  Sampling
    # with replacement is only used when more centroids than samples are
    # requested, where distinct rows are impossible.
    indices = np.random.choice(n_samples, size=k, replace=k > n_samples)
    # Fancy indexing copies the rows; the float dtype lets callers store
    # cluster means in the result even when dataSet holds integers.
    return np.array(dataSet[indices, :], dtype=float)


def kmeans(dataSet, k):
    """Cluster the rows of ``dataSet`` into ``k`` groups with k-means.

    Starting from centroids chosen by ``initCentroids``, the function
    alternates between assigning every sample to its nearest centroid
    (measured with ``euclDistance``) and moving each centroid to the mean of
    its assigned samples, until a full pass changes no assignment.

    Args:
        dataSet: 2-D NumPy array with one sample per row.
        k: number of clusters.

    Returns:
        A ``(centroids, samples_info)`` tuple.  ``centroids`` has one row per
        cluster.  ``samples_info`` has shape ``(n_samples, 2)``: column 0 is
        the index of the cluster the sample belongs to and column 1 is the
        distance from the sample to that cluster's centroid.
    """
    n_samples = dataSet.shape[0]
    # Column 0: cluster index of the sample; column 1: distance to its centroid.
    samples_info = np.zeros((n_samples, 2))
    # -1 is not a valid cluster index, so the first pass always registers a
    # change, even for samples whose nearest centroid really is cluster 0.
    samples_info[:, 0] = -1

    # Initial centroids
    centroids = initCentroids(dataSet, k)

    cluster_changed = True
    while cluster_changed:
        cluster_changed = False

        # Assignment step: find the nearest centroid for every sample.
        for i in range(n_samples):
            min_dist = np.inf
            min_index = 0
            for j in range(k):
                distance = euclDistance(centroids[j, :], dataSet[i, :])
                if distance < min_dist:
                    min_dist = distance
                    # Remember which centroid produced the minimum.
                    min_index = j

            if samples_info[i, 0] != min_index:
                cluster_changed = True
                samples_info[i, 0] = min_index
            # Refresh the distance on every pass: centroids move even when
            # the sample stays in the same cluster.
            samples_info[i, 1] = min_dist

        # Update step: move each centroid to the mean of its members.
        for j in range(k):
            members = dataSet[samples_info[:, 0] == j]
            # An empty cluster has no mean (np.mean would give NaN); keep
            # its previous centroid instead.
            if members.shape[0] > 0:
                centroids[j, :] = np.mean(members, axis=0)

    return centroids, samples_info

if __name__ == '__main__':
    # Load the samples from a space-delimited text file, one sample per row.
    data = np.genfromtxt("kmeans.txt", delimiter=" ")
    # Number of clusters.
    # NOTE(review): `k` was previously used here without being defined, which
    # raised NameError. 4 is an assumed value - set it to match the data.
    k = 4
    centroids, samples_info = kmeans(data, k)
    print(centroids)
    print(samples_info)

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值