聚类西瓜

白羊杆菌

已于 2023-12-12 14:25:06 修改

阅读量425

点赞数 9

文章标签：聚类、机器学习、人工智能

于 2023-12-12 14:10:55 首次发布

本文链接：https://blog.csdn.net/weixin_64809211/article/details/134948196

版权

import numpy as np
import time


class KMeans:
    """K-means clustering (Lloyd's iteration) on an ``(n, m)`` sample matrix.

    The class keeps no state; ``kmeans`` can be called repeatedly on
    different data sets.
    """

    # Seed rows used when ``k == 3`` and the caller supplies no seeds. They
    # are kept so existing three-cluster callers keep getting the same
    # deterministic result as before ``init_idxs`` became configurable.
    _DEFAULT_INIT_IDXS = (5, 11, 23)

    def kmeans(self, data, k, init_idxs=None):
        """Cluster the rows of *data* into *k* groups.

        Args:
            data: Array-like of shape ``(n, m)``; converted to a float array,
                so nested lists and integer arrays are accepted.
            k: Number of clusters, ``1 <= k <= n``.
            init_idxs: Optional sequence of ``k`` row indices whose samples
                are used as the initial centroids. When omitted, the fixed
                seeds ``(5, 11, 23)`` are used if ``k == 3`` and the data has
                at least 24 rows; otherwise ``k`` distinct rows are drawn at
                random.

        Returns:
            A tuple ``(clusters, nearest)``: ``clusters`` is a ``(k, m)``
            float array of centroids and ``nearest`` is a length-``n``
            integer array giving the centroid index assigned to each row.

        Raises:
            ValueError: If *data* is not a non-empty 2-D array, *k* is out of
                range, or *init_idxs* does not contain exactly *k* indices.
        """
        # Work on a float array so centroid means are never truncated and
        # list input can be fancy-indexed.
        data = np.asarray(data, dtype=float)
        if data.ndim != 2 or data.shape[0] == 0:
            raise ValueError("data must be a non-empty 2-D array of shape (n, m)")
        n = data.shape[0]
        if not 1 <= k <= n:
            raise ValueError(f"k must be between 1 and {n}, got {k}")

        if init_idxs is None:
            default = KMeans._DEFAULT_INIT_IDXS
            if k == len(default) and max(default) < n:
                init_idxs = default
            else:
                # A local generator avoids reseeding the global NumPy RNG;
                # replace=False guarantees k distinct starting samples.
                init_idxs = np.random.default_rng().choice(n, size=k, replace=False)
        idxs = np.asarray(init_idxs, dtype=int)
        if idxs.shape != (k,):
            raise ValueError(f"init_idxs must contain exactly {k} indices")

        # Fancy indexing copies, so updating centroids never mutates `data`.
        clusters = data[idxs]  # k * m
        # -1 is never a valid label, so the first pass always updates.
        nearest = np.full(n, -1)

        while True:
            # Broadcast to n * k * m instead of materialising repeated copies.
            diffs = data[:, np.newaxis, :] - clusters[np.newaxis, :, :]
            distances = np.sqrt(np.sum(np.power(diffs, 2), axis=2))  # n * k
            new_nearest = np.argmin(distances, axis=1)
            # Converged once no sample changes its assigned centroid.
            if np.array_equal(new_nearest, nearest):
                break
            nearest = new_nearest
            for i in range(k):
                members = data[nearest == i]
                # An empty cluster keeps its previous centroid; averaging an
                # empty selection would turn the centroid into NaN.
                if len(members):
                    clusters[i] = members.mean(axis=0)
        return clusters, nearest


# Thirty two-feature samples, one [x, y] pair per row (presumably the
# watermelon data set the article title refers to -- confirm against source).
data = np.array([
    [0.697, 0.460], [0.774, 0.376], [0.634, 0.264],
    [0.608, 0.318], [0.556, 0.215], [0.403, 0.237],
    [0.481, 0.149], [0.437, 0.211], [0.666, 0.091],
    [0.243, 0.267], [0.245, 0.057], [0.343, 0.099],
    [0.639, 0.161], [0.657, 0.198], [0.360, 0.370],
    [0.593, 0.042], [0.719, 0.103], [0.359, 0.188],
    [0.339, 0.241], [0.282, 0.257], [0.748, 0.232],
    [0.714, 0.346], [0.483, 0.312], [0.478, 0.437],
    [0.525, 0.369], [0.751, 0.489], [0.532, 0.472],
    [0.473, 0.376], [0.725, 0.445], [0.446, 0.459],
])


# Run a three-cluster k-means on the samples and print (centroids, labels).
k = 3
KM = KMeans()
print("当K=3时：")
print("聚类结果为：")
print(KM.kmeans(data, k))