python简单思维实现K-means

这里自己定义了几个样本点;初始中心既可以通过 start_center 手动指定,也可以不指定而由 random 随机选取。

#!/usr/bin/python3
# -*- coding:utf-8 -*-
# Author:ChenYuan
import random

import numpy as np
class KMeans(object):
    """Minimal K-means clustering over the rows of a 2-D NumPy array.

    Each row of the training data is labelled ``Doc1``, ``Doc2``, ... in
    input order. After ``fit`` the instance keeps the final centers, the
    labels grouped by cluster and the points grouped by cluster, which
    ``predict`` and ``evaluate`` then use.
    """

    def __init__(self):
        self._final_center = None
        self._final_result = None
        self._final_point = None
        # Metric used by the most recent fit(); predict() and evaluate()
        # reuse it so that they agree with how the clusters were built.
        self._distance_function = 'cosine'

    @staticmethod
    def _call_distance(p1, p2, distance_function='cosine'):
        """Return the distance between two vectors (smaller means closer).

        Args:
            p1: First vector (array-like).
            p2: Second vector (array-like).
            distance_function: ``'cosine'`` for ``1 - cosine similarity`` or
                ``'euclidean'`` for the L2 norm of the difference.

        Returns:
            The distance as a float.

        Raises:
            ValueError: If ``distance_function`` is not a supported name.
        """
        p1 = np.asarray(p1, dtype=float)
        p2 = np.asarray(p2, dtype=float)
        if distance_function == 'cosine':
            norm_product = np.linalg.norm(p1) * np.linalg.norm(p2)
            if norm_product == 0:
                # Direction is undefined for a zero vector; treat it as
                # maximally dissimilar instead of dividing by zero.
                return 1.0
            # Callers take the minimum, so similarity must be turned into a
            # distance; returning raw similarity picks the worst center.
            return 1.0 - float(np.dot(p1, p2)) / float(norm_product)
        if distance_function == 'euclidean':
            return float(np.linalg.norm(p1 - p2))
        raise ValueError(
            "distance_function must be 'cosine' or 'euclidean', got {!r}".format(
                distance_function))

    @staticmethod
    def _choice_random_center(X, k):
        """Pick ``k`` distinct rows of ``X`` at random as initial centers.

        Raises:
            ValueError: If ``k`` is not in ``1..len(X)`` (distinct rows
                could not be drawn).
        """
        if not 0 < k <= len(X):
            raise ValueError(
                'k must be between 1 and the number of points ({}), got {}'.format(
                    len(X), k))
        return [X[index] for index in random.sample(range(len(X)), k)]

    @staticmethod
    def _check_stop(result, last_result):
        """Return True while the centers are still changing.

        The two sets of centers are compared after sorting, so the order in
        which the centers are listed does not matter.
        """
        return sorted(result.tolist()) != sorted(last_result.tolist())

    def fit(self, X, k=2, start_center=None, distance_function='cosine',
            verbose=0, max_iter=100):
        """Cluster the rows of ``X`` into ``k`` groups.

        Args:
            X: 2-D array-like, one point per row.
            k: Number of clusters. When ``start_center`` is given, the number
                of supplied centers is used instead.
            start_center: Optional initial centers (one row per center).
                Chosen at random from ``X`` when None.
            distance_function: ``'cosine'`` or ``'euclidean'``.
            verbose: Non-zero prints the centers and the label assignment of
                every iteration.
            max_iter: Upper bound on the number of iterations, as a guard
                against assignments that never settle.

        Raises:
            ValueError: On empty or non 2-D input, mismatched center shape,
                an unsupported metric, or ``max_iter`` below 1.
        """
        X = np.asarray(X)
        if X.ndim != 2 or len(X) == 0:
            raise ValueError('X must be a non-empty 2-D array')
        if distance_function not in ('cosine', 'euclidean'):
            raise ValueError(
                "distance_function must be 'cosine' or 'euclidean', got {!r}".format(
                    distance_function))
        if max_iter < 1:
            raise ValueError('max_iter must be at least 1')

        if start_center is None:
            start_center = self._choice_random_center(X, k)
        print(start_center)

        result = np.array(start_center, dtype=float)
        if result.ndim != 2 or result.shape[1] != X.shape[1]:
            raise ValueError('start_center must hold rows shaped like the rows of X')
        # Keep the cluster lists in step with the centers actually in use.
        k = len(result)

        labels = ['Doc{}'.format(i + 1) for i in range(len(X))]
        last_result = np.array([])
        final_result = None
        iteration = 0
        while iteration < max_iter and self._check_stop(result, last_result):
            iteration += 1
            point_result = [[] for _ in range(k)]
            clusters = [[] for _ in range(k)]
            for label, point in zip(labels, X):
                distances = [self._call_distance(point, center, distance_function)
                             for center in result]
                nearest = int(np.argmin(distances))
                clusters[nearest].append(point.tolist())
                point_result[nearest].append(label)

            # An empty cluster keeps its previous center; averaging zero
            # points would produce NaN and the loop would never converge.
            new_centers = [
                np.mean(cluster, axis=0) if cluster else result[position]
                for position, cluster in enumerate(clusters)
            ]
            last_result = result
            result = np.array(new_centers, dtype=float)

            if verbose != 0:
                print('第{}次循环的中心点为:{}'.format(iteration, result))
                print('第{}次循环的中心为:{}'.format(iteration, point_result))
                print('\n')
            final_result = point_result
            self._final_point = clusters

        print('最终的聚类结果为:')
        print(final_result)
        self._final_center = result
        self._final_result = final_result
        self._distance_function = distance_function

    def predict(self, x):
        """Return, for each row of ``x``, the index of its nearest center.

        Uses the metric of the most recent ``fit`` call.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self._final_center is None:
            raise RuntimeError('fit() must be called before predict()')
        predict_result = []
        for data in x:
            distances = [self._call_distance(data, center, self._distance_function)
                         for center in self._final_center]
            predict_result.append(int(np.argmin(distances)))
        return predict_result

    def evaluate(self):
        """Print and return the mean over clusters of the mean distance to the center.

        Each cluster's average point-to-center distance is computed on its
        own and the averages are then divided by the number of clusters. An
        empty cluster contributes zero. Uses the metric of the most recent
        ``fit`` call.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self._final_center is None or self._final_point is None:
            raise RuntimeError('fit() must be called before evaluate()')
        score = 0.0
        for points, center in zip(self._final_point, self._final_center):
            if not points:
                continue
            cluster_total = sum(
                self._call_distance(point, center, self._distance_function)
                for point in points)
            # Average per cluster; dividing the running total would also
            # rescale the clusters that were already accumulated.
            score += cluster_total / len(points)
        score /= len(self._final_center)
        print(score)
        return score

这里提供一个例子,调用我写的类:

# Demo: cluster seven 8-dimensional sample points into k groups with random
# initial centers, printing every iteration, then print the evaluation score.
x = [[2, 0, 4, 3, 0, 1, 0, 100],
     [0, 2, 4, 0, 2, 3, 0, 0],
     [4, 0, 1, 3, 0, 1, 0, 1],
     [0, 1, 0, 2, 0, 0, 1, 0],
     [0, 0, 2, 0, 0, 4, 0, 0],
     [1, 1, 0, 2, 0, 1, 1, 3],
     [2, 1, 3, 4, 0, 2, 0, 2]]
x = np.array(x)
k = 2
km = KMeans()
# Pass the variable instead of repeating the literal so that changing k
# above actually changes the number of clusters.
km.fit(x, k=k, start_center=None, verbose=1)
km.evaluate()

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值