机器学习/深度学习入门:python实现聚类k-mean

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/3/25 14:07
# @Author  : HJH
# @Site    : 
# @File    : clauster.py
# @Software: PyCharm

import math
import random


class Clauster(object):
    def __init__(self,samples):
        if len(samples)==0:
            raise Exception('【错误】一个空的聚类')
        self.samples=samples
        self.n_dim=samples[0].n_dim

        for sample in samples:
            if sample.n_dim!= self.n_dim:
                raise Exception('【错误】聚类中样本点的个数不一致')
        self.centroid=self.cal_centroid()

    def __repr__(self):
        return str(self.samples)

    def update(self,samples):
        old_centroid=self.centroid
        self.samples=samples
        self.centroid=self.cal_centroid()
        shift=get_distance(old_centroid,self.centroid)
        return shift

    def cal_centroid(self):
        n_samples=len(self.samples)
        coords=[sample.coords for sample in self.samples]
        unzipped=zip(*coords)
        centroid_coords=[math.fsum(d_list)/n_samples for d_list in unzipped]
        return Sample(centroid_coords)






class Sample(object):
    def __init__(self,coords):
        self.coords=coords
        self.n_dim=len(coords)
    def __repr__(self):
        return str(self.coords)



def get_distance(a, b):
    if a.n_dim != b.n_dim:
        raise Exception('【错误】维度不同,不能计算距离')
    acc_diff = 0.0
    for i in range(a.n_dim):
        square = pow((a.coords[i] - b.coords[i]), 2)
        acc_diff += square
    distance = math.sqrt(acc_diff)
    return distance


def gen_random_sample(n_dim, lower, upper):
    sample = Sample([random.uniform(lower, upper) for _ in range(n_dim)])
    return sample
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/3/24 21:41
# @Author  : HJH
# @Site    : 
# @File    : main.py
# @Software: PyCharm

import random
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
from clauster import Clauster, get_distance, gen_random_sample


def kmeans(samples, k, threshold):
    init_sample = random.sample(samples, k)
    print(init_sample)
    # get_distance(1,2)
    clusters = [Clauster([sample]) for sample in init_sample]
    # print(clusters)

    n_loop = 0
    while True:
        list = [[] for _ in clusters]
        n_loop += 1
        for sample in samples:
            small_distance = get_distance(sample, clusters[0].centroid)

            cluster_index = 0

            for i in range(k - 1):
                distance = get_distance(sample, clusters[i + 1].centroid)
                if distance < small_distance:
                    small_distance = distance
                    cluster_index = i + 1

            list[cluster_index].append(sample)
        biggest_shift = 0.0
        for i in range(k):
            shift = clusters[i].update(list[i])
            biggest_shift = max(biggest_shift, shift)

        if biggest_shift < threshold:
            print('第{}次迭代后,聚类稳定'.format(n_loop))
            break
    return clusters


def run_main():
    n_samples = 1000
    # 特征维数
    n_feature = 2
    # 特征数范围
    lower = 0
    upper = 200
    # 聚类个数
    n_cluster = 3

    # 随机生成数据集
    samples = [gen_random_sample(n_feature, lower, upper) for _ in range(n_samples)]
    # print(len(samples),"--------------")

    threshold = 0.2

    clusters = kmeans(samples, n_cluster, threshold)
    for i, c in enumerate(clusters):
        for sample in c.samples:
            print('聚类---{},样本点---{}'.format(i, sample))

    plt.subplot()
    color_names = list(mcolors.cnames)
    for i, c in enumerate(clusters):
        x = []
        y = []
        # random.choice
        color = [color_names[i]] * len(c.samples)
        for sample in c.samples:
            x.append(sample.coords[0])
            y.append(sample.coords[1])
        plt.scatter(x, y, c=color)
    plt.show()


if __name__ == '__main__':
    run_main()

                
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值