【一般聚类/时序聚类】python实现多密度自适应聚类:Multi-DBSCAN

本文代码基于该篇进行魔改,功能调用更加方便,速度经测试快了几十倍

import math
import copy
import numpy as np
from sklearn.cluster import DBSCAN
import sklearn.metrics.pairwise as pairwise


class Adapter_DBSCAN():

    # 默认统计聚类个数在2-25之间的聚类情况, 参数符合python左闭右开
    def __init__(self,num_cluster_range=(2,26)):
        self.num_cluster_range = num_cluster_range
    

    def returnEpsCandidate(self,dataSet):
        """Build the list of candidate Eps values from sorted pairwise distances.

        The pairwise Euclidean distance matrix of ``dataSet`` is computed and
        stored on ``self.DistMatrix``. Every row is then sorted ascending, so
        column ``k`` of the sorted matrix holds, for each sample, its k-th
        smallest distance (column 0 is the smallest entry of each row and is
        not used). The candidate for ``k`` is the mean of column ``k``.

        :param dataSet: data set, passed to ``pairwise.euclidean_distances``
        :return: list with one mean distance per ``k`` in
            ``1 .. len(dataSet) - 1``
        """
        self.DistMatrix = pairwise.euclidean_distances(dataSet)
        # np.sort returns a row-wise sorted copy, so self.DistMatrix itself
        # stays unsorted for later use; this replaces the deepcopy plus
        # per-row Python loop.
        sorted_dist = np.sort(self.DistMatrix, axis=1)
        return [np.mean(sorted_dist[:, k]) for k in range(1, len(dataSet))]
    

    def returnMinptsCandidate(self, DistMatrix, EpsCandidate, X):
        """Derive one MinPts candidate for every Eps candidate.

        For each Eps value, counts how many entries of ``DistMatrix`` are less
        than or equal to it and divides that count by ``len(X)``.

        :param DistMatrix: distance matrix (compared element-wise against Eps)
        :param EpsCandidate: list of Eps candidates
        :param X: data set; only its length is used
        :return: list of MinPts candidates, index-aligned with ``EpsCandidate``
        """
        return [
            np.sum(DistMatrix <= radius) / len(X)
            for radius in EpsCandidate
        ]
    

    def fit(self, X):
        """Run the whole pipeline on ``X``.

        Steps, in order: compute the Eps candidates (which also stores the
        distance matrix on ``self.DistMatrix``), compute the matching MinPts
        candidates, run DBSCAN for every candidate pair, then filter the
        results with the cluster-count range stored on the instance.

        :param X: data set to cluster
        """
        eps_candidates = self.returnEpsCandidate(X)
        self.EpsCandidate = eps_candidates

        minpts_candidates = self.returnMinptsCandidate(
            self.DistMatrix, eps_candidates, X
        )
        self.MinptsCandidate = minpts_candidates

        self.do_multi_dbscan(X)
        self.set_num_clusters_range(self.num_cluster_range)


    def do_multi_dbscan(self,X):
        """Run DBSCAN once per (eps, minpts) candidate pair and group the results.

        Reads ``self.EpsCandidate`` and ``self.MinptsCandidate`` (paired by
        index) and fills two dicts keyed by the number of clusters found
        (largest label + 1, so a run that labels everything as noise, -1,
        counts as 0 clusters):

        * ``self.all_predict_dict[n]`` -- list of label arrays with ``n``
          clusters.
        * ``self.all_param_dict[n]`` -- list of ``{"eps": ..., "minpts": ...}``
          dicts, index-aligned with the label arrays above.

        :param X: data set passed to ``DBSCAN.fit``
        """
        self.all_predict_dict = {}
        self.all_param_dict = {}

        for eps, minpts in zip(self.EpsCandidate, self.MinptsCandidate):
            # DBSCAN requires a strictly positive radius; a zero candidate
            # (e.g. from duplicated samples) would raise, so it is skipped.
            if not eps > 0:
                continue

            # The MinPts candidates are averages and therefore floats, but
            # DBSCAN requires an integer min_samples. Rounding up keeps the
            # "neighbour count >= minpts" criterion unchanged, because the
            # neighbour count is always an integer.
            min_samples = max(1, math.ceil(minpts))

            db = DBSCAN(eps=eps,min_samples=min_samples).fit(X)
            num_clusters = int(db.labels_.max()) + 1

            self.all_predict_dict.setdefault(num_clusters, []).append(db.labels_)
            # The original (un-rounded) candidate is recorded, as before.
            self.all_param_dict.setdefault(num_clusters, []).append({"eps":eps,"minpts":minpts})


    # 筛选聚类个数,比如Multi-DBSCAN共产生了3聚类、15聚类、136聚类三种情况
    # 我想只看10~20的聚类情况,就可以设置set_num_clusters_range((10, 21))(左闭右开)后调用get_predict_dict()
    def set_num_clusters_range(self,num_cluster_range:tuple):
        self.predict_dict = {}
        self.param_dict = {}
        # 统计符合范围的聚类情况

        for num_cluster, labels, params in zip(self.all_predict_dict.keys(),\
            self.all_predict_dict.values(), self.all_param_dict.values()):
            if num_cluster >= num_cluster_range
评论 53
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值