Python：正宗的密度峰值聚类

最新推荐文章于 2024-04-27 16:11:04 发布

DeniuHe

最新推荐文章于 2024-04-27 16:11:04 发布

阅读量2.1k

点赞数 1

分类专栏： Python学习 算法

本文链接：https://blog.csdn.net/DeniuHe/article/details/103274125

版权

Python学习同时被 2 个专栏收录

239 篇文章 13 订阅

订阅专栏

算法

193 篇文章 2 订阅

订阅专栏

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist,squareform
from collections import OrderedDict
from itertools import combinations,product
from sklearn.cluster import SpectralClustering
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.semi_supervised import LabelPropagation
from sklearn import metrics
from sklearn import datasets
from sklearn.metrics import mean_squared_error,accuracy_score,mean_absolute_error,f1_score

def getDistCut(distList,distPercent):
    """Return the cutoff distance used by the density kernel.

    The cutoff is ``distPercent`` percent of the largest value found in
    ``distList``.
    """
    largest = max(distList)
    # Multiply before dividing, matching the original evaluation order.
    scaled = largest * distPercent
    return scaled / 100

def getRho(n,distMatrix,distCut):
    """Compute the Gaussian-kernel local density of the first ``n`` samples.

    Every pair ``(i, j)`` with ``i < j`` contributes
    ``exp(-(distMatrix[i, j] / distCut) ** 2)`` to both ``rho[i]`` and
    ``rho[j]``; a sample never contributes to its own density.

    Args:
        n: Number of samples to evaluate.
        distMatrix: 2-D array of pairwise distances, indexed ``[i, j]``.
        distCut: Cutoff distance that scales the kernel width.

    Returns:
        Float array of length ``n`` holding the density of each sample.
    """
    dist = np.asarray(distMatrix, dtype=float)[:n, :n]
    # Evaluate the kernel once per pair and keep only the strict upper
    # triangle (i < j), which is the part the pairwise definition reads.
    kernel = np.triu(np.exp(-(dist / distCut) ** 2), k=1)
    # Row sums give the contributions where the sample is ``i``; column sums
    # give the contributions where it is ``j``.
    return kernel.sum(axis=1) + kernel.sum(axis=0)
#------------密度峰值聚类------------------#
def DPCA(n,distMatrix,rho,blockNum):
    """Partition ``n`` samples into ``blockNum`` clusters by density peaks.

    For every sample the function computes ``delta`` (the distance to the
    nearest sample of higher density; for the densest sample, its largest
    distance to any sample) and ``leader`` (that nearest denser sample).
    The ``blockNum`` samples with the largest ``gamma = delta * rho`` become
    cluster centres, and every other sample inherits its leader's cluster.

    Args:
        n: Number of samples.
        distMatrix: Square array of pairwise distances, indexed ``[i, j]``.
        rho: Length-``n`` array of local densities.
        blockNum: Number of clusters; must satisfy ``1 <= blockNum <= n``.

    Returns:
        OrderedDict mapping each cluster id ``0 .. blockNum - 1`` to the
        list of sample indices assigned to it, in ascending order.

    Raises:
        ValueError: If ``blockNum`` is outside ``1 .. n``.
    """
    if not 1 <= blockNum <= n:
        raise ValueError(
            "blockNum must be between 1 and n (got blockNum=%r, n=%r)" % (blockNum, n)
        )
    distMatrix = np.asarray(distMatrix)
    # Sample indices sorted by decreasing density.
    rhoOrdIndex = np.flipud(np.argsort(rho))
    delta = np.zeros(n,dtype=float)
    leader = np.full(n,-1,dtype=int)

    # Densest sample: delta is its largest distance to any sample and it has
    # no leader (stays -1).
    densest = rhoOrdIndex[0]
    delta[densest] = distMatrix[densest,:n].max()

    # Every other sample: delta/leader come from the closest sample among
    # those that precede it in the density ordering.
    for pos in range(1,n):
        current = rhoOrdIndex[pos]
        denser = rhoOrdIndex[:pos]
        nearest = np.argmin(distMatrix[current,denser])
        delta[current] = distMatrix[current,denser[nearest]]
        leader[current] = denser[nearest]

    gamma = delta * rho
    gammaOrdIdx = np.flipud(np.argsort(gamma))

    # The blockNum samples with the largest gamma seed the clusters.
    clusterIdx = np.full(n,-1,dtype=int)
    centers = gammaOrdIdx[:blockNum]
    clusterIdx[centers] = np.arange(blockNum)

    # Walk the samples from dense to sparse so that a sample's leader has
    # already been labelled when the sample itself is reached.
    for idx in rhoOrdIndex:
        if clusterIdx[idx] != -1:
            continue
        parent = leader[idx]
        if parent == -1:
            # Only the densest sample has no leader.  If ties in gamma left
            # it out of the centres, attach it to the nearest centre rather
            # than letting -1 index the last sample.
            parent = centers[np.argmin(distMatrix[idx,centers])]
        clusterIdx[idx] = clusterIdx[parent]

    # Collect the members of each cluster, keyed by cluster id.
    clusterSet = OrderedDict((k,[]) for k in range(blockNum))
    for i in range(n):
        clusterSet[clusterIdx[i]].append(i)
    return clusterSet


if __name__ == "__main__":
    # Demo on three synthetic Gaussian blobs.
    X, y = datasets.make_blobs(n_samples=500, n_features=2, centers=3, cluster_std=[1, 1, 1], random_state=104)

    n = len(X)
    Gamma = 0.5  # only needed by the SpecClust alternative below
    distPercent = 5
    distList = pdist(X,metric='euclidean')
    distMatrix = squareform(distList)
    distCut = getDistCut(distList,distPercent)
    rho = getRho(n,distMatrix,distCut)
    blockNum = 3
    clusterSet = DPCA(n,distMatrix,rho,blockNum)
    # Alternatives from the original post; K_means and SpecClust are not
    # defined in this listing.
    # clusterSet = K_means(n,X,blockNum)
    # clusterSet = SpecClust(n,X,Gamma,blockNum)

DeniuHe

关注

1
点赞
踩
13

收藏

觉得还不错? 一键收藏
打赏
13
评论
Python：正宗的密度峰值聚类

import numpy as np; import pandas as pd; import matplotlib.pyplot as plt; from scipy.spatial.distance import pdist,squareform; from collections import OrderedDict; from itertools import combinations,produ...
复制链接

扫一扫