Python:密度峰值聚类DPCA,分裂两簇(版本:1)

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from scipy.spatial.distance import pdist,squareform

def getDistCut(distList,distPercent):
    """Return the cutoff distance used for the density count.

    The cutoff is ``distPercent`` percent of the largest value found in
    ``distList``.

    Parameters
    ----------
    distList : iterable of numbers
        Distances to scan for the maximum (the script passes the condensed
        pairwise-distance vector).
    distPercent : number
        Percentage of the largest distance to use as the cutoff.

    Returns
    -------
    number
        ``max(distList) * distPercent / 100``.
    """
    # Multiply first, divide last: same evaluation order as a single
    # left-to-right expression, so the floating-point result is unchanged.
    scaled = max(distList) * distPercent
    return scaled / 100

def getRho(n,distMatrix,distCut):
    """Compute the local density of each of the first ``n`` samples.

    A pair ``(i, j)`` with ``i < j`` counts as neighbours when
    ``distMatrix[i, j] < distCut`` (strict comparison); every such pair adds
    one to the density of both ``i`` and ``j``.  Only entries above the
    diagonal are read, so the diagonal and the lower triangle never matter.

    Parameters
    ----------
    n : int
        Number of samples to score.
    distMatrix : array-like, at least ``n`` x ``n``
        Pairwise distances between samples.
    distCut : number
        Cutoff distance.

    Returns
    -------
    numpy.ndarray
        Float array of length ``n`` holding the neighbour counts.

    Raises
    ------
    IndexError
        If ``n >= 2`` and ``distMatrix`` is not a 2-D array of at least
        ``n`` x ``n`` entries.
    """
    rho = np.zeros(n,dtype=float)
    if n < 2:
        # No pair (i, j) with i < j exists, so every count stays zero.
        return rho

    dist = np.asarray(distMatrix)
    if dist.ndim != 2 or dist.shape[0] < n or dist.shape[1] < n:
        raise IndexError("distMatrix must be a 2-D array of at least n x n entries")

    # Boolean mask of "close" pairs restricted to the strict upper triangle,
    # i.e. exactly the (i, j) pairs with i < j.
    close = np.triu(dist[:n, :n] < distCut, k=1)
    # A close pair (i, j) contributes once to row i and once to column j.
    rho += close.sum(axis=1)
    rho += close.sum(axis=0)
    return rho

#############计算当前块的Gamma和Leader##################

def getinformationBlock(X,y,rho,distMatrix,Block):
    """Split the samples listed in ``Block`` into two sub-blocks by density peaks.

    The procedure is:

    1. Rank the block's samples by ``rho`` in descending order.
    2. Compute ``delta`` for each sample: the densest sample gets its largest
       distance to any sample of the block; every other sample gets the
       distance to the nearest sample ranked before it, and that sample is
       recorded as its leader.
    3. Compute ``gamma = delta * rho``; the two samples with the largest
       gamma become the centres of cluster 0 and cluster 1.
    4. Walk the remaining samples in descending density order and give each
       one the label of its leader.

    Parameters
    ----------
    X, y
        Not used by this function; kept so existing callers keep working.
    rho : array-like
        Density values, indexed by the entries of ``Block``.
    distMatrix : array-like, 2-D
        Pairwise distances, indexed by the entries of ``Block``.
    Block : sequence of int
        Indices of the samples that form the block to split.

    Returns
    -------
    tuple of (list, list)
        ``(leftBlock, rightBlock)``: the entries of ``Block`` labelled 0 and
        1 respectively, each in the order they appear in ``Block``.

    Notes
    -----
    Problems are reported with ``print`` rather than raised.  A block with
    fewer than two samples cannot be split: its samples (if any) are returned
    in ``leftBlock`` and the "not two clusters" warning is printed.
    """
    m = len(Block)
    if m == 0:
        # Nothing to rank or label; report it the same way as any other
        # result that does not contain exactly two clusters.
        print("密度峰值聚类环节出错了:类簇索引不是两个:", set())
        return [], []

    blockIdx = np.asarray(Block, dtype=int)
    dist = np.asarray(distMatrix)
    blockRho = np.asarray(rho)[blockIdx]

    # Positions inside Block, densest first, and the matching sample indices.
    rhoOrder = np.flipud(np.argsort(blockRho))
    ordered = blockIdx[rhoOrder]
    # sub[a, b] is the distance between the a-th and b-th densest samples.
    sub = dist[np.ix_(ordered, ordered)]

    blockDelta = np.zeros(m,dtype=float)
    # Leaders are stored as positions inside Block, not as sample indices.
    blockLeader = np.full(m, -1, dtype=int)

    # ------- delta of the densest sample: farthest distance in the block -------
    densest = rhoOrder[0]
    blockDelta[densest] = max(0, sub[0].max())

    # ------- delta / leader of every other sample -------
    for pos in range(1,m):
        candidates = sub[pos, :pos]          # distances to all denser samples
        nearest = int(np.argmin(candidates))  # first minimum wins on ties
        member = rhoOrder[pos]
        if candidates[nearest] < np.inf:
            blockDelta[member] = candidates[nearest]
            blockLeader[member] = rhoOrder[nearest]
        else:
            # No usable distance to any denser sample: leave the leader unset.
            blockDelta[member] = np.inf

    # ------- gamma and the two cluster centres -------
    blockGamma = blockDelta * blockRho
    gammaOrder = np.flipud(np.argsort(blockGamma))
    clusterIndex = np.full(m, -1, dtype=int)
    centres = gammaOrder[:2]   # a single-sample block yields only one centre
    for label, centre in enumerate(centres):
        clusterIndex[centre] = label

    # The densest sample has no leader, so it can only be labelled as a
    # centre.  When ties in gamma keep it out of the two centres it would
    # stay unlabelled and be dropped from both outputs; attach it to the
    # nearer centre instead.
    if clusterIndex[densest] == -1:
        centreDist = dist[blockIdx[densest], blockIdx[centres]]
        clusterIndex[densest] = clusterIndex[centres[int(np.argmin(centreDist))]]

    # ------- propagate labels from leaders, densest samples first -------
    for pos in range(1,m):
        member = rhoOrder[pos]
        # Skip samples without a leader: indexing with -1 would silently
        # read the label of the last sample in the block.
        if clusterIndex[member] == -1 and blockLeader[member] >= 0:
            clusterIndex[member] = clusterIndex[blockLeader[member]]

    # ------- sanity check: report (do not raise) unexpected label sets -------
    if len(set(clusterIndex)) != 2:
        print("密度峰值聚类环节出错了:类簇索引不是两个:", set(clusterIndex))

    leftBlock = []
    rightBlock = []
    for sample, label in zip(Block, clusterIndex):
        if label == 0:
            leftBlock.append(sample)
        elif label == 1:
            rightBlock.append(sample)
        elif label == -1:
            print("问题警告:还没有聚完:有样本类簇标号为-1")
        else:
            print("问题警告:有{-1,0,1}以外的类簇标号")
    return leftBlock,rightBlock

if __name__ == "__main__":
    # Demo: generate three Gaussian blobs, split all samples into two blocks,
    # then split the left block once more, plotting the result of each split.
    X, y = datasets.make_blobs(n_samples=500, n_features=2, centers=3, cluster_std=[1.0, 1.0, 1.0], random_state=100)
    n = len(X)
    distPercent = 2  # cutoff distance as a percentage of the largest pairwise distance

    # Pairwise Manhattan distances: condensed vector first, then the square matrix.
    distList = pdist(X,metric='cityblock')
    distMatrix = squareform(distList)
    distCut = getDistCut(distList,distPercent)
    rho = getRho(n,distMatrix,distCut)

    # First split: every sample starts out in one block.
    currentBlock = list(range(n))
    leftBlock, rightBlock = getinformationBlock(X,y,rho,distMatrix,currentBlock)

    A = X[leftBlock]
    B = X[rightBlock]
    print("A块的长度:",len(A),"B块的长度:",len(B))
    plt.scatter(A[:,0],A[:,1],marker='+')
    plt.scatter(B[:,0],B[:,1],marker='o')
    plt.show()

    # Second split: divide the left block again; the right block is plotted unchanged.
    ll,rr = getinformationBlock(X,y,rho,distMatrix,leftBlock)
    C = X[ll]
    D = X[rr]
    plt.scatter(B[:, 0], B[:, 1], marker='o')
    plt.scatter(C[:, 0], C[:, 1], marker='*')
    plt.scatter(D[:, 0], D[:, 1], marker='+')
    plt.show()

非常规写法,读者慎用!

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

DeniuHe

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值