层次聚类算法_ML之HierarchicalClustering:自定义Hierarchical层次聚类算法

ML之HierarchicalClustering:自定义HierarchicalClustering层次聚类算法

目录

输出结果

实现代码


输出结果

更新……

实现代码

# -*- encoding=utf-8 -*-

from numpy import *

class cluster_node:  #定义cluster_node类,类似Java中的构造函数
    def __init__(self,vec,left=None,right=None,distance=0.0,id=None,count=1):
        self.left=left  
        self.right=right
        self.vec=vec
        self.id=id
        self.distance=distance
        self.count=count #only used for weighted average 

def L2dist(v1,v2):  
    return sqrt(sum((v1-v2)**2))
    
def L1dist(v1,v2):  
    return sum(abs(v1-v2))

def hcluster(features,distance=L2dist): 
    #cluster the rows of the "features" matrix
    distances={}     
    currentclustid=-1 

    # clusters are initially just the individual rows
    clust=[cluster_node(array(features[i]),id=i) for i in range(len(features))]


    while len(clust)>1:  
        lowestpair=(0,1) 
        closest=distance(clust[0].vec,clust[1].vec)
    
        for i in range(len(clust)):
            for j in range(i+1,len(clust)):
                # distances is the cache of distance calculations
                if (clust[i].id,clust[j].id) not in distances: 
                    distances[(clust[i].id,clust[j].id)]=distance(clust[i].vec,clust[j].vec)
        
                d=distances[(clust[i].id,clust[j].id)]  
        
                if d<closest:  
                    closest=d
                    lowestpair=(i,j) 
        
        mergevec=[(clust[lowestpair[0]].vec[i]+clust[lowestpair[1]].vec[i])/2.0 
            for i in range(len(clust[0].vec))]
        
        newcluster=cluster_node(array(mergevec),left=clust[lowestpair[0]],
                             right=clust[lowestpair[1]],
                             distance=closest,id=currentclustid)
        
        currentclustid-=1  
        del clust[lowestpair[1]]
        del clust[lowestpair[0]]
        clust.append(newcluster)

    return clust[0]

def extract_clusters(clust,dist):  #(clust上边的树形结构,dist阈值)
    # extract list of sub-tree clusters from hcluster tree with distance<dist
    clusters = {}
    if clust.distance<dist:
        # we have found a cluster subtree
        return [clust] 
    else:
        # check the right and left branches
        cl = []   
        cr = []
        if clust.left!=None:  
            cl = extract_clusters(clust.left,dist=dist)
        if clust.right!=None: 
            cr = extract_clusters(clust.right,dist=dist)
        return cl+cr  
        
def get_cluster_elements(clust):  #用于取出算好聚类的元素
    # return ids for elements in a cluster sub-tree
    if clust.id>=0:  
        # positive id means that this is a leaf
        return [clust.id]
    else:
        # check the right and left branches
        cl = []
        cr = []
        if clust.left!=None: 
            cl = get_cluster_elements(clust.left)
        if clust.right!=None: 
            cr = get_cluster_elements(clust.right)
        return cl+cr


def printclust(clust,labels=None,n=0):  #将值打印出来
    # indent to make a hierarchy layout
    for i in range(n): print (' '),
    if clust.id<0: 
        # negative id means that this is branch
        print ('-')
    else:           
        # positive id means that this is an endpoint
        if labels==None: print (clust.id)
        else: print (labels[clust.id])
    
    if clust.left!=None: printclust(clust.left,labels=labels,n=n+1)
    if clust.right!=None: printclust(clust.right,labels=labels,n=n+1)



def getheight(clust):  #树的高度,递归方法
    # Is this an endpoint? Then the height is just 1
    if clust.left==None and clust.right==None: return 1
    
    # Otherwise the height is the same of the heights of
    # each branch
    return getheight(clust.left)+getheight(clust.right)

def getdepth(clust):   #树的深度,递归方法
    if clust.left==None and clust.right==None: return 0
    
    return max(getdepth(clust.left),getdepth(clust.right))+clust.distance
      
      

6f46f41188f476a6ef68f8839f025020.png

相关文章
ML之H-clustering:自定义HierarchicalClustering层次聚类算法

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值