重叠社区发现-LFM算法

最新推荐文章于 2022-04-04 11:00:40 发布

锦堇年

最新推荐文章于 2022-04-04 11:00:40 发布

阅读量1w

点赞数 7

分类专栏：社区发现

本文链接：https://blog.csdn.net/qjc937044867/article/details/51290170

版权

社区发现专栏收录该内容

4 篇文章 3 订阅

订阅专栏

#coding=utf-8
from numpy import *
#文件读取
def LoadAdjacentMatrixData(filename,vertices):
    """Read a tab-separated integer adjacency matrix from a text file.

    Args:
        filename: Path of the text file. Each non-blank line holds one
            matrix row whose entries are separated by tab characters.
        vertices: Number of vertices; the result is a
            ``vertices x vertices`` list of lists pre-filled with 0.

    Returns:
        The adjacency matrix as a list of lists of ints. Rows or columns
        that the file does not provide stay 0.

    Raises:
        IndexError: If the file has more rows or columns than `vertices`.
        ValueError: If an entry cannot be parsed as an integer.
    """
    Adjmartrix = [[0] * vertices for _ in range(vertices)]
    # The context manager closes the handle even when parsing fails.
    with open(filename, 'r') as file_object:
        for x, line in enumerate(file_object):
            line = line.strip()
            if not line:
                # A blank (typically trailing) line carries no row data.
                continue
            for y, cell in enumerate(line.split('\t')):
                Adjmartrix[x][y] = int(cell)
    return Adjmartrix
#获取队列
def Degree_Sorting(Adjmartrix,vertices):
    """Compute per-node degree, neighbour lists and the edge count.

    Despite the name, nothing is sorted: entries are returned in node
    index order.

    Args:
        Adjmartrix: Square adjacency matrix (list of lists); an entry equal
            to 1 marks an edge.
        vertices: Number of vertices, i.e. how many rows/columns to scan.

    Returns:
        A tuple ``(degree_s, neighbours, edges)`` where
        ``degree_s[i] == [i, degree of i]``, ``neighbours[i]`` lists the
        column indices j with ``Adjmartrix[i][j] == 1`` in ascending order,
        and ``edges`` is half the number of 1-entries found.
    """
    degree_s = []
    neighbours = []
    sums = 0
    for i in range(vertices):
        row = Adjmartrix[i]
        adjacent = [j for j in range(vertices) if row[j] == 1]
        neighbours.append(adjacent)
        degree_s.append([i, len(adjacent)])
        sums += len(adjacent)
    # Floor division keeps the edge count an integer on every Python version
    # (each undirected edge appears twice in a symmetric matrix).
    return degree_s, neighbours, sums // 2
#获取graph的所有邻居节点
def get_allneighbours(coms,neighbours):
    """Return the nodes adjacent to a community but not part of it.

    Args:
        coms: Iterable of node indices forming the community.
        neighbours: ``neighbours[i]`` is the list of nodes adjacent to i.

    Returns:
        A list of distinct outside nodes, in the order they are first
        encountered while walking the members of `coms`.
    """
    frontier = []
    for member in coms:
        for candidate in neighbours[member]:
            # Skip community members and nodes that were already collected.
            if candidate in coms or candidate in frontier:
                continue
            frontier.append(candidate)
    return frontier
#获取所有graph邻居加入后的F值列表
def get_allfitness(A,coms,neighbours,Func='F'):
    """Score every outside neighbour by the gain of adding it to `coms`.

    Each candidate is temporarily added to `coms`, the community is scored,
    and the candidate is removed again, so `coms` is unchanged on return.

    Args:
        A: Adjacency matrix (list of lists).
        coms: Community as a mutable set of node indices.
        neighbours: ``neighbours[i]`` is the list of nodes adjacent to i.
        Func: ``'Q'`` scores with get_sumver + Modulartiy; any other value
            scores with get_Fitness.

    Returns:
        ``(nel, nelf)``: the candidate nodes and, index-aligned, the score
        with the candidate minus the score without it. Both lists are
        empty when the community has no outside neighbour.
    """
    def _score():
        # NOTE(review): the 'Q' branch hands the plain community set to
        # Modulartiy, which indexes its argument like a mapping -- confirm
        # this branch works before relying on it.
        if Func == 'Q':
            edges, size = get_sumver(A, coms)
            return Modulartiy(A, coms, edges, size)
        return get_Fitness(A, coms, neighbours)

    candidates = get_allneighbours(coms, neighbours)
    gains = []
    if not candidates:
        return candidates, gains

    baseline = _score()
    for node in candidates:
        coms.add(node)
        gains.append(_score() - baseline)
        coms.remove(node)
    return candidates, gains
#清洗graph内部F小于0的点（返回F值列表）
def get_infitness(A,coms,neighbours,Func='F'):
    """Find a member whose removal would raise the community score.

    For every member the community is scored with and without it; the
    first member whose contribution (score with minus score without) is
    negative is returned.

    Args:
        A: Adjacency matrix (list of lists).
        coms: Community as a set of node indices. It is not modified.
        neighbours: ``neighbours[i]`` is the list of nodes adjacent to i.
        Func: ``'Q'`` scores with get_sumver + Modulartiy; any other value
            scores with get_Fitness.

    Returns:
        The offending node index, or -1 when no member has a negative
        contribution.
    """
    def _score(members):
        # NOTE(review): the 'Q' branch hands a plain set to Modulartiy,
        # which indexes its argument like a mapping -- confirm this branch
        # works before relying on it.
        if Func == 'Q':
            s, v = get_sumver(A, members)
            return Modulartiy(A, members, s, v)
        return get_Fitness(A, members, neighbours)

    fia = _score(coms)
    # Iterate over a snapshot and score a copy without the node: removing
    # and re-adding elements of a set while iterating over that same set
    # is not safe.
    for each in list(coms):
        fib = _score(coms - {each})
        if fia - fib < 0:
            return each
    return -1
#迭代过程
def Propagate(A,coms,neighbours,Func='F'):
    """Grow a community greedily until no neighbour has a non-negative gain.

    Each round adds the outside neighbour with the largest gain (if that
    gain is >= 0) and then drops at most one member that get_infitness
    reports as having a negative contribution.

    Args:
        A: Adjacency matrix (list of lists).
        coms: Seed community as a mutable set; it is grown in place.
        neighbours: ``neighbours[i]`` is the list of nodes adjacent to i.
        Func: Accepted for interface compatibility. NOTE(review): it is not
            forwarded to the helpers, exactly as before, so the default
            scoring of get_allfitness/get_infitness is always used; the
            'Q' path of those helpers needs review before it is wired in.

    Returns:
        The same `coms` set after expansion.
    """
    # A loop replaces the former tail recursion so that large communities
    # cannot exhaust the interpreter's recursion limit.
    while True:
        nel, nelf = get_allfitness(A, coms, neighbours)
        if not nelf:
            # No outside neighbour left (isolated seed or fully absorbed
            # component); max() on an empty list would raise.
            return coms
        best = max(nelf)
        # Stop when every candidate would lower the score.
        if best < 0:
            return coms
        coms.add(nel[nelf.index(best)])
        negative = get_infitness(A, coms, neighbours)
        if negative != -1:
            coms.remove(negative)
#获取自然子图结构，重叠节点
def get_naturalcoms(A, neighbours,vertices,Func='F'):
    """Cover the graph with communities grown from random uncovered seeds.

    While uncovered nodes remain, one of them is picked at random as a
    seed, expanded with Propagate, and every node of the resulting
    community is marked as covered. Communities may overlap.

    Args:
        A: Adjacency matrix (list of lists).
        neighbours: ``neighbours[i]`` is the list of nodes adjacent to i.
        vertices: Number of nodes in the graph.
        Func: Scoring selector passed through to Propagate.

    Returns:
        A dict mapping a community id (0, 1, ...) to the set of node
        indices in that community; empty when `vertices` is 0.
    """
    nodes = list(range(vertices))
    graph = {}
    gnums = 0
    # Looping on the list itself also covers an empty graph, where drawing
    # a random index would fail.
    while nodes:
        # `random` presumably is numpy.random from the file's star import,
        # whose randint(n) draws an index from [0, n) -- confirm.
        seed = nodes[random.randint(len(nodes))]
        graph[gnums] = {seed}
        # Single-argument print emits the same text under Python 2 and 3.
        print('before: graphid %s ,seed %s' % (gnums, list(graph[gnums])))
        graph[gnums] = Propagate(A, graph[gnums], neighbours, Func)
        print('after: %s' % (list(graph[gnums]),))
        covered = graph[gnums]
        nodes = [node for node in nodes if node not in covered]
        gnums += 1
    return graph
#获取graph的边数和节点数
def get_sumver(A,coms):
    """Count the internal edges and the nodes of a community.

    Args:
        A: Adjacency matrix (list of lists); an entry equal to 1 marks an
            edge.
        coms: Iterable of node indices forming the community.

    Returns:
        ``(edges, size)``: half the number of 1-entries among member pairs
        and the number of members.
    """
    members = list(coms)
    s = 0
    for i in members:
        # Read the matrix that was passed in, not a module-level name.
        row = A[i]
        for j in members:
            if row[j] == 1:
                s += 1
    # Every undirected edge was seen from both endpoints.
    return s // 2, len(members)
#Q函数
def Modulartiy(A, coms, sums,vertices):
    """Compute the modularity Q of a partition.

    Evaluates ``Q = (1/m) * sum_c (l_c - d_c**2 / (4*m))`` where ``l_c`` is
    the number of edges inside community c, ``d_c`` the summed degree of
    its members and ``m`` the total number of edges.

    Args:
        A: Adjacency matrix (list of lists of numbers).
        coms: Mapping from community id to an iterable of node indices.
        sums: Total number of edges ``m`` in the graph.
        vertices: Number of columns scanned when summing a node's degree.

    Returns:
        The modularity as a float.

    Raises:
        ZeroDivisionError: If `sums` is 0.
    """
    Q = 0.0
    for eachc in coms:
        members = coms[eachc]
        # Each internal edge is counted from both endpoints, hence the / 2.
        li = 0
        for eachp in members:
            for eachq in members:
                li += A[eachp][eachq]
        li = li / 2.0
        di = 0
        for eachp in members:
            for eachq in range(vertices):
                di += A[eachp][eachq]
        # Force float division: with integer operands Python 2 would
        # truncate the expected-edges term and distort Q.
        Q += li - float(di * di) / (sums * 4)
    return Q / float(sums)
#F函数
def get_Fitness(A, coms, neighbours, alpha=1):
    """Compute the community fitness ``kin / (kin + kout) ** alpha``.

    Args:
        A: Adjacency matrix (list of lists of numbers).
        coms: Collection of node indices forming the community.
        neighbours: ``neighbours[i]`` is the list of nodes adjacent to i.
        alpha: Exponent applied to the total degree.

    Returns:
        The fitness as a float; 0.0 when the community has no internal and
        no outgoing links (for example an isolated node or an empty set).
    """
    kin = 0
    kout = 0
    for eachp in coms:
        row = A[eachp]
        # Internal degree: matrix entries between ordered member pairs, so
        # an internal edge contributes from both of its endpoints.
        for eachq in coms:
            kin += row[eachq]
        # External degree: counts edges leaving the community, not the
        # number of distinct outside nodes they reach.
        for each in neighbours[eachp]:
            if each not in coms:
                kout += 1
    total = kin + kout
    if total == 0:
        # Avoid dividing by zero for a community without any links.
        return 0.0
    return kin / float(pow(total, alpha))
#主函数
# Script entry point: for each benchmark network, load its adjacency matrix,
# detect overlapping communities and print the communities and their Q value.
if __name__ == '__main__':
    # Number of vertices of each network, index-aligned with txtlist
    vertices = [34,115,105,62]
    txtlist = ['karate.txt','football.txt','books.txt','dolphins.txt']
    # Alternative data set (disabled):
    #vertices = [64,128,256,512]
    #txtlist = ['RN1.txt','RN2.txt','RN3.txt','RN4.txt']
    # NOTE(review): testv is not referenced anywhere else in this block
    testv = [1,2,3,4,5]
    #testv = [i+1 for i in range(34)]
    for i in range(len(txtlist)):
        print txtlist[i],vertices[i]    
        A = LoadAdjacentMatrixData(txtlist[i],vertices[i])
        # degree_s is unpacked here but not used below
        degree_s, neighbours, sums = Degree_Sorting(A, vertices[i])
        graph = get_naturalcoms(A,neighbours,vertices[i],Func='Q')
        # Collect overlapping nodes: prenode maps 'p_q' to the nodes shared
        # by communities p and q (both orders are stored); it is not read
        # again in this block
        prenode = {}
        for p in graph:
            for q in graph:
                if p != q:
                    if graph[p]&graph[q]:
                        prenode[str(p)+'_'+str(q)] = graph[p] & graph[q]
        # Print the detected community structure
        print '子图结构',graph 
        # Modulartiy receives each community as a list rather than a set
        coms = {}
        for eg in graph:
            coms[eg] = list(graph[eg])
        print 'Q=',Modulartiy(A,coms,sums,vertices[i])
        # Blank line between networks
        print

算法名称：复杂网络中重叠及层次社区发现算法LFM（改） 
算法输入：无向无权图邻接矩阵AdjacentMatrix，节点个数VerticeNum，各节点邻居集合Neighbor
算法输出：存储节点标签的分类数组graph


//初始化节点队列和标记数组
For i <- 0 to VerticeNum Do
    nodes[i] <- i
    hindex[i] <- 0
/*
pr = get_PR(neighbours, vertices, dtype=1)//pr 为不同方式获得的PR评分队列
*/ 
while (nodes) Do
    //1>随机选择一个未扩散节点作为种子节点
    seed = nodes[random.randint(len(nodes))]
    /*
    2>选择未扩散点中度数最大的节点作为种子
    seed = get_seedbydegree(nodes, Neighbor, hindex)
    3>PageRank方式选择种子
    seed = get_seedbyPR(nodes, pr, hindex)
    */
    comm = Propagate(AdjacentMatrix,seed,Neighbor,Func)//Func为评价函数：原LFM中f函数/Q函数
    graph.add(comm)
    nodes.remove(comm)



Propagate(A, coms, neighbours, Func):
    nelf = get_allfitness(A,coms,neighbours,Func)//获取当前子图所有邻居加入后的F值列表
    //当所有节点的Func为负值时停止
    If max(nelf) >= 0 
        coms.add(argmax(nelf))
        while (True) Do
            //去除当前子图内部Func为负的点
            negative = get_infitness(A,coms,neighbours,Func)
            If negative != -1:
                coms.remove(negative)
                return Propagate(A,coms,neighbours,Func)
            Else:
                return Propagate(A,coms,neighbours,Func)
    Else:
        Return coms   

//dtype： 1:Wi = E(Wj/DWj)   2:Wi = (E(Wj))/DWi   3: wi = E(Wj/(EWj)*Wj)   
//说明：  E:求和  Wi:节点i的pr值 Wj:节点i的邻居的pr值 DWi:节点i的度数
get_PR( neighbours, vertices, dtype=1):
    //初始化pr队列为节点度数
    For i <- 0 to VerticeNum Do
        pr[i] <- len(neighbours[i])
    If dtype == 1
        For i <- 0 to VerticeNum Do
            If neighbours[i] 
                For en in neighbours[i] Do
                    pr[i] += pr[en]/len(neighbours[en])
    Else If dtype == 2:
        For i <- 0 to VerticeNum Do
            If neighbours[i] 
                For en in neighbours[i] Do
                    pr[i] += pr[en]
                pr[i] = pr[i]/len(neighbours[i])
    Else If dtype == 3
        For i <- 0 to VerticeNum Do
            If neighbours[i] 
                For en in neighbours[i] Do
                    tw += pr[en]
                    tp += pr[en]**2
                pr[i] = tw/tp
    Return pr