【复杂网络社团发现】GN算法步骤详解（附python代码实现）

最新推荐文章于 2022-09-21 08:45:42 发布

不戒修仙不改名

最新推荐文章于 2022-09-21 08:45:42 发布

阅读量1.6w

点赞数 25

分类专栏：复杂网络文章标签： python 数据分析社团发现 GN算法复杂网络

本文链接：https://blog.csdn.net/weixin_44704845/article/details/102682969

版权

复杂网络专栏收录该内容

4 篇文章

订阅专栏

GN算法

作为一种分裂的层次聚类方法，GN算法通过不断从原图中删除边从而达到生成分裂树的目的（从而形成社团）

那么关键点就在于，如何评判删除标准了（本文暂仅详述步骤，对于这一点不作详细解答）

不带权

通过不断删除边介数最大的边并循环（每次都重新计算），直到图中没有边存在

步骤：

计算网络内所有边相对于源节点的边介数；
删除边介数最高的边；
重复步骤1和步骤2；
直到网络中没有边存在，最后生成的分裂树即为划分的社团；

代码实现

python

源码

# -*- coding: utf-8 -*-

import networkx as nx
import matplotlib.pyplot as plt
import sys
sys.path.append('../')

class GN:
    def __init__(self, G):
        self.G_copy = G.copy()
        self.G = G
        self.partition = [[n for n in G.nodes()]]
        self.all_Q = [0.0]
        self.max_Q = 0.0
#         self.zidian={0.0:[100]}
        self.zidian={0:[0]}
#     def GG(self):
#         return self.G_copy
    
    
    #Using max_Q to divide communities 
    def run(self):
		#Until there is no edge in the graph
        while len(self.G.edges()) != 0:
			#Find the most betweenness edge
            edge = max(nx.edge_betweenness_centrality(self.G).items(),key=lambda item:item[1])[0]
			#Remove the most betweenness edge
            self.G.remove_edge(edge[0], edge[1])
			#List the the connected nodes
            components = [list(c) for c in list(nx.connected_components(self.G))]
            if len(components) != len(self.partition):
				#compute the Q
                cur_Q = self.cal_Q(components, self.G_copy)
                if cur_Q not in self.all_Q:
                    self.all_Q.append(cur_Q)
                if cur_Q > self.max_Q:
                    self.max_Q = cur_Q
                    self.partition = components
                    for i in range(len(self.partition)):
                        self.zidian[i]=self.partition[i]
        print('-----------the Divided communities and the Max Q-----------')
        print('Max_Q:', self.max_Q)
        print('The number of Communites:', len(self.partition))
        print("Communites:", self.partition)
        return self.partition

    def cal_Q(self,partition,G):
        m = len(G.edges(None, False))
        a = []
        e = []
        for community in partition:
            t = 0.0
            for node in community:
                t += len([x for x in G.neighbors(node)])
            a.append(t/(2*m))
#             self.zidian[t/(2*m)]=community
        for community in partition:
            t = 0.0
            for i in range(len(community)):
                for j in range(len(community)):
                    if(G.has_edge(community[i], community[j])):
                        t += 1.0
            e.append(t/(2*m))
        
        q = 0.0
        for ei,ai in zip(e,a):
            q += (ei - ai**2) 
        return q 
    
    def add_group(self):
        num = 0
        nodegroup = {}
        for partition in self.partition:
            for node in partition:
                nodegroup[node] = {'group':num}
            num = num + 1  
        nx.set_node_attributes(self.G_copy, nodegroup)
        
    def to_gml(self):
        nx.write_gml(self.G_copy, 'outtoGN.gml')
        
if __name__ == '__main__':
    G=nx.read_gml('data.gml',label='id') #Using max_Q to divide communities 
    algorithm = GN(G)
    algorithm.run()
    algorithm.add_group()
    algorithm.to_gml()

数据集

data.gml

格式

graph
[
node
[
id "a"
]
node
[
id "b"
]
edge
[
source "a"
target "b"
]
]

注意

缩进无影响
需为gml文件

附一段格式生成傻瓜代码

with open('data.gml','w') as f:
    f.write("graph\n")
    f.write("[\n")
    for i in range(len(vertex)):
        f.write("node\n")
        f.write("[\n")
        f.write("id \""+vertex[i]+"\"\n")
        f.write("]\n")
    for i in range(len(edges)):
        f.write("edge\n")
        f.write("[\n")
        f.write("source \""+edges[i][0]+"\"\n")
        f.write("target \""+edges[i][1]+"\"\n")
        f.write("]\n")
    f.write("]")

其中vertex为节点集，edges为边集

带权

通过不断删除边权比最大的边并循环（每次都重新计算），直到图中没有边存在

步骤：

计算网络内所有边相对于源节点的边介数；
将每条边的边介数除以其权值得到边权比；
删除边权比最高的边；
重复步骤1、步骤2和步骤3；
直到网络中没有边存在，最后生成的分裂树即为划分的社团；

代码实现

python

源码

# -*- coding: utf-8 -*-
#GN_weight
import networkx as nx
import matplotlib.pyplot as plt
import sys
sys.path.append('../')

class GN_w:
    def __init__(self, G):
        self.G_copy = G.copy()
        self.G = G
        self.partition = [[n for n in G.nodes()]]
        self.all_Q = [0.0]
        self.max_Q = 0.0
		
    #Using max_Q to divide communities 
    def run(self):
        while len(self.G.edges()) != 0:
            edges={}
            edges_betweenness_centrality = nx.edge_betweenness_centrality(self.G)
            
            for e, ebc in edges_betweenness_centrality.items():
                edge_weight = ebc/self.G.get_edge_data(e[0],e[1])['value']
                edges[e]=edge_weight
                
            edge = max(edges.items(), key=lambda item:item[1])[0]
            self.G.remove_edge(edge[0], edge[1])
            components = [list(c) for c in list(nx.connected_components(self.G))]
            if len(components) != len(self.partition):
    				#compute the Q
                cur_Q = self.cal_Q(components, self.G_copy)
                if cur_Q not in self.all_Q:
                    self.all_Q.append(cur_Q)
                if cur_Q > self.max_Q:
                    self.max_Q = cur_Q
                    self.partition = components
                    
        print('-----------the divided communities and the Max Q------------')
        print('The number of Communites:', len(self.partition))
        print('Max_Q:', self.max_Q)
        print(self.partition)
        return self.partition, self.all_Q, self.max_Q
    
    def group(self,node1):
        num1 = 0
        nodegroup1 = {}
        for partition in self.partition:
            for node in partition:
                nodegroup1[node] = {'group':num1}
            num1 = num1 + 1  
        nx.set_node_attributes(self.G_copy, nodegroup1)
        for partition in self.partition:
            for node in partition:
                if node==node1:
                    return nodegroup1[node]['group']
                else:pass
    
    
    def add_group(self):
        num = 0
        nodegroup = {}
        for partition in self.partition:
            for node in partition:
                nodegroup[node] = {'group':num}
            num = num + 1  
        nx.set_node_attributes(self.G_copy, nodegroup)
        return self.partition
        
    def to_gml(self):
        nx.write_gml(self.G_copy, 'outtoGN_weighted.gml')
    
	#Computing the Q
    def cal_Q(self,partition,G):
        m = len(G.edges(None, False))
        a = []
        e = []
    
        for community in partition:
            t = 0.0
            for node in community:
                t += len([x for x in G.neighbors(node)])
            a.append(t/(2*m))
        
        for community in partition:
            t = 0.0
            for i in range(len(community)):
                for j in range(len(community)):
                    if(G.has_edge(community[i], community[j])):
                        t += 1.0
            e.append(t/(2*m))
        
        q = 0.0
        for ei,ai in zip(e,a):
            q += (ei - ai**2) 
        return q 
        
if __name__ == '__main__':
    G=nx.read_gml('data.gml')
    
    algorithm = GN_w(G)
    algorithm.run()
    zidian=algorithm.add_group()
    algorithm.to_gml()

数据集

data.gml

格式

graph
[
node
[
id 1
label "a"
]
node
[
id 2
label "b"
]
edge
[
source 1
target 2
value 0.6153846153846154
]
]

注意

缩进无影响
需为gml文件

附一段格式生成傻瓜代码

#GN_weight
with open('data.gml','w') as f:    #设置文件对象
    f.write("graph\n")
    f.write("[\n")
    for i in range(len(vertex)):
        f.write("node\n")
        f.write("[\n")
        f.write("id "+str(vertexxuhao[vertex[i]])+"\n")
        f.write("label \""+vertex[i]+"\"\n")
        f.write("]\n")
    for i in range(len(edges)):
        f.write("edge\n")
        f.write("[\n")
        f.write("source "+str(edgesxuhao[edges[i][0]])+"\n")
        f.write("target "+str(edgesxuhao[edges[i][1]])+"\n")
        f.write("value "+str(edgesvalue[(edges[i][0],edges[i][1])])+"\n")
        f.write("]\n")
    f.write("]")