Louvain算法是一种用于社区发现的算法,其主要思想是将网络中的节点划分为不同的社区,使得同一社区内的节点之间具有更高的相似度,而不同社区之间的相似度则较低。下面是在Python中实现Louvain算法的步骤:
导入所需的库
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities
构建网络
G = nx.Graph()
G.add_edges_from([(1, 2), (1, 3), (2, 3), (2, 4), (3, 4), (4, 5), (4, 6), (5, 6)])
运行Louvain算法
communities = greedy_modularity_communities(G)
输出结果
print(list(communities))
以上代码将节点划分为不同的社区,并输出结果。需要注意的是,这里的网络是一个无向图,如果是有向图,需要使用DiGraph()函数构建网络。此外,还可以通过调整参数来优化社区划分的效果,例如调整分辨率参数等。
louvain算法:
import networkx as nx
def louvain(G):
# 初始化每个节点所属的社区
node2cp = dict((n, i) for i, n in enumerate(G.nodes()))
# 计算每条边的初始模块度增益
edgeGain = calcInitialGain(G, node2cp)
# 当模块度增益不再上升时退出循环
while True:
modChanged = False
for node in G.nodes():
curCp = node2cp[node]
curMod = calcModularity(G, node2cp)
bestCp, bestGain = curCp, 0
for neighbor in G.neighbors(node):
if node == neighbor:
continue
# 计算当前节点移动到邻居所属的社区时的模块度增益
newCp = node2cp[neighbor]
newMod = calcModularity(G, node2cp, node, newCp, edgeGain)
gain = newMod - curMod
if gain > bestGain:
bestGain = gain
bestCp = newCp
if bestCp != curCp:
modChanged = True
node2cp[node] = bestCp
# 更新所有当前节点所在社区的邻接点的模块度增益
for neighbor in G.neighbors(node):
if node == neighbor:
continue
if node2cp[neighbor] == curCp:
edgeGain[(min(node, neighbor), max(node, neighbor))] += 2 * bestGain
# 计算当前社区的数量
curCpNum = len(set(node2cp.values()))
# 合并相邻的社区,直到模块度增益不再上升为止
if not modChanged or curCpNum == 1:
break
# 构建邻接表
adjDict = {cp:set() for cp in range(curCpNum)}
for n1, n2, d in G.edges(data=True):
if node2cp[n1] != node2cp[n2]:
adjDict[node2cp[n1]].add(node2cp[n2])
adjDict[node2cp[n2]].add(node2cp[n1]])
# 找到当前社区可以合并的社区编号
mergeDict = {}
for cp, adj in adjDict.items():
maxGain, bestCp = 0, cp
for adjCp in adj:
gain = calcMergeGain(G, node2cp, cp, adjCp, edgeGain)
if gain > maxGain:
maxGain, bestCp = gain, adjCp
mergeDict[bestCp] = cp
# 更新节点所属社区
for node, cp in node2cp.items():
if cp in mergeDict:
node2cp[node] = mergeDict[cp]
# 更新边的模块度增益
for edge, gain in edgeGain.items():
cp1, cp2 = node2cp[edge[0]], node2cp[edge[1]]
if cp1 == cp2:
edgeGain[edge] = calcEdgeGain(G, node2cp, cp1)
return [set([n for n, cp in node2cp.items() if cp == c]) for c in set(node2cp.values())]
def calcInitialGain(G, node2cp):
edgeGain = {}
for n1, n2, d in G.edges(data=True):
cp1, cp2 = node2cp[n1], node2cp[n2]
if cp1 == cp2:
edgeGain[(min(n1, n2), max(n1, n2))] = d['weight'] - G.degree(n1) * G.degree(n2) / (2 * G.size())
else:
edgeGain[(min(n1, n2), max(n1, n2))] = 0
return edgeGain
def calcModularity(G, node2cp, testNode=None, newCp=None, edgeGain=None):
if edgeGain is None:
edgeGain = calcInitialGain(G, node2cp)
试了一下。不太行。
重新看看
阶段一:
通过局部调整节点所在社区使得网络的模块度达到最优, 进而实现对社区的划分;
初始时, 将每个节点看作一个单独的社区, 计算模块度 Q;
对于每个节点 i, 逐个计算将其移动到邻点所在的社区后模块度增益 ∆Q;
将 i 移动到使得 ∆Q > 0 最大的社区内;
重复进行 2、3 直至移动任何节点都不会增加模块度,
阶段一结束。
阶段二:
将同一社区的节点聚合成一个“超级节点”, 构建新的网络;
化点: 一个社区作为一个“超级节点”;
建边: 原社区间只要有边就给对应的“超级节点”连一条边, 社区内有边则给对应的“超级节点”建一个自环;
赋权: 原社区间所有边上的权重之和赋给对应“超级节点”间的边, 社区内所有边上的权重各自算两边加和赋
回到阶段一
代码如下:
# coding=utf-8
import collections
import random
import read_graph as rg
def load_graph(path):
G = collections.defaultdict(dict) # 设置空白默认字典,无向图
#DG=collections.defaultdict(dict) # 设置空白默认字典,有向图
with open(path) as text:
for line in text:
vertices = line.strip().split()
# .strip()删除字符串line开头结尾处得空格 # .split()按照空格分隔
# 返回一个列表,含有3个字符串元素
v_i = int(vertices[0])
v_j = int(vertices[1])
w = float(vertices[2])
G[v_i][v_j] = w
return G
class Vertex():
def __init__(self, vid, cid, nodes, k_in=0):
self._vid = vid
self._cid = cid
self._nodes = nodes
self._kin = k_in # 结点内部的边的权重
class Louvain():
def __init__(self, G):
self._G = G
self._m = 0 # 边数量
# 下面两个空字典存放信息
self._cid_vertices = {} # 需维护的关于社区的信息(社区编号,其中包含的结点编号的集合)
self._vid_vertex = {} # 需维护的关于结点的信息(结点编号,相应的Vertex实例)
for vid in self._G.keys():
self._cid_vertices[vid] = set([vid])
self._vid_vertex[vid] = Vertex(vid, vid, set([vid]))
self._m += sum([1 for neighbor in self._G[vid].keys() ])
def first_stage(self):
mod_inc = False # 用于判断算法是否可终止
visit_sequence = self._G.keys()
random.shuffle(list(visit_sequence))
while True:
can_stop = True # 第一阶段是否可终止
for v_vid in visit_sequence:
v_cid = self._vid_vertex[v_vid]._cid
k_v = sum(self._G[v_vid].values()) + self._vid_vertex[v_vid]._kin
cid_Q = {}
for w_vid in self._G[v_vid].keys():
w_cid = self._vid_vertex[w_vid]._cid
if w_cid in cid_Q:
continue
else:
tot = sum(
[sum(self._G[k].values()) + self._vid_vertex[k]._kin for k in self._cid_vertices[w_cid]])
if w_cid == v_cid:
tot -= k_v
k_v_in = sum([v for k, v in self._G[v_vid].items() if k in self._cid_vertices[w_cid]])
delta_Q = k_v_in - k_v * tot / self._m # 由于只需要知道delta_Q的正负,所以少乘了1/(2*self._m)
cid_Q[w_cid] = delta_Q
cid, max_delta_Q = sorted(cid_Q.items(), key=lambda item: item[1], reverse=True)[0]
if max_delta_Q > 0.0 and cid != v_cid:
self._vid_vertex[v_vid]._cid = cid
self._cid_vertices[cid].add(v_vid)
self._cid_vertices[v_cid].remove(v_vid)
can_stop = False
mod_inc = True
if can_stop:
break
return mod_inc
def second_stage(self):
cid_vertices = {}
vid_vertex = {}
for cid, vertices in self._cid_vertices.items():
if len(vertices) == 0:
continue
new_vertex = Vertex(cid, cid, set())
for vid in vertices:
new_vertex._nodes.update(self._vid_vertex[vid]._nodes)
new_vertex._kin += self._vid_vertex[vid]._kin
for k, v in self._G[vid].items():
if k in vertices:
new_vertex._kin += v / 2.0
cid_vertices[cid] = set([cid])
vid_vertex[cid] = new_vertex
G = collections.defaultdict(dict)
for cid1, vertices1 in self._cid_vertices.items():
if len(vertices1) == 0:
continue
for cid2, vertices2 in self._cid_vertices.items():
if cid2 <= cid1 or len(vertices2) == 0:
continue
edge_weight = 0.0
for vid in vertices1:
for k, v in self._G[vid].items():
if k in vertices2:
edge_weight += v
if edge_weight != 0:
G[cid1][cid2] = edge_weight
G[cid2][cid1] = edge_weight
self._cid_vertices = cid_vertices
self._vid_vertex = vid_vertex
self._G = G
def get_communities(self):
communities = []
for vertices in self._cid_vertices.values():
if len(vertices) != 0:
c = set()
for vid in vertices:
c.update(self._vid_vertex[vid]._nodes)
communities.append(c)
return communities
def execute(self):
iter_time = 1
while True:
iter_time += 1
mod_inc = self.first_stage()
if mod_inc:
self.second_stage()
else:
break
return self.get_communities()
if __name__ == '__main__':
G = load_graph('data_graph/karate_34_graph.txt')
algorithm = Louvain(G)
communities = algorithm.execute()
# 按照社区大小从大到小排序输出
communities = sorted(communities, key=lambda b: -len(b)) # 按社区大小排序
count = 0
for communitie in communities:
count += 1
print("社区", count, " ", communitie)
结束
如有问题可联系我。