基于LFM的重叠社区发现算法python代码实现
import random
import networkx as nx
import matplotlib.pyplot as plt
import zipfile
#import urllib.request as urllib
class Community():
''' use set operation to optimize calculation '''
def __init__(self,G,alpha=1.0):
self._G = G
self._alpha = alpha
self._nodes = set()
self._k_in = 0
self._k_out = 0
def add_node(self,node):
neighbors = set(self._G.neighbors(node))
#print("添加令居节点",neighbors , self._nodes,neighbors & self._nodes)
node_k_in = len(neighbors & self._nodes)#neighbor和self._nodes公有节点的数目存入node_k_in
#print("node_k_in",node_k_in)
node_k_out = len(neighbors) - node_k_in
#print("node_k_out",node_k_out)
self._nodes.add(node)
self._k_in += 2*node_k_in
self._k_out = self._k_out+node_k_out-node_k_in
def remove_node(self,node):
neighbors = set(self._G.neighbors(node))
community_nodes = self._nodes
#print("community_nodes",community_nodes)
node_k_in = len(neighbors & community_nodes)
node_k_out = len(neighbors) - node_k_in
self._nodes.remove(node)
self._k_in -= 2*node_k_in
self._k_out = self._k_out - node_k_out+node_k_in
def cal_add_fitness(self,node):#fitness适应度
neighbors = set(self._G.neighbors(node))
old_k_in = self._k_in
old_k_out = self._k_out
vertex_k_in = len(neighbors & self._nodes)#vertex顶点
vertex_k_out = len(neighbors) - vertex_k_in
new_k_in = old_k_in + 2*vertex_k_in
new_k_out = old_k_out + vertex_k_out-vertex_k_in
new_fitness = new_k_in/(new_k_in+new_k_out)**self._alpha#幂次
old_fitness = old_k_in/(old_k_in+old_k_out)**self._alpha
return new_fitness-old_fitness
def cal_remove_fitness(self,node):
neighbors = set(self._G.neighbors(node))
new_k_in = self._k_in
new_k_out = self._k_out
node_k_in = len(neighbors & self._nodes)
node_k_out = len(neighbors) - node_k_in
old_k_in = new_k_in - 2*node_k_in
old_k_out = new_k_out - node_k_out + node_k_in
old_fitness = old_k_in/(old_k_in+old_k_out)**self._alpha
new_fitness = new_k_in/(new_k_in+new_k_out)**self._alpha
return new_fitness-old_fitness
def recalculate(self):
for vid in self._nodes:
fitness = self.cal_remove_fitness(vid)
if fitness < 0.0:
return vid
return None
def get_neighbors(self):
neighbors = set()
for node in self._nodes:
neighbors.update(set(self._G.neighbors(node)) - self._nodes)
return neighbors
def get_fitness(self):
return float(self._k_in)/((self._k_in+self._k_out) ** self._alpha)
class LFM():
def __init__(self, G, alpha):
self._G = G
self._alpha = alpha
def execute(self):
communities = []
print("嘿嘿",list(self._G.node.keys()))
print("---------------------")
node_not_include = list(self._G.node.keys())
while(len(node_not_include) != 0):
c = Community(self._G, self._alpha)
#print("self._alpha",self._alpha)#0.9
# randomly select a seed node
seed = random.choice(node_not_include)
c.add_node(seed)
print("随机选取节点是:",seed)
to_be_examined = c.get_neighbors()
print("c.get_neighbors()",c.get_neighbors())
while(to_be_examined):
#largest fitness to be added
m = {}
for node in to_be_examined:
fitness = c.cal_add_fitness(node)#计算点的适应度》0加入,小于0删除
m[node] = fitness
to_be_add = sorted(m.items(),key=lambda x:x[1],reverse = True)[0]#啥意思???
#适应度降序排列
#stop condition
if(to_be_add[1] < 0.0):
break
c.add_node(to_be_add[0])
to_be_remove = c.recalculate()
while(to_be_remove != None):
c.remove_node(to_be_remove)
to_be_remove = c.recalculate()
to_be_examined = c.get_neighbors()
for node in c._nodes:
if(node in node_not_include):
node_not_include.remove(node)
communities.append(c._nodes)
return communities
if(__name__ == "__main__"):
#G = nx.karate_club_graph()#一个边集一个点集
# G = nx.florentine_families_graph()
zf = zipfile.ZipFile('football.zip') # zipfile object
txt = zf.read('football.txt').decode() # read info file
gml = zf.read('football.gml').decode() # read gml data
# throw away bogus first line with # from mejn files
gml = gml.split('\n')[1:]
G = nx.parse_gml(gml) # parse gml data
print(txt)
# print degree for each team - number of games
for n, d in G.degree():
print('%s %d' % (n, d))
options = {
'node_color': 'red',
'node_size': 50,
'line_color': 'grey',
'linewidths': 0,
'width': 0.1,
}
nx.draw(G, **options)
#networkx.draw(G, with_labels=True)
plt.show()
algorithm = LFM(G,0.9)
communities = algorithm.execute()
for c in communities:
print (len(c),sorted(c))
社区网络:
重叠社区划分结果: