基于IC(独立级联)模型生成复杂网络的传播数据
第一步:生成BA无标度网络的节点和带权值的边,并分别写入nodes.txt和weight_edges.txt两个文件中:
#encoding:utf-8
__author__ = '808'
import networkx as nx
import matplotlib.pyplot as plt
import random
# Build a Barabasi-Albert scale-free network with n nodes, where every newly
# added node attaches m edges to the existing ones.
BA = nx.barabasi_albert_graph(1000, 2)
nodes = list(BA.nodes())
edges = list(BA.edges())

# nodes.txt: all node ids on a single line, each followed by one space.
with open('nodes.txt', 'w') as file_write1:
    for node in nodes:
        file_write1.write(str(node) + ' ')

# weight_edges.txt: one edge per line as "<edge[1]> <edge[0]> <weight>", with
# the weight drawn uniformly from [0, 1].
with open('weight_edges.txt', 'w') as file_write2:
    for edge in edges:
        file_write2.write(str(edge[1]) + ' ' + str(edge[0]) + ' ' + str(random.uniform(0, 1)) + '\n')

# Show the generated network.
pos = nx.spring_layout(BA)
nx.draw(BA, pos, with_labels=False, node_size=30, node_color='red')
plt.show()
生成IC模型数据需要的函数
# encoding:utf-8
__author__ = '10057'
import matplotlib.pyplot as plt
import copy
import networkx as nx
import random
__all__ = ['independent_cascade']
def independent_cascade(G, seeds, steps=0):
    """Simulate a cascade diffusion over ``G`` starting from ``seeds``.

    :param G: networkx graph
        The network to diffuse over.  Each edge needs a ``'weight'``
        attribute (it is read by ``_diffuse_one_round``).  An undirected
        graph is converted to a directed one; a directed graph is deep-copied,
        so the diffusion always works on a copy of ``G``.
    :param seeds: list of cascades
        Each entry is a sequence whose first element is a seed node,
        e.g. ``[[3], [17]]``.  The list is deep-copied before use.
    :param steps: int
        Maximum number of diffusion rounds.  When ``steps <= 0`` the model
        diffuses until no more nodes can be activated.
    :return: tuple ``(cascades, activated_nodes, activated_edges)``
        ``cascades`` is a list of node lists, each starting at a seed and
        following one propagation path; ``activated_nodes`` is the set of
        all activated nodes (seeds included); ``activated_edges`` is the set
        of ``(source, target)`` pairs along which an activation happened.
    :raises Exception: if ``G`` is a multigraph, or if the first node of any
        seed cascade is not in ``G``.
    """
    if G.is_multigraph():
        raise Exception("independent_cascade() is"
                        " not defined for graphs with multiedges.")

    # make sure the seeds are in the graph
    for s in seeds:
        if s[0] not in G:
            raise Exception('seed', s, 'is not in graph')

    # change to directed graph; either branch yields an independent copy
    if not G.is_directed():
        DG = G.to_directed()
    else:
        DG = copy.deepcopy(G)

    # copy the seeds so the caller's list is never touched
    A = copy.deepcopy(seeds)

    # The documented contract is "steps <= 0 means run to exhaustion", so the
    # default steps=0 must take this branch as well.
    if steps <= 0:
        return _diffuse_all(DG, A)

    # perform diffusion for at most "steps" rounds only
    return _diffuse_k_rounds(DG, A, steps)
def _diffuse_all(G, A):
    """Repeat diffusion rounds until a round activates no new node.

    :param G: directed graph passed through to ``_diffuse_one_round``
    :param A: list of cascades; the first element of each is a seed node
    :return: tuple ``(A, activated_nodes, activated_edgelist)`` holding the
        final cascades, the set of activated nodes (seeds included) and the
        set of ``(source, target)`` edges along which activations happened
    """
    activated_nodes = {cascade[0] for cascade in A}  # seeds start out active
    activated_edgelist = set()
    while True:
        len_old = len(activated_nodes)
        A, activated_nodes, edgelist = _diffuse_one_round(G, A, activated_nodes)
        if len(activated_nodes) == len_old:
            # nothing was activated in this round, so the diffusion is over
            break
        activated_edgelist |= edgelist
    return A, activated_nodes, activated_edgelist
def _diffuse_k_rounds(G, A, steps):
    """Run at most ``steps`` diffusion rounds.

    The loop also ends early once every node of ``G`` is activated or a
    round activates nothing new.

    :param G: directed graph passed through to ``_diffuse_one_round``
    :param A: list of cascades; the first element of each is a seed node
    :param steps: maximum number of rounds to perform
    :return: tuple ``(A, activated_nodes, activated_edgelist)`` holding the
        final cascades, the set of activated nodes (seeds included) and the
        set of ``(source, target)`` edges along which activations happened
    """
    activated_nodes = {cascade[0] for cascade in A}  # seeds start out active
    activated_edgelist = set()
    while steps > 0 and len(activated_nodes) < len(G):
        len_old = len(activated_nodes)
        A, activated_nodes, edgelist = _diffuse_one_round(G, A, activated_nodes)
        if len(activated_nodes) == len_old:
            # nothing was activated in this round, so the diffusion is over
            break
        steps -= 1
        activated_edgelist |= edgelist
    return A, activated_nodes, activated_edgelist
def _diffuse_one_round(G,A,activated_nodes):
cascade = []
edgelist = set()
# f_cascades = [] #用来记录本轮未激活邻居的序列,以后不再参与激活
for i,s in enumerate(A):
# f_cascades = [] #末尾节点未激活任何邻居
activate_label = False #标记是否激活邻居节点
#A中的每一个列表代表一个传播级联
seed = s[-1] #以最后一个节点作为种子节点
nbs = list(G.successors(seed)) #寻找子节点
# s_tmp = s #复制s列表
for nb in nbs:
s_tmp = copy.deepcopy(s) #复制s列表
if nb in activated_nodes: #如果该节点已经被激活
continue
edge_data = G.get_edge_data(seed,nb)
if random.uniform(0,1) >= edge_data['weight']: #大于则激活节点
s_tmp.append(nb)
activated_nodes.add(nb)
activate_label = True #标记置真
edgelist.add((seed,nb))
cascade.append(s_tmp)#不管是否激活新的节点,原来的节点序列都要记录下来不能丢
if not activate_label:
cascade.append(s)
A = copy.deepcopy(cascade)
return A,activated_nodes,edgelist
生成IC模型数据
#encoding:utf-8
__author__ = '808'
from networkx import *
from IC_Model_1 import *
from LT_Model1 import *
import time
import random
import matplotlib.pyplot as plt
need_seed_num = 1  # number of seed nodes to sample
nodes_num = 10000  # number of nodes in the network
epoch = -1  # number of diffusion rounds
diffusion_model = 'LT'  # model selector; diffusion_model_test branches on 'IC' / 'LT'
network_model = 'BA'  # network type; used as the prefix of the data file names
def diffusion_model_test(edges_file, nodes_file, write_cascades_file, write_edges_file, need_seed_num, nodes_num, epoch, model):
    """Load a weighted network, run one diffusion and append the result to files.

    :param edges_file: input file with one ``"<u> <v> <weight>"`` edge per line
    :param nodes_file: input file with whitespace-separated integer node ids
    :param write_cascades_file: output file (append mode); one cascade per
        line, nodes separated by spaces
    :param write_edges_file: output file (append mode); one ``"<u> <v>"`` line
        for every pair of consecutive nodes in a cascade
    :param need_seed_num: number of seed nodes to sample
    :param nodes_num: seeds are sampled from the ids ``0 .. nodes_num - 1``
    :param epoch: number of diffusion rounds handed to the model
    :param model: ``'IC'`` or ``'LT'``
    :return: tuple ``(G, activated_nodes, activated_edges)``
    :raises ValueError: if ``model`` is neither ``'IC'`` nor ``'LT'``
    """
    # Local import: `from networkx import *` at the top of the file is not
    # guaranteed to bind the name `networkx` itself.
    import networkx as nx

    if model not in ('IC', 'LT'):
        raise ValueError("model must be 'IC' or 'LT', got %r" % (model,))

    start = time.time()

    with open(nodes_file, 'r') as f_nodes:
        nodes_result = [int(token) for token in f_nodes.read().split()]

    edges = []
    with open(edges_file, 'r') as f:
        for row in f:
            split_row = row.split()
            if not split_row:
                # tolerate blank lines such as the one after the final newline
                continue
            edges.append((int(split_row[0]), int(split_row[1]), float(split_row[2])))

    G = nx.DiGraph()  # empty directed graph
    G.add_nodes_from(nodes_result)
    G.add_weighted_edges_from(edges)

    # Sample from every id 0 .. nodes_num - 1; the previous upper bound of
    # nodes_num - 1 silently excluded the last node.
    seed_list = [[i] for i in random.sample(range(nodes_num), need_seed_num)]
    print('seed_list:', seed_list)

    if model == 'IC':
        cascades, activated_nodes, activated_edges = independent_cascade(G, seed_list, epoch)
    else:
        # Use the sampled seeds (previously a hard-coded [[0]] was passed here
        # although seed_list had been sampled and printed).
        cascades, activated_nodes, activated_edges = linear_threshold(G, seed_list, epoch)

    with open(write_cascades_file, 'a') as file_cascades_write, \
            open(write_edges_file, 'a') as file_write_edges:
        for cascade in cascades:  # every cascade is one propagation path
            for i, node in enumerate(cascade):
                file_cascades_write.write(str(node) + " ")
                if i > 0:
                    file_write_edges.write(str(cascade[i - 1]) + ' ' + str(node) + '\n')
            file_cascades_write.write('\n')

    end = time.time()
    print('IC_test Running time: %s Seconds'%(end-start))
    return G, activated_nodes, activated_edges
if __name__ == '__main__':
    # Input files: <network>_weight_edges_<n>.txt and <network>_nodes_<n>.txt
    edges_file = network_model + '_weight_edges_' + str(nodes_num) + '.txt'
    nodes_file = network_model + '_nodes_' + str(nodes_num) + '.txt'
    # NOTE(review): both output names repeat network_model; the second
    # occurrence may have been meant to be diffusion_model -- confirm.
    write_cascades_file = network_model + '_cascade_' + network_model + '_' + str(nodes_num) + '.txt'
    write_edges_file = network_model + '_edges_' + network_model + '_' + str(nodes_num) + '.txt'
    diffusion_model_test(edges_file, nodes_file, write_cascades_file, write_edges_file,
                         need_seed_num, nodes_num, epoch, diffusion_model)
这样就可以得到基于IC模型生成的cascade了,存储在文件IC_cascade.txt中(每行开头是cascade的序号,后面是节点编号)。我生成的一部分是这样的:
0 333
1 27 30 37 89 103 182 241 281 350 581 681 721 794 875 985
2 772
3 302
4 533
5 819
6 381 668
7 627
8 673 774 881
9 705
10 108 546
11 450 961
12 880
13 5 10 21 32 33 34 39 42 63 89 140 598 599 614
14 989
15 600
16 941
17 879
18 657
19 48 72 148 307 498 542 607 643
20 872
21 10 56 174 302 491 755 878 957 961
22 275 940
23 812
24 588
25 58 202
26 15 80 92 102 178 272 405 602 684 696 803
27 217 302 743
28 717
29 360 907
30 625
这里只截取了前31个cascade(序号0~30),代码给出的是生成100个。
下面这段代码可以将被激活的节点图画出来:
#encoding:utf-8
__author__ = '10057'
from networkx import *
from IC_Model import *
import time
import random
import matplotlib.pyplot as plt
need_seed_num = 100  # number of seed nodes to sample
nodes_num = 1000  # number of nodes in the network
if __name__ == '__main__':
    # Local import: `from networkx import *` does not reliably bind either
    # `nx` or `networkx`, and this script needs the package itself.
    import networkx as nx

    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    start = time.perf_counter()

    with open('nodes.txt', 'r') as f_nodes:
        nodes_result = [int(token) for token in f_nodes.read().split()]

    datasets = []
    with open('weight_edges.txt', 'r') as f:
        for row in f:
            split_row = row.split()
            if not split_row:
                # skip blank lines; the file ends with a newline, which used
                # to produce an empty row and crash int('')
                continue
            datasets.append((int(split_row[0]), int(split_row[1]), float(split_row[2])))

    G = nx.DiGraph()  # empty directed graph
    G.add_nodes_from(nodes_result)
    G.add_weighted_edges_from(datasets)
    activated_graph = nx.Graph()  # graph of activated nodes, for drawing

    # Sample from every id 0 .. nodes_num - 1; the previous upper bound of
    # nodes_num - 1 silently excluded the last node.
    seed_list = random.sample(range(nodes_num), need_seed_num)
    sum_nodes = 0
    with open('IC_cascade.txt', 'w') as file_write:
        for i, seed in enumerate(seed_list):
            # NOTE(review): assumes IC_Model.independent_cascade returns a
            # 2-tuple and accepts a flat list of seed nodes -- verify against
            # that module.
            cascade, layers = independent_cascade(G, [seed], 1)
            file_write.write(str(i)+'\t')
            activated_graph.add_node(seed)  # the seed itself
            for node in cascade[0]:
                sum_nodes += 1
                file_write.write(str(node)+' ')
                activated_graph.add_node(node)  # node reached from this seed
                activated_graph.add_edge(seed, node)
            file_write.write('\n')

    # Draw the activated graph, titled with the number of written nodes.
    t1 = ' %s nodes' % sum_nodes
    plt.title(t1)
    nx.draw(activated_graph, with_labels=True)
    plt.show()

    end = time.perf_counter()
    print('Running time: %s Seconds'%(end-start))