一、简介
degreediscount算法即度折扣算法,是一个基于节点度的启发式算法
度折扣算法的基本思想是:假设节点 j 是节点 i 的邻居,如果 j 已被选为种子节点,那么在基于度中心性指标考虑节点 i 是否作为种子节点时,应该对连边 (i,j) 打折扣,因为 i 对 j 不能产生额外的影响。假设所有边的激活概率都相同,均为 β。当节点 i 的邻居中有 $s_i$ 个激活种子时,i 被激活的概率为 $1-(1-\beta)^{s_i}$,此时 i 节点能被邻居节点激活,其期望影响力与直接将 i 节点选为种子节点的期望影响力相同,即此时选择节点 i 作为种子节点不增加额外的影响力(对期望影响力的贡献为 0)。由于节点 i 没有被激活的概率为 $(1-\beta)^{s_i}$,当节点 i 被选为种子时,其能激活的节点数为 $1+(d_i-s_i)\beta$,其中 “1” 表示节点 i 自身被激活,“$(d_i-s_i)\beta$” 表示被激活的邻居数(已是种子的 $s_i$ 个邻居不再计入)。因此考虑节点 i 选为种子时,其产生的期望影响力为:
![](https://img-blog.csdnimg.cn/1cdfb61562f94287a7048783c502191b.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBA5Yuk5aWL55qEbHPkuLY=,size_12,color_FFFFFF,t_70,g_se,x_16)
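当节点 i 的邻居中没有种子节点时,i 作为种子节点产生的期望影响力为:$1 + d_i\beta$。设 γ 是对邻居中每个种子节点的度折扣,则
$1 + (d_i - \gamma s_i)\beta = (1-\beta)^{s_i}\,[\,1 + (d_i - s_i)\beta\,]$,当 β 较小时取一阶近似 $(1-\beta)^{s_i} \approx 1 - s_i\beta$,可以得到
$\gamma \approx 2 + (d_i - s_i)\beta$,因此,当节点 i 有 $s_i$ 个种子邻居时,它的度折扣值定义为:$dd_i = d_i - 2s_i - (d_i - s_i)\,s_i\,\beta$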
度折扣算法的基本步骤为:第一轮中没有种子节点,所有节点的度都没有被折扣,所以直接选择网络中度最大的节点作为第一个种子;接下来每一轮根据上式计算每个未被激活节点的度折扣值,并选择最大的一个节点加入种子集;循环更新计算直到选出k个种子节点加入种子集 。
二、代码
def degreeDiscountIC(G, k, p=.01):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (without priority queue)

    Every node starts with its weighted degree as its score. Each round the
    node with the highest score is selected, and the scores of its
    not-yet-selected neighbors are discounted with
    dd[v] = d[v] - 2*t[v] - (d[v] - t[v]) * t[v] * p.

    Input: G -- networkx graph object; every edge must carry a 'weight' attribute
    k -- number of nodes needed
    p -- propagation probability
    Output:
    S -- chosen nodes in selection order; k of them, or every node of G
         when G has fewer than k nodes
    Note: the routine runs twice slower than using PQ. Implemented to verify results
    '''
    d = dict()  # weighted degree of each node
    dd = dict()  # degree discount (current score of each unselected node)
    t = dict()  # accumulated edge weight towards already selected neighbors
    S = []  # selected nodes, in selection order
    selected = set()  # same nodes as S, kept for O(1) membership tests
    for u in G:
        # Sum of incident edge weights; with unit weights each edge adds degree 1.
        d[u] = sum(G[u][v]['weight'] for v in G[u])
        dd[u] = d[u]
        t[u] = 0
    for _ in range(k):
        if not dd:
            # Fewer than k nodes in the graph: every node has been selected.
            break
        # Ties are resolved in favor of the node that comes first in dd.
        u = max(dd, key=dd.get)
        dd.pop(u)
        S.append(u)
        selected.add(u)
        for v in G[u]:
            if v not in selected:
                t[v] += G[u][v]['weight']  # increase weight of selected neighbors
                dd[v] = d[v] - 2 * t[v] - (d[v] - t[v]) * t[v] * p
    return S
'''
# Script entry point: set up timing, the seed list and the input path.
# NOTE(review): indentation was lost in this listing; the statements that
# follow are presumably the body of this `if` -- confirm before running.
if __name__ == '__main__':
import time
import networkx as nx
start = time.time()  # reference time, read again after the graph has been built
# NOTE(review): ICModel is imported here, but the experiment loop further
# down calls LTModel.simulate -- confirm which model is intended.
from YHSF import ICModel
S=[]  # seed set; stays empty until degreeDiscountIC is called
# Path of the edge-list file passed to read_raw_data; edgeimpact checks the
# same path before deciding whether to write the normalized edges.
address = 'E:/新建文件夹/DPSO_demo-master/twitter.txt'
def read_raw_data(raw_file_dir):
    """Build a weighted MultiDiGraph from a whitespace-separated edge list.

    Each non-blank line must start with two integer node ids (source,
    target). Any further columns are ignored and every edge gets weight
    1.0. Repeated lines produce parallel edges.

    Args:
        raw_file_dir: path of the edge-list file.

    Returns:
        The populated nx.MultiDiGraph.

    Raises:
        ValueError: if one of the first two fields is not an integer.
        IndexError: if a non-blank line has fewer than two fields.
    """
    g = nx.MultiDiGraph()
    weight = float(1)  # all edges carry unit weight
    # The context manager closes the file even if a line fails to parse.
    with open(raw_file_dir) as edge_file:
        for line in edge_file:
            str_list = line.split()
            if not str_list:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            n1 = int(str_list[0])
            n2 = int(str_list[1])
            g.add_weighted_edges_from([(n1, n2, weight)])
    return g
def read_raw_data1(raw_file_dir):
    """Build an unweighted MultiDiGraph from a whitespace-separated edge list.

    Each non-blank line must start with two integer node ids (source,
    target). Any further columns are ignored and no 'weight' attribute is
    stored on the edges. Repeated lines produce parallel edges.

    Args:
        raw_file_dir: path of the edge-list file.

    Returns:
        The populated nx.MultiDiGraph.

    Raises:
        ValueError: if one of the first two fields is not an integer.
        IndexError: if a non-blank line has fewer than two fields.
    """
    g = nx.MultiDiGraph()
    # The context manager closes the file even if a line fails to parse.
    with open(raw_file_dir) as edge_file:
        for line in edge_file:
            str_list = line.split()
            if not str_list:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            n1 = int(str_list[0])
            n2 = int(str_list[1])
            g.add_edges_from([(n1, n2)])
    return g
def multidir2simpledir(multidir_graph):
    """Collapse a directed multigraph into a simple directed graph.

    Parallel edges (n1, n2) become a single edge whose 'weight' is the
    number of parallel edges. Self-loops are not kept as edges; instead the
    node receives a 'loops' attribute holding its self-loop count (nodes
    without self-loops have no such attribute). Nodes that have no edges in
    the input are not copied.

    Args:
        multidir_graph: graph whose edges() may contain repeated pairs.

    Returns:
        A new nx.DiGraph; prints node/edge/self-loop counts before and after.
    """
    # Local import: Counter is not imported anywhere in the visible listing.
    from collections import Counter

    print("raw:", multidir_graph.number_of_nodes(), multidir_graph.number_of_edges(),
          nx.number_of_selfloops(multidir_graph))
    # Multiplicity of each (source, target) pair, in first-seen order.
    c = Counter(multidir_graph.edges())
    simpledir_graph = nx.DiGraph()
    for (n1, n2), count in c.items():
        if n1 != n2:
            simpledir_graph.add_edge(n1, n2, weight=count)
        else:
            # add_node creates the node if needed and otherwise just
            # updates its attributes, so no existence check is required.
            simpledir_graph.add_node(n1, loops=count)
    print("processed:", simpledir_graph.number_of_nodes(), simpledir_graph.number_of_edges(),
          nx.number_of_selfloops(simpledir_graph))
    return simpledir_graph
# Recompute edge influence from the edge frequencies of the simple directed graph.
def edgeimpact(simpledir_graph):
    """Normalize edge weights by the weighted in-degree of the target node.

    For every edge (v, u) the new weight is w(v, u) divided by the sum of
    the weights of all edges entering u, so the incoming weights of each
    node sum to 1. Self-loop counts stored on nodes are not part of the
    denominator.

    Side effect: if the path in the module-level name `address` does not
    exist, the normalized edges are written to it, one "v u weight" line
    per edge.

    Args:
        simpledir_graph: nx.DiGraph whose edges carry a 'weight' attribute.

    Returns:
        A new nx.DiGraph with the normalized weights; prints its
        node/edge/self-loop counts.
    """
    # Local import: os is not imported anywhere in the visible listing.
    import os

    graph = nx.DiGraph()
    in_strength = dict(simpledir_graph.in_degree(weight='weight'))
    for v, u, w in simpledir_graph.edges.data('weight'):
        graph.add_edge(v, u, weight=float(w) / in_strength[u])
    if not os.path.exists(address):
        # Never overwrite an existing file; the context manager guarantees
        # the handle is closed even if a write fails.
        with open(address, 'a') as file:
            for e in graph.edges.data('weight'):
                file.write(str(e[0]) + " " + str(e[1]) + " " + str(e[2]) + '\n')
    print("normalized:", graph.number_of_nodes(), graph.number_of_edges(), nx.number_of_selfloops(graph))
    return graph
# Load the edge list and collapse parallel edges into a simple directed graph.
simpledir_graph = multidir2simpledir(read_raw_data(address))
# Normalize the edge weights
graph = edgeimpact(simpledir_graph)
print(time.time() - start)  # seconds elapsed since `start` was taken
#for k in range(5, 55, 5):
print(S)  # S has not been reassigned yet, so this prints the empty list
Q=[]  # value returned by the simulation for each k
W=[]  # elapsed seconds (seed selection + simulation) for each k
# NOTE(review): indentation was lost in this listing; the statements through
# W.append(runningtime1) are presumably the loop body -- confirm.
for k in range(5, 55, 5):
start_time = time.time()
S = degreeDiscountIC(graph, k, p=.01)
#print('A:%.2f MB' % (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
# NOTE(review): LTModel is neither defined nor imported in the visible code
# (only ICModel is imported from YHSF) -- confirm the intended model.
activenodes = LTModel.simulate(graph, S, 0.25)
Q.append(activenodes)
end_time = time.time()
runningtime1 = end_time - start_time
W.append(runningtime1)
print(Q)
print("总时间:", W)
算法缺点:虽然度折扣算法具有快速、高效的特性,但该算法还有许多需要改进的地方,这些不足之处是制约该算法性能进一步提升的关键因素。首先,度折扣算法在计算节点的期望影响力时没有考虑邻居的差异性,而是简单地认为每个未激活的邻居节点对该节点期望影响力的贡献是相同的,导致计算期望影响力的公式不够精确。其次,度折扣算法没有考虑节点之间共同邻居数的影响,不能充分降低传播的冗余性,比如节点 i 和 j 之间有许多共同邻居,如果节点 i 已被选择为种子节点,则节点 j 被感染的可能性也很高,因为它们之间有多条可能的传播路径,此时再选择节点 j 作为种子节点会导致传播的冗余效应。
算法介绍和内容引用:[1]夏欣, 马闯, 张海峰. 基于改进的度折扣方法研究社交网络影响力最大化问题[J]. 电子科技大学学报, 2021, 50(3):9.