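# Original MIA paper: Chen W., Wang C., Wang Y. Scalable Influence Maximization for Prevalent Viral Marketing in Large-Scale Social Networks. In: Proceedings of the 16th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Washington, DC, USA, July 25-28, 2010. ACM, 2010.
# Pseudocode: see the algorithm listings in the paper above (the listings are not reproduced in this file); comments of the form "算法 line N" in the code refer to their line numbers.
# Read the Python code below alongside those comments and the pseudocode.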
import sys
import time
import heapq
import numpy as np
import math
# Module-level state shared by the functions below; every list is indexed by 0-based node index.
# Adjacency list: Load_data_IC appends [end, weight] pairs to NETWORK[start].
NETWORK = []
# MIIA[j] collects (i, dijk[i][j]) tuples for the nodes i that getMMM keeps as reaching j.
MIIA = []
# MIOA[i] collects (j, dijk[i][j]) tuples for the nodes j that getMMM keeps as reached from i.
MIOA = []
# Vnumber x Vnumber matrix built by Load_data_IC: -log(weight) for an edge, MAX where there is none.
nodetable = []
# dijk[i] is the distance list returned by Dijkstra(nodetable, i); filled by getdijk_IC.
dijk = []
# Node count and edge count, read from the first line of the network file by Load_data_IC.
Vnumber = 0
Enumber = 0
MAX = 9999  # Sentinel distance meaning "unreachable" (stands in for the algorithm's infinity).
def _apply_incinf_delta(incinf, S, u, sign):
    """Add ``sign * α(v, w) * (1 - ap(w, S, v))`` to ``incinf[w]``.

    The update runs over every non-seed ``v`` in ``MIOA[u]`` and every
    non-seed ``w`` in ``MIIA[v]``.  ``sign`` is -1 to retract the
    contributions computed under the current ``S`` and +1 to add them back
    after ``S`` has changed.
    """
    for v, _ in MIOA[u]:
        if v in S:
            continue
        for w, _ in MIIA[v]:
            if w in S:
                continue
            incinf[w] += sign * getAlpha(v, w, S) * (1 - ap(w, S, v))


def MIA(k):
    """Greedily pick seed nodes using the incremental-influence table.

    Args:
        k: Number of seeds requested.  At most ``Vnumber`` seeds are
            selected, so a request larger than the graph cannot produce
            duplicate seeds from running out of candidates.

    Returns:
        The list of selected node indices (0-based), in selection order.

    Reads the module-level ``Vnumber``, ``MIIA`` and ``MIOA`` and prints
    debugging output to stdout.
    """
    S = []
    incinf = [0] * Vnumber

    # Initial incremental influence: with no seeds chosen yet, every u in
    # MIIA[v] contributes α(v, u) to incinf[u].
    for v in range(Vnumber):
        print("MIIA(" + str(v) + ",θ):", MIIA[v])
        for u, _ in MIIA[v]:
            incinf[u] += getAlpha(v, u, S)
    print("incinf:", incinf)

    # Main loop: pick the best non-seed node, then refresh the table for
    # the nodes whose contributions depend on it.
    for _ in range(min(k, Vnumber)):
        u = maxp(incinf, S)
        # Retract the contributions computed without u in the seed set ...
        _apply_incinf_delta(incinf, S, u, -1)
        S.append(u)
        # ... and re-add them now that u is a seed.
        _apply_incinf_delta(incinf, S, u, +1)
    return S
def maxp(incinf, S):
    """Return the index of the non-seed node with the largest ``incinf`` value.

    Args:
        incinf: Per-node incremental-influence values, indexed by node.
        S: Collection of node indices that are already seeds.

    Returns:
        The lowest index among the non-seed nodes that share the maximum
        value.

    Raises:
        ValueError: If every node is already a seed (or ``incinf`` is empty).

    The previous implementation started from ``(index 0, value 0)`` and only
    replaced it on a strictly greater value, so it returned node 0 whenever
    no candidate was positive -- even when node 0 was already in ``S``.
    """
    seeds = set(S)
    candidates = [node for node in range(len(incinf)) if node not in seeds]
    if not candidates:
        raise ValueError("no non-seed node left to select")
    # max() returns the first maximal item, which keeps the lowest-index
    # tie-break of the original strict ">" comparison.
    return max(candidates, key=incinf.__getitem__)
def getAlpha(v, u, S):
    """Return the α(v, u) coefficient that MIA uses to weight ``incinf`` updates.

    Args:
        v: Index of the node whose MIIA set is being considered.
        u: Index of the node whose coefficient towards ``v`` is wanted.
        S: Current list of seed node indices.

    Returns:
        1 when ``u == v``; 0 when ``MIOA[u]`` is empty or (for non-empty
        ``S``) the next node ``w`` is a seed; otherwise
        ``α(v, w) * pp(u, w) * Π (1 - ap(u', S, v) * pp(u', w))`` over the
        other nodes ``u'`` that are in both ``MIIA[w]`` and ``MIIA[v]``,
        with ``pp = e^(-distance)``.

    Prints debugging output to stdout on every call.

    NOTE(review): ``w`` is taken as the first entry of ``MIOA[u]``, not as
    the next node on the path from ``u`` to ``v``.  The recursion only stops
    at ``u == v``, an empty ``MIOA`` entry, or (for non-empty ``S``) a seed;
    a chain of first entries that cycles without meeting one of those does
    not terminate.  Confirm against the paper before relying on the values.
    """
    print("get α(" + str(v) + "," + str(u) + ")", S)
    if v == u:
        return 1
    print("--MIOA(" + str(u) + ",θ):", MIOA[u])
    if not MIOA[u]:
        return 0
    w, dist_uw = MIOA[u][0]
    print("w:", w)
    pp_uw = math.exp(-dist_uw)  # pp = e^(-dijk)  <=>  dijk = -log(pp)
    if not S:
        # No seeds: the product below is skipped and the coefficient
        # reduces to α(v, w) * pp(u, w).
        return getAlpha(v, w, S) * pp_uw
    if w in S:
        return 0
    in_miia_v = {node for node, _ in MIIA[v]}
    others_fail = 1
    for node, dist in MIIA[w]:
        if node == u or node not in in_miia_v:
            continue
        others_fail *= 1 - ap(node, S, v) * math.exp(-dist)
    # Multiply by the product itself.  The previous "(1 - product)" returned
    # 0 whenever w had no other influencer, contradicting the S == [] branch
    # above, where the same situation yields α(v, w) * pp(u, w).
    return getAlpha(v, w, S) * pp_uw * others_fail
def getAlpha_initial(v, u):
    """Return α(v, u) for an empty seed set by following first ``MIOA`` entries.

    Returns 1 when ``u == v`` and 0 when ``MIOA[u]`` is empty; otherwise the
    value for the first entry ``w`` of ``MIOA[u]`` multiplied by
    ``e^(-distance)`` of that entry.  Prints debugging output on every call.
    """
    print(f"get α({v},{u})")
    if v == u:
        return 1

    out_entries = MIOA[u]
    print(f"--MIOA({u},θ):", out_entries)
    if out_entries == []:
        return 0

    # The first stored entry of MIOA[u] is used as the next node w.
    w, dist_uw = out_entries[0]
    print("w:", w)
    downstream = getAlpha_initial(v, w)
    # pp = e^(-dijk)  <=>  dijk = -log(pp)
    return downstream * math.pow(math.e, -dist_uw)
def ap(u, S, v):
    """Return the activation-probability estimate of ``u`` within ``MIIA[v]``.

    Args:
        u: Index of the node being evaluated.
        S: Current list of seed node indices.
        v: Index of the node whose ``MIIA`` set restricts the computation.

    Returns:
        1 if ``u`` is a seed; 0 if no entry of ``MIIA[u]`` is also in
        ``MIIA[v]``; otherwise ``1 - Π (1 - ap(w, S, v) * e^(-d(w, u)))``
        over those shared entries ``(w, d(w, u))``.

    Prints debugging output to stdout on every call.

    NOTE(review): the recursion has no visited-set.  If two non-seed nodes
    appear in each other's ``MIIA`` lists (and in ``MIIA[v]``), the calls
    recurse into each other without terminating.
    """
    print("get ap(" + str(u) + "," + str(v) + ")", S)
    # Cheap exit first: a seed is active regardless of its neighbourhood.
    if u in S:
        return 1
    in_miia_v = {node for node, _ in MIIA[v]}
    # Entries (w, d(w, u)) of MIIA[u] whose node also belongs to MIIA[v].
    shared = [entry for entry in MIIA[u] if entry[0] in in_miia_v]
    if not shared:
        return 0
    not_activated = 1
    for w, dist in shared:
        # pp = e^(-dijk)  <=>  dijk = -log(pp)
        not_activated *= 1 - ap(w, S, v) * math.exp(-dist)
        print("pp(" + str(w) + "," + str(u) + ")")
    return 1 - not_activated
def Load_data_IC(network_file):
    """Read a weighted edge list and build the graph tables.

    The first line of the file holds ``<node count> <edge count>``; each of
    the following ``<edge count>`` lines holds ``<start> <end> <weight>``.
    Node ids in the file are 1-based and are shifted to 0-based indices.

    Side effects on module globals:
        * ``Vnumber`` / ``Enumber`` are set from the first line.
        * ``NETWORK``, ``MIIA`` and ``MIOA`` each get ``Vnumber`` new empty
          rows appended; ``NETWORK[start]`` then receives ``[end, weight]``
          for every edge (one row per node, one pair per outgoing edge).
        * ``nodetable`` is rebuilt as a ``Vnumber x Vnumber`` matrix holding
          ``-log(weight)`` for each edge and ``MAX`` elsewhere, so that the
          shortest distance corresponds to the largest product of weights.

    Args:
        network_file: Path of the UTF-8 text file to read.

    Returns:
        The ``nodetable`` matrix.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a line does not have the expected number of fields,
            a field is not numeric, or a weight is not positive
            (``math.log`` rejects it).

    NOTE(review): a node id of 0 in the file becomes index -1, which Python
    resolves to the last node instead of failing.
    """
    global NETWORK, nodetable, Vnumber, Enumber, MIIA, MIOA
    # The context manager closes the handle even if parsing fails midway.
    with open(network_file, "r", encoding="utf-8") as file:
        Vnumber, Enumber = (int(field) for field in file.readline().split())
        nodetable = [[MAX] * Vnumber for _ in range(Vnumber)]
        for _ in range(Vnumber):
            NETWORK.append([])
            MIIA.append([])
            MIOA.append([])
        for _ in range(Enumber):
            start, end, weight = file.readline().split()
            start = int(start) - 1  # file ids are 1-based
            end = int(end) - 1
            weight = float(weight)
            NETWORK[start].append([end, weight])
            nodetable[start][end] = -math.log(weight)
    return nodetable
def getdijk_IC():
    """Append one ``Dijkstra(nodetable, i)`` row per node to ``dijk``.

    Returns the module-level ``dijk`` list after it has been extended.
    """
    global dijk
    dijk.extend(Dijkstra(nodetable, source) for source in range(Vnumber))
    return dijk
def Dijkstra(table, node):
    """Return the shortest distance from ``node`` to every node in ``table``.

    Args:
        table: Square matrix where ``table[a][b]`` is the length of edge
            a -> b, or ``MAX`` when there is no edge.
            Assumes the entries are non-negative (TODO confirm: they are
            ``-log(weight)``, so this needs weights in (0, 1]).
        node: 0-based index of the source node.

    Returns:
        A list ``distance`` with ``distance[node] == 0`` and ``MAX`` for
        nodes that cannot be reached.

    Prints debugging output to stdout.
    """
    size = len(table)
    final = [0] * size  # 1 once a node's distance is settled
    distance = list(table[node])
    # ``path`` is only used for the debug prints below.
    path = [node if dist != MAX else MAX for dist in distance]
    print("node:" + str(node), "-" * 15)
    print("distance:", distance)
    print("path:", path)
    final[node] = 1
    path[node] = node
    print("final:", final)
    print("path:", path)

    for _ in range(size):
        # Pick the closest node that is not settled yet.
        nearest = -1
        nearest_dist = MAX
        for j in range(size):
            if not final[j] and distance[j] < nearest_dist:
                nearest_dist = distance[j]
                nearest = j
        if nearest < 0:
            # Every remaining node is unreachable; nothing left to relax.
            break
        final[nearest] = 1
        row = table[nearest]
        for j in range(size):
            candidate = nearest_dist + row[j]
            if not final[j] and distance[j] > candidate:
                distance[j] = candidate

    distance[node] = 0
    print("*" * 17 + "final distance of", node, ":", distance)
    return distance
def getMMM(theta):
    """Fill ``MIIA`` and ``MIOA`` from the all-pairs distances in ``dijk``.

    A pair ``(i, j)`` with ``i != j`` is kept when ``j`` is reachable from
    ``i`` and ``dijk[i][j] <= -log(theta)``.  Since distances are
    ``-log(pp)``, that is the same as ``pp >= theta``.  For each kept pair,
    ``(i, dijk[i][j])`` is appended to ``MIIA[j]`` and ``(j, dijk[i][j])``
    to ``MIOA[i]``.

    Args:
        theta: Propagation-probability threshold; must be positive.

    Raises:
        ValueError: If ``theta`` is not positive (``math.log`` rejects it).

    Prints both tables to stdout when done.
    """
    global MIIA, MIOA
    max_distance = -math.log(theta)  # loop-invariant, computed once
    for i in range(Vnumber):
        row = dijk[i]
        for j in range(Vnumber):
            dist = row[j]
            # Skip unreachable targets and the node itself.
            if i == j or dist == MAX:
                continue
            if dist <= max_distance:
                MIIA[j].append((i, dist))
                MIOA[i].append((j, dist))
    print("MIIA:", MIIA)
    print("MIOA:", MIOA)
if __name__ == '__main__':
    # Intended command line (argument parsing is currently disabled and the
    # values are hard-coded below):
    #   python IMP.py -i network.txt -k 5 -m IC -t 60
    time_start = time.time()

    # Input graph; other data sets mentioned by the author: Amazon2, DBLP2,
    # network, network5, testNetwork (the Amazon/DBLP files have two columns).
    network_file = "F:/py/Network.txt"

    # Step 1: load the graph and print the -log(weight) matrix.
    nodetable = Load_data_IC(network_file)
    print(nodetable)

    # Step 2: all-pairs shortest distances.
    print("begindijk", time.time() - time_start)
    dijk = getdijk_IC()

    # Step 3: build MIIA/MIOA.  The threshold is approximately e^-50; the
    # author also notes 0.05 as an alternative.
    getMMM(1.928749847963923e-22)

    # Step 4: select 5 seeds and report them with the total running time.
    print("beginmia", time.time() - time_start)
    S = MIA(5)
    time_end = time.time()
    print(S)
    print(time_end - time_start)
    # Seed sets recorded by the author (input file not stated; unverified):
    #   initial version:             [33, 51, 60, 46, 53]
    #   after changing Load_data_IC: [26, 27, 30, 42, 46]
    #   after changing getMMM:       [32, 43, 58, 60, 29]