用python复现一下论文Influence maximization in social networks based on TOPSIS 中的方法。
自己的一些理解,由于有点菜,可能不太正确和严谨。
TOPSIS(逼近理想解排序法)是一种多准则决策方法:先求出各指标上的正理想解和负理想解,再按每个备选方案与两者的距离计算综合得分,得分最高者为最优方案。论文中用它来选择种子节点,评价指标包括直接影响(DS)、间接影响(IDS)、直接重叠(DO)、间接重叠(IDO)四项。对于这四项指标的权重分配,目前还没构思好,暂且用 [0.3, 0.2, 0.3, 0.2]。
代码
TOPSIS的实现
import numpy as np
def topsis(data1, weight=None, n=0):
    """Rank the rows of a decision matrix with TOPSIS and return the best row(s).

    Every column is treated as a benefit criterion: the positive ideal
    solution is the column-wise maximum of the normalised matrix and the
    negative ideal solution is the column-wise minimum.

    :param data1: pandas DataFrame, one row per alternative and one column
        per criterion. It is not modified; a copy carries the result columns.
    :param weight: one weight per column of ``data1``. ``None`` means equal
        weights (previously ``None`` raised a TypeError).
    :param n: unused; kept so existing callers that pass it keep working.
    :return: list of index labels of ``data1`` whose rank is 1. Ties are
        broken by row order, so the list normally holds one label.

    Side effect: prints the full result table (input columns plus the two
    distances, the score and the rank).
    """
    # Imported here because the TOPSIS listing itself only imports numpy.
    import pandas as pd

    n_criteria = data1.shape[1]
    if weight is None:
        weight = np.full(n_criteria, 1.0 / n_criteria)
    else:
        weight = np.asarray(weight, dtype=float)

    # Vector normalisation per column. A column that is entirely zero has
    # norm 0; divide by 1 instead so it stays 0 rather than becoming NaN.
    norms = np.sqrt((data1 ** 2).sum())
    norms = norms.where(norms != 0, 1.0)
    data = data1 / norms

    # Z holds the negative (min) and positive (max) ideal solutions.
    Z = pd.DataFrame([data.min(), data.max()], index=['负理想解', '正理想解'])

    # Weighted Euclidean distance of every row to each ideal solution.
    Result = data1.copy()
    Result['正理想解'] = np.sqrt(((data - Z.loc['正理想解']) ** 2 * weight).sum(axis=1))
    Result['负理想解'] = np.sqrt(((data - Z.loc['负理想解']) ** 2 * weight).sum(axis=1))

    # Closeness score. Both distances are 0 only when all alternatives are
    # identical on the weighted criteria; score them 0 instead of 0/0 = NaN
    # so that a rank-1 row always exists.
    total = Result['负理想解'] + Result['正理想解']
    Result['综合得分'] = Result['负理想解'] / total.where(total != 0, 1.0)
    Result['排序'] = Result['综合得分'].rank(ascending=False, method='first')
    print(Result)
    return Result[Result['排序'] == 1.0].index.tolist()
代码
TOPSIS_graph 在图中用于寻找种子节点
import pandas as pd
import get_G
import DS_IDS
import TOPSIS
import datetime
# Driver: greedily pick k seed nodes. The first seed is the highest-degree
# node; every later seed is the TOPSIS winner over the four criteria
# DS (degree), IDS (indirect spread), DO (direct overlap) and IDO (indirect
# overlap). The overlap columns start at 0 and are decreased each time a new
# seed is chosen, so they act as penalties.
#
# NOTE(review): the published listing lost its indentation. The nesting
# below is a reconstruction; the two ambiguous spots are marked.
start_time = datetime.datetime.now()
graph = get_G.read_graph_from_file('dblpcomunity.txt')
no = graph.number_of_nodes()
degree = get_G.get_degree(graph)[0]
IDS = DS_IDS.IDS(graph)

# Start the seed set with the highest-degree node.
u = max(degree, key=degree.get)
seed = [u]
weight = [0.3, 0.2, 0.3, 0.2]

# Decision matrix: one row per node, float columns. Rows follow the sorted
# node labels, which equals the former hard-coded 1..no order when the graph
# is labelled that way and also works for any other comparable labels.
nodes = sorted(graph.nodes())
A = pd.DataFrame(
    {
        'DS': [float(degree[v]) for v in nodes],
        'IDS': [float(IDS[v]) for v in nodes],
        'DO': 0.0,
        'IDO': 0.0,
    },
    index=nodes,
    columns=['DS', 'IDS', 'DO', 'IDO'],
)

k = 20
for _ in range(k - 1):
    # The newest seed is no longer a candidate.
    A = A.drop([u], axis=0)
    neighbors_of_u = set(graph.neighbors(u))
    # u can reach the same two-hop node w through several neighbours;
    # remember the ones already penalised so each is handled once per seed.
    penalised = set()
    for v in graph.neighbors(u):
        if v not in seed:
            # .loc writes into A itself; chained A['DO'][v] may hit a copy.
            A.loc[v, 'DO'] -= 1
        # NOTE(review): assumed to run for every neighbour v, not only for
        # non-seed v (the flattened source does not show which) - confirm.
        for w in graph.neighbors(v):
            if w in seed or w in penalised:
                continue
            penalised.add(w)
            # Penalty = number of common neighbours of u and w.
            common = neighbors_of_u.intersection(graph.neighbors(w))
            A.loc[w, 'IDO'] -= len(common)
    u = TOPSIS.topsis(A, weight, no)[0]
    seed.append(u)
# NOTE(review): assumed to print once after the loop - confirm.
print(seed)
end_time = datetime.datetime.now()
print("运行时间是 %s" % (end_time - start_time))
代码
DS_IDS:计算节点的直接影响(DS)和间接影响(IDS)
import math
# 图中所有节点的间接影响扩散值的字典
def IDS(G):
    """Return a dict mapping every node of ``G`` to its indirect spread value.

    For a node v the value is ``E1 + rv * E2`` where, summing over the
    neighbours u of v:

    * ``E1 = -sum(a * log(a))`` with ``a = degree[u] / degree1[v]``
    * ``E2 = -sum(b * log(b))`` with ``b = degree1[u] / degree2[v]``
    * ``rv = degree2[v] / max(degree2.values())``

    ``degree``, ``degree1`` and ``degree2`` are the three dictionaries
    returned by ``get_G.get_degree(G)``. A node without neighbours gets 0.

    NOTE(review): relies on ``get_G`` and ``math`` being importable at
    module level; the DS_IDS listing only shows ``import math`` - confirm.

    :param G: graph exposing ``nodes()`` and ``neighbors(node)``
    :return: dict ``{node: indirect spread value}``
    """
    degree, degree1, degree2 = get_G.get_degree(G)
    # Loop-invariant: compute the normaliser once instead of once per node.
    # default=0 keeps an empty graph from raising on max() of nothing.
    max_degree2 = max(degree2.values(), default=0)

    ids = {}
    for v in G.nodes():
        # max_degree2 is 0 only when no node has a neighbour; every value is
        # then 0 anyway, so avoid the division by zero.
        rv = degree2[v] / max_degree2 if max_degree2 else 0.0
        E1 = 0
        E2 = 0
        for u in G.neighbors(v):
            a = degree[u] / degree1[v]
            b = degree1[u] / degree2[v]
            E1 = E1 - a * math.log(a)
            E2 = E2 - b * math.log(b)
        ids[v] = E1 + rv * E2
    return ids
代码
get_G:从文件读入图,并计算各节点的度
def read_graph_from_file(path):
    """Build an undirected graph from a whitespace-separated edge-list file.

    Each line is split on whitespace. A line contributes the edge
    ``(int(first), int(second))`` when it has at least two tokens and the
    first two consist only of digits. Every other line (blank, too short,
    non-numeric) is skipped and reading continues to the end of the file.
    Previously the first blank line ended the read and a one-token line
    raised IndexError.

    NOTE(review): ``nx`` is not imported anywhere in the visible listing;
    presumably ``import networkx as nx`` at the top of get_G.py - confirm.

    :param path: path of the edge-list file
    :return: ``nx.Graph`` containing every accepted edge
    """
    edges_list = []
    # `with` closes the file even if parsing raises.
    with open(path) as fp:
        for line in fp:
            edge = line.split()
            if len(edge) >= 2 and edge[0].isdigit() and edge[1].isdigit():
                edges_list.append((int(edge[0]), int(edge[1])))

    graph = nx.Graph()
    graph.add_edges_from(edges_list)
    return graph
# 计算所有节点的度(出度):用来衡量节点自身的影响力。有向图就用出度,无向图就用度,这里先看做无向图
def get_degree(G):
    """Return three per-node dictionaries describing degree at growing range.

    * ``degree[v]``  - ``G.degree(v)``
    * ``degree1[v]`` - sum of ``degree[i]`` over the neighbours i of v
    * ``degree2[v]`` - sum of ``degree[j]`` over every neighbour j of every
      neighbour i of v (j may be v itself and may be counted several times)

    The graph is treated as undirected, so plain degree is used.

    :param G: graph exposing ``nodes()``, ``degree(node)`` and
        ``neighbors(node)``
    :return: tuple ``(degree, degree1, degree2)`` of dicts keyed by node, in
        the iteration order of ``G.nodes()``
    """
    nodes = list(G.nodes())
    degree = {node: G.degree(node) for node in nodes}
    degree1 = {
        node: sum(degree[i] for i in G.neighbors(node)) for node in nodes
    }
    # The inner sum over the neighbours of i is exactly degree1[i], so reuse
    # it instead of walking every two-hop path again.
    degree2 = {
        node: sum(degree1[i] for i in G.neighbors(node)) for node in nodes
    }
    return degree, degree1, degree2
结果