CELF
数据集:facebook
# -*- coding:utf8 -*-
import numpy as np
import time
from collections import Counter
from igraph import *
import random
import copy
import matplotlib.pyplot as plt
from random import uniform, seed
import pandas as pd
import time
from collections import Counter
def generate_p(num):
    """Return ``num`` random values drawn uniformly from [0, 1).

    The result is a one-dimensional NumPy array of length ``num``,
    produced by ``np.random.rand``.
    """
    return np.random.rand(num)
def IC(g, S, mc):
    """Estimate the spread of seed list ``S`` by repeated cascade simulation.

    ``g`` is indexed by its 'source' and 'target' columns (an edge table).
    Each of the ``mc`` runs starts from a copy of ``S`` and repeatedly:
    collects the targets of every edge leaving the current frontier, draws
    one uniform number per edge, compares it against a freshly generated
    random threshold from ``generate_p``, and promotes the targets that
    pass and are not yet activated to the next frontier.

    Returns the mean number of activated nodes over all runs. Progress is
    printed to stdout.
    """
    print('-----v', S)
    cascade_sizes = []
    for _ in range(mc):
        frontier = S[:]
        activated = S[:]
        while frontier != []:
            # Targets of every edge whose source lies on the current frontier.
            targets = g.loc[g['source'].isin(frontier)]['target'].tolist()
            # Draw the uniform samples first, then the thresholds, so the
            # random stream is consumed in the same order on every call.
            draws = np.random.uniform(0, 1, len(targets))
            thresholds = generate_p(len(targets))
            passed = draws < thresholds
            reached = list(np.extract(passed, np.array(targets)))
            # Only nodes that were not active before join the next frontier.
            frontier = list(set(reached) - set(activated))
            print('new nodes', frontier)
            activated += frontier
        cascade_sizes.append(len(activated))
    print(cascade_sizes)
    return np.mean(cascade_sizes)
def CELF(g, k, mc):
    """Select up to ``k`` seed nodes with the CELF lazy-greedy strategy.

    Parameters
    ----------
    g : edge table indexed by 'source' and 'target' columns; passed
        unchanged to ``IC``. Candidate seeds are the distinct values of
        the 'source' column.
    k : number of seeds requested. Fewer are returned when there are
        fewer candidates; ``k <= 0`` yields an empty selection.
    mc : number of Monte Carlo runs ``IC`` uses per spread estimate.

    Returns
    -------
    (S, spread, timelapse) where ``S`` is the list of chosen seeds in
    selection order, ``spread`` is the estimated spread of ``S`` (0 when
    nothing was selected), and ``timelapse`` holds the elapsed seconds
    since the call started, recorded after each seed is chosen.
    """
    start_time = time.time()
    S, timelapse = [], []
    spread = 0

    candidates = list(set(g['source'].tolist()))
    if k <= 0 or not candidates:
        return (S, spread, timelapse)

    # First pass: estimate every candidate's stand-alone spread and keep
    # the entries as mutable [gain, node] pairs sorted by decreasing gain.
    queue = sorted(
        ([IC(g, [node], mc), node] for node in candidates),
        key=lambda entry: entry[0],
        reverse=True,
    )

    # The best stand-alone node is the first seed.
    spread, first_seed = queue.pop(0)
    S.append(first_seed)
    timelapse.append(time.time() - start_time)

    for _ in range(min(k, len(candidates)) - 1):
        # Nodes whose marginal gain was recomputed against the current S.
        refreshed = set()
        # Lazy evaluation: only the entry at the head of the queue is
        # re-estimated. Once the head carries an up-to-date gain it is
        # accepted; tracking refreshed nodes guarantees termination even
        # though the Monte Carlo estimates are noisy.
        while queue[0][1] not in refreshed:
            head = queue[0]
            head[0] = IC(g, S + [head[1]], mc) - spread
            refreshed.add(head[1])
            queue.sort(key=lambda entry: entry[0], reverse=True)

        gain, node = queue.pop(0)
        S.append(node)
        spread += gain
        timelapse.append(time.time() - start_time)

    return (S, spread, timelapse)
# Load the edge table; IC and CELF index it by 'source' and 'target'.
data = pd.read_csv('../data/facebook1.csv')
# One random value per row of the edge table.
p = generate_p(len(data.index))
# Choose 3 seeds, using a single simulation run per spread estimate.
S, spread, timelapse = CELF(data, k=3, mc=1)
print(S, spread, timelapse)