6541人阅读 评论(1)

一、基本Label Propagation算法原理

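$$C_x\left ( t \right )=f\left ( C_{x_{i1}}\left ( t \right ),\cdots ,C_{x_{im}}\left ( t \right ),C_{x_{i(m+1)}}\left ( t-1 \right ),\cdots ,C_{x_{ik}}\left ( t-1 \right ) \right )$$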

Label Propagation算法的过程如下：

• 对网络中的每一节点初始化其所属社区标签，如对于节点$x$，初始化其社区标签为$C_x\left ( 0 \right )=x$
• 设置代数$t$
• 对于网络中的节点设置其遍历顺序和节点的集合$X$
• 对于每一个节点$x\in X$，令$C_x\left ( t \right )=f\left ( C_{x_{i1}}\left ( t \right ),\cdots ,C_{x_{im}}\left ( t \right ),C_{x_{i(m+1)}}\left ( t-1 \right ),\cdots ,C_{x_{ik}}\left ( t-1 \right ) \right )$
• 判断是否可以迭代结束，如果否，则设置$t=t+1$，重新遍历。

(图片来自百度百科)

$$w_{i,j}=\alpha \lambda _{i,j}+\beta \lambda _{j,i}$$

三、实验

0   2   1
2   0   2
0   3   2
3   0   1
0   4   3
4   0   1
0   5   2
5   0   1
1   2   3
2   1   1
1   4   5
4   1   2
1   7   1
7   1   4
2   4   2
4   2   2
2   5   9
5   2   7
2   6   1
6   2   4
3   7   1
7   3   5
4   10  1
10  4   4
5   7   1
7   5   2
5   11  1
11  5   2
6   7   3
7   6   7
6   11  5
11  6   2
8   9   1
9   8   6
8   10  4
10  8   2
8   11  2
11  8   1
8   14  5
14  8   3
8   15  8
15  8   5
9   12  2
12  9   1
9   14  1
14  9   2
10  11  10
11  10  1
10  12  2
12  10  3
10  13  9
13  10  8
10  14  8
14  10  7
11  13  1
13  11  4


#####################################
# Author:zhaozhiyong
# Date:20160602
# Fun:Label Propagation
#####################################
import string

# Body of the edge-list loader: reads tab-separated records from `filePath`
# and builds three dicts keyed by node-id string.
#
# NOTE(review): the enclosing `def ...(filePath):` line and the loop header
# that binds `line` are not present in this listing (as are all indents);
# they appear to have been lost when the code was extracted. As shown, the
# trailing `return` is outside any function.
# NOTE(review): `string.atoi` exists only in Python 2; `int()` is the
# Python 3 equivalent.
f = open(filePath)
# node id (str) -> initial community label (the node's own id as an int)
vector_dict = {}
# source id -> list of "target:weight" strings
edge_dict_out = {}#out
# target id -> list of "source:weight" strings
edge_dict_in = {}#in

# Expected fields: lines[0] = source, lines[1] = target, lines[2] = weight.
lines = line.strip().split("\t")
# Every endpoint starts in its own community.
if lines[0] not in vector_dict:
vector_dict[lines[0]] = string.atoi(lines[0])
if lines[1] not in vector_dict:
vector_dict[lines[1]] = string.atoi(lines[1])

# Record the outgoing edge; a record without exactly three fields still
# creates the (possibly empty) list for its source but adds no edge.
if lines[0] not in edge_dict_out:
edge_list = []
if len(lines) == 3:
edge_list.append(lines[1] + ":" + lines[2])
edge_dict_out[lines[0]] = edge_list
else:
edge_list = edge_dict_out[lines[0]]
if len(lines) == 3:
edge_list.append(lines[1] + ":" + lines[2])
edge_dict_out[lines[0]] = edge_list

# Record the same edge from the target's point of view (incoming edge).
if lines[1] not in edge_dict_in:
edge_list = []
if len(lines) == 3:
edge_list.append(lines[0] + ":" + lines[2])
edge_dict_in[lines[1]] = edge_list
else:
edge_list = edge_dict_in[lines[1]]
if len(lines) == 3:
edge_list.append(lines[0] + ":" + lines[2])
edge_dict_in[lines[1]] = edge_list

f.close()
return vector_dict, edge_dict_out, edge_dict_in

# Body of the "pick the dominant neighbour label" routine: sums neighbour
# weights per community label and returns the label with the largest total.
#
# NOTE(review): the enclosing `def` line and the loop header that binds
# `node` are not present in this listing; `vector_dict` is presumably a
# parameter (node id -> label) and `node` presumably iterates over
# "id:weight" strings -- confirm against the original source.
# label -> accumulated weight of the neighbours carrying that label
label_dict = {}
# generate the label_dict
node_id_weight = node.strip().split(":")
node_id = node_id_weight[0]
node_weight = float(node_id_weight[1])
if vector_dict[node_id] not in label_dict:
label_dict[vector_dict[node_id]] = node_weight
else:
label_dict[vector_dict[node_id]] += node_weight

# find the max label
# Sort (label, weight) pairs by weight, heaviest first.
sort_list = sorted(label_dict.items(), key = lambda d: d[1], reverse=True)

# Label of the heaviest pair; raises IndexError if label_dict is empty.
return sort_list[0][0]

def check(vector_dict, edge_dict):
    """Report whether the current labelling is stable.

    A node is stable when its own label equals the label with the largest
    summed weight among its neighbours.

    Args:
        vector_dict: maps node id (str) to its current community label.
        edge_dict: maps node id (str) to a list of "neighbour_id:weight"
            strings.

    Returns:
        1 if every node is stable, 0 as soon as one unstable node is found.

    Raises:
        KeyError: if a neighbour id is not a key of ``vector_dict``.
    """
    # NOTE(review): the loop over a node's neighbours was missing from the
    # listing (``node_id_weight`` was never bound); it is reconstructed here
    # from the "id:weight" entry format.
    for node, node_label in vector_dict.items():
        # label -> total weight of the neighbours carrying that label
        label_check = {}
        for neighbour in edge_dict.get(node, ()):
            node_id_weight = neighbour.strip().split(":")
            neighbour_label = vector_dict[node_id_weight[0]]
            label_check[neighbour_label] = (
                label_check.get(neighbour_label, 0.0) + float(node_id_weight[1])
            )

        # A node without neighbours has nothing to disagree with; treating
        # it as stable avoids indexing into an empty ranking.
        if not label_check:
            continue

        # max() returns the first maximal entry, matching the stable
        # descending sort followed by [0] that was used before.
        dominant_label = max(label_check.items(), key=lambda d: d[1])[0]
        if node_label != dominant_label:
            return 0

    return 1

def label_propagation(vector_dict, edge_dict_out, edge_dict_in):
    """Run label propagation until ``check`` reports a stable labelling.

    For every node the outgoing and incoming edge lists are merged into one
    neighbour list whose weight is ``out_weight + 0.5 * in_weight``. Labels
    are then updated node by node, each node taking the label with the
    largest summed neighbour weight, until ``check`` returns non-zero.

    Args:
        vector_dict: maps node id (str) to its community label. Updated in
            place.
        edge_dict_out: maps node id to a list of "target_id:weight" strings.
        edge_dict_in: maps node id to a list of "source_id:weight" strings.

    Returns:
        The same ``vector_dict`` object, holding the final labels.

    NOTE(review): nothing bounds the number of iterations; whether the loop
    always terminates on arbitrary input is not established by this code.
    """

    def _parse(edge_list):
        # "id:weight" strings -> {id: weight}; the first entry for an id wins.
        parsed = {}
        for item in edge_list:
            parts = item.strip().split(":")
            if parts[0] not in parsed:
                parsed[parts[0]] = float(parts[1])
        return parsed

    def _max_weight_label(neighbours):
        # Label with the largest summed weight among "id:weight" neighbours.
        totals = {}
        for item in neighbours:
            parts = item.strip().split(":")
            label = vector_dict[parts[0]]
            totals[label] = totals.get(label, 0.0) + float(parts[1])
        return max(totals.items(), key=lambda d: d[1])[0]

    # rebuild edge_dict: node id -> merged list of "neighbour_id:weight"
    edge_dict = {}
    for node in vector_dict:
        # .get keeps nodes that only have edges in one direction usable.
        out_dict = _parse(edge_dict_out.get(node, []))
        in_dict = _parse(edge_dict_in.get(node, []))

        last_list = []
        for x in out_dict:
            # pop() so that only in-only neighbours remain in in_dict.
            result = out_dict[x] + 0.5 * in_dict.pop(x, 0.0)
            last_list.append(x + ":" + str(result))
        # Neighbours reachable only through an incoming edge. The previous
        # guard (`if not in_dict`) was inverted, so these were never added.
        for x in in_dict:
            last_list.append(x + ":" + str(0.5 * in_dict[x]))

        edge_dict[node] = last_list

    # initial state: every node already belongs to its own community
    t = 0
    while check(vector_dict, edge_dict) == 0:
        t = t + 1
        print("----------------------------------------")
        # Two spaces reproduce the output of `print "iteration: ", t`.
        print("iteration:  " + str(t))
        # NOTE(review): the update statement inside this loop was missing
        # from the listing; it is reconstructed from the max-weight rule
        # that check() verifies. Labels are updated in place, so later
        # nodes already see labels changed earlier in the same pass.
        for node in list(vector_dict):
            neighbours = edge_dict[node]
            if neighbours:
                vector_dict[node] = _max_weight_label(neighbours)
        print(vector_dict)

    return vector_dict

# Script entry point: print the loaded graph, run label propagation, then
# print the final community label of every node.
if __name__ == "__main__":
# NOTE(review): vector_dict, edge_dict_out and edge_dict_in are never
# assigned in the visible code; the call to the loader (and the data file
# path) appears to have been lost from this listing.
print vector_dict
print edge_dict_out
print edge_dict_in

#print "original community: ", vector_dict

# label_propagation returns the dict it was given, with updated labels.
vec_new = label_propagation(vector_dict, edge_dict_out, edge_dict_in)

print "---------------------------------------------------------"
print "the final result: "
# One line per node: "<node id> ---> <community label>".
for key in vec_new.keys():
print str(key) + " ---> " + str(vec_new[key])


0
0

* 以上用户言论只代表其个人观点，不代表CSDN网站的观点或立场
个人声明

101620539

博客的主要内容主要是自己的学习笔记，并结合个人的理解，供各位在学习过程中参考，若有疑问，欢迎提出；若有侵权，请告知博主删除，原创文章转载还请注明出处。

-----------------

我写的书：

购买链接：

京东-Python机器学习算法
个人资料
• 访问：1351095次
• 积分：11054
• 等级：
• 排名：第1675名
• 原创：155篇
• 转载：1篇
• 译文：1篇
• 评论：575条
博客专栏
 深度学习Deep Learning 文章：10篇 阅读：48102
 优化算法 文章：14篇 阅读：152718
 机器学习，数据挖掘算法 文章：42篇 阅读：610998
联系我
Email：zhaozhiyong1989@126.com

评论排行
最新评论