这是一个统计图谱中不同类别之间相互链接频次的程序。简而言之：图谱中存在很多节点，每个节点都属于各自的类别；我们对整个图谱做深度优先遍历，把类别与类别之间的链接频次统计出来。最后的结果是一个 dict，它的键是每一种类别，结果示意如下：
{
    'label': {
        'in': {
            'rel_type': {
                'label_?': 0
            }
        },
        'out': {
            'rel_type': {
                'label_?': 123
            }
        }
    }
}
其中 in 和 out 分别代表入边和出边，完整的程序如下所示：
def freq_of_labels(graph, matcher, max_depth=3):
    """Count how often node labels are linked to each other in a graph.

    Each node identity is processed at most once. For every relationship
    returned by ``graph.match((node, None), r_type=None)`` two counters are
    incremented: the start label's outgoing counter and the end label's
    incoming counter, both keyed by relationship name.

    Only the first label of a node is used (multi-label nodes are not
    handled), and only the first entry of ``rel.types()`` is used as the
    relationship name.

    Args:
        graph: Graph-like object exposing ``nodes`` (iterable) and
            ``match((start_node, None), r_type=None)``.
        matcher: Object whose ``get(identity)`` returns the node with that
            identity.
        max_depth: Number of levels to follow outgoing relationships from
            each starting node before returning to the outer loop.

    Returns:
        dict: ``{label: {"in": {rel_name: {other_label: count}},
        "out": {rel_name: {other_label: count}}}}``.

    Raises:
        IndexError: If a processed node has no labels or a relationship
            reports no type.
    """
    label_link_count = {}
    visited = set()

    def bump(label, direction, rel_name, other_label):
        """Add one to label -> direction -> rel_name -> other_label."""
        entry = label_link_count.setdefault(label, {"in": {}, "out": {}})
        per_rel = entry[direction].setdefault(rel_name, {})
        per_rel[other_label] = per_rel.get(other_label, 0) + 1

    def traverse(node_id, depth):
        """Depth-first walk over outgoing relationships of ``node_id``."""
        if node_id in visited:
            return
        visited.add(node_id)

        # Fetch the node once; it serves both as the start of the
        # relationship query and as the source of the label.
        start = matcher.get(node_id)
        label = list(start.labels)[0]

        for rel in graph.match((start, None), r_type=None):
            end_node = rel.end_node
            end_label = list(end_node.labels)[0]
            rel_name = list(rel.types())[0]

            # Outgoing edge as seen from the start label.
            bump(label, "out", rel_name, end_label)
            # Incoming edge as seen from the end label. The bucket must be
            # created under end_label, otherwise existing counts are lost
            # or the lookup fails with KeyError.
            bump(end_label, "in", rel_name, label)

            if depth > 0:
                traverse(end_node.identity, depth - 1)

    for item in graph.nodes:
        # traverse() works on identities (it recurses with
        # end_node.identity), so node objects are reduced to their identity.
        # NOTE(review): confirm what iterating ``graph.nodes`` yields for the
        # py2neo version in use.
        traverse(getattr(item, "identity", item), max_depth)

    return label_link_count
这里的 matcher 指的是 py2neo 的 NodeMatcher，创建方式如下：
from py2neo import Graph, NodeMatcher
# Create a Graph handle for a Neo4j server at localhost:7474 with the literal
# credentials below.
# NOTE(review): these look like stock neo4j/neo4j credentials — confirm before use.
graph = Graph('http://localhost:7474', auth=('neo4j', 'neo4j'))
# Node matcher bound to the graph above; this is the object passed as the
# ``matcher`` argument of freq_of_labels.
matcher = NodeMatcher(graph)