开始代码:
from neo4j import GraphDatabase
from collections import Counter
# Alternative connection to a local instance, kept for reference:
# driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "password"))
# Module-level Neo4j driver shared by the search_* query helpers in this script.
# NOTE(review): host, port and credentials are hard-coded — consider loading them from configuration.
driver = GraphDatabase.driver("bolt://172.18.116.129:8687", auth=("neo4j", "neo4j123"))
def read_from_file(file_name):
    """Read the entity names stored on the first line of a text file.

    The first line is split on the ideographic comma "、". Only that line is
    used; any further lines in the file are ignored.

    Args:
        file_name: Path of a UTF-8 encoded text file.

    Returns:
        list: The names from the first line, in file order. An empty list is
        returned when the file is empty or its first line is blank.
    """
    with open(file_name, "r", encoding='utf-8') as f:
        # Only the first line is consumed, so the rest is never read.
        first_line = f.readline()
    # Drop the line terminator; otherwise the last name would be stored as
    # "name\n" and never compare equal to the same name elsewhere.
    first_line = first_line.rstrip("\r\n")
    if not first_line:
        return []
    return first_line.split('、')
# Share of the final score that comes from entity frequency.
_ENTITY_SHARE = 0.7
# (start, stop, share): positional slice of the entity list and the share of
# the frequency score given to that slice.
_GROUP_SPECS = (
    (0, 10, 0.4),
    (10, 50, 0.3),
    (50, 200, 0.2),
    (200, 1000, 0.1),
)


def _group_weights(group, group_share):
    """Return {entity: weight} for one group.

    Each weight is 0.7 * group_share * (count / group size), rounded to five
    decimal places. An empty group yields an empty dict.
    """
    total = len(group)
    return {
        name: round(_ENTITY_SHARE * group_share * (count / total), 5)
        for name, count in Counter(group).items()
    }


def _entity_weights(all_list):
    """Return the frequency weight of every entity across the four groups.

    An entity that occurs in more than one group receives the sum of its
    per-group weights instead of only the weight of the last group seen.
    """
    weights = {}
    for start, stop, share in _GROUP_SPECS:
        for name, weight in _group_weights(all_list[start:stop], share).items():
            # Re-round after adding so float noise does not leak into the result.
            weights[name] = round(weights.get(name, 0.0) + weight, 5)
    return weights


if __name__ == '__main__':
    file_name = r'C:\Users\43512\Desktop\test(2).txt'
    all_list = read_from_file(file_name)
    # Frequency-based weights, consumed later when merging with graph weights.
    dict1 = _entity_weights(all_list)
def search_node(labels):
    """Return the names of all nodes directly related to the given nodes.

    Args:
        labels: Iterable of node names; each is matched against the `name`
            property of a node.

    Returns:
        list: One `p.name` value per matched (n)-[]-(p) pattern, grouped in
        the order of `labels`. Duplicates are kept. Empty input returns an
        empty list without opening a session.
    """
    labels = list(labels)
    if not labels:
        return []
    # The name is sent as a query parameter instead of being spliced into the
    # Cypher text, so names containing quotes cannot break or alter the query.
    query = "MATCH (n)-[]-(p) WHERE n.name = $name RETURN p.name"
    neighbor_names = []
    with driver.session() as session:
        for label in labels:
            for record in session.run(query, name=label):
                neighbor_names.append(record.get('p.name'))
    return neighbor_names
def search_xietong(labels):
    """Return the names of nodes linked to the given nodes by a 协同 relationship.

    Args:
        labels: Iterable of node names; each is matched against the `name`
            property of a node.

    Returns:
        list: One `p.name` value per matched (n)-[:协同]-(p) pattern, grouped
        in the order of `labels`. Duplicates are kept. Empty input returns an
        empty list without opening a session.
    """
    labels = list(labels)
    if not labels:
        return []
    # Parameterized so that names containing quotes cannot break the query.
    query = "MATCH (n)-[:协同]-(p) WHERE n.name = $name RETURN p.name"
    xietong_list = []
    with driver.session() as session:
        for label in labels:
            for record in session.run(query, name=label):
                xietong_list.append(record.get('p.name'))
    return xietong_list
def search_buji(labels):
    """Return the names of nodes linked to the given nodes by a 补给 relationship.

    Args:
        labels: Iterable of node names; each is matched against the `name`
            property of a node.

    Returns:
        list: One `p.name` value per matched (n)-[:补给]-(p) pattern, grouped
        in the order of `labels`. Duplicates are kept. Empty input returns an
        empty list without opening a session.
    """
    labels = list(labels)
    if not labels:
        return []
    # Parameterized so that names containing quotes cannot break the query.
    query = "MATCH (n)-[:补给]-(p) WHERE n.name = $name RETURN p.name"
    buji_list = []
    with driver.session() as session:
        for label in labels:
            for record in session.run(query, name=label):
                buji_list.append(record.get('p.name'))
    return buji_list
if __name__ == '__main__':
    # Kept under the script guard because dict1 is only built when the file
    # is run as a script.
    labels = ['惠特贝岛号船坞登陆舰', '梅波', '约旦海军基地']
    neighbor_list = search_node(labels)
    xietong_list = search_xietong(labels)
    buji_list = search_buji(labels)

    # Neighbours reached through a 协同 or 补给 relationship get the larger
    # share; every other neighbour gets the smaller one.
    both_list = xietong_list + buji_list
    both_names = set(both_list)  # set lookup instead of scanning the list per item
    other_list = [name for name in neighbor_list if name not in both_names]

    dict2 = dict.fromkeys(other_list, 0.3 * 0.4)
    dict2.update(dict.fromkeys(both_list, 0.3 * 0.6))

    # Counter addition sums the weights of entities present in both dicts.
    merged = dict(Counter(dict1) + Counter(dict2))
    line = sorted(merged.items(), key=lambda x: x[1], reverse=True)
    print("-------------按权重大小输出结果:-------------")
    for i in line:
        print(i)
修改之后:
import math
from collections import Counter
from app.neo4j import graph
# Entity-name weight: 70%
# (the constants below are stored as tenths, e.g. 7 for 70% — presumably so that
# the products used in the calc_* functions land on a 0-100 scale; confirm)
TAG_WEIGHT = 7
# Entity-name weight: first 10 entries, 40%
TAG_G1 = 4
# Entity-name weight: entries 10-50, 30%
TAG_G2 = 3
# Entity-name weight: entries 50-200, 20%
TAG_G3 = 2
# Entity-name weight: entries 200-1000, 10%
TAG_G4 = 1
# Graph-relationship weight: 30%
GRAPH_WEIGHT = 3
# 协同 and 补给 relationships: 60%
GRAPH_G1 = 6
# All other relationships: 40%
GRAPH_G2 = 4
def calc_rate(group_list, group_index=1):
    """Compute an integer weight for every distinct entity in one group.

    Args:
        group_list: Iterable of entity names; repeated names count as
            repeated occurrences.
        group_index: Which group the names belong to (1-4). Any other value
            is treated like group 1.

    Returns:
        dict: {name: weight}, where weight is
        ceil(occurrences * TAG_WEIGHT * group weight / total occurrences).
    """
    occurrences = Counter(group_list)
    occurrence_total = sum(occurrences.values())

    # Pick the weight of the matching group; fall back to TAG_G1 when no
    # index matches.
    tag_group_weight = TAG_G1
    for candidate, weight in ((1, TAG_G1), (2, TAG_G2), (3, TAG_G3), (4, TAG_G4)):
        if group_index == candidate:
            tag_group_weight = weight
            break

    weights = {}
    for key, value in occurrences.items():
        weights[key] = math.ceil(value * TAG_WEIGHT * tag_group_weight / occurrence_total)
    return weights
def calc_group_rate(labels):
    """Compute graph-relationship weights for the nodes matched by `labels`.

    Every relationship attached to a node whose `name` is in `labels` yields
    one occurrence of that node's name. Occurrences reached through a 协同 or
    补给 relationship share GRAPH_WEIGHT * GRAPH_G1; all other occurrences
    share GRAPH_WEIGHT * GRAPH_G2. Within each bucket a name's weight is
    ceil(occurrences * bucket share / bucket total), and the two buckets are
    summed per name.

    Args:
        labels: A node name or an iterable of node names. None or an empty
            iterable returns an empty dict without querying the database.

    Returns:
        dict: {name: integer weight}.
    """
    if not labels:
        return {}
    if isinstance(labels, str):
        labels = [labels]
    labels = list(labels)

    # The names are sent as a query parameter. The previous f-string relied on
    # Python's list repr happening to be valid Cypher, which fails for tuples
    # and other iterables and lets a crafted name alter the query.
    # NOTE(review): the pattern returns `n`, i.e. the node matched by `labels`
    # itself, not the node on the other end of the relationship — confirm that
    # this is intended and that the neighbour was not meant to be returned.
    _cypher = "MATCH ()-[r]-(n) WHERE n.name IN $labels RETURN n, type(r) AS r_type"
    print(_cypher)

    name_list = []        # names reached through a 协同 or 补给 relationship
    other_name_list = []  # names reached through any other relationship
    with graph.session() as session:
        for record in session.run(_cypher, labels=labels):
            name = record.get("n").get("name")
            if record.get("r_type") in ("协同", "补给"):
                name_list.append(name)
            else:
                other_name_list.append(name)

    counter = Counter(name_list)
    total = sum(counter.values())
    other_counter = Counter(other_name_list)
    other_total = sum(other_counter.values())
    result = {
        key: math.ceil(value * GRAPH_WEIGHT * GRAPH_G1 / total)
        for key, value in counter.items()
    }
    other_result = {
        key: math.ceil(value * GRAPH_WEIGHT * GRAPH_G2 / other_total)
        for key, value in other_counter.items()
    }
    # Counter addition sums the values of keys present in both buckets.
    return dict(Counter(result) + Counter(other_result))
def calc_boost(name_list=None, labels=None):
    """Combine frequency weights and graph weights into one score per name.

    Args:
        name_list: Sequence of entity names. It is cut into the positional
            slices [0:10], [10:50], [50:200] and [200:1000], which are scored
            by calc_rate as groups 1 to 4. Defaults to an empty list.
        labels: Node names passed to calc_group_rate. Defaults to an empty
            list.

    Returns:
        dict: {name: weight}; a name scored by several groups or by the graph
        query receives the sum of its weights.
    """
    name_list = [] if name_list is None else name_list
    labels = [] if labels is None else labels

    windows = (slice(0, 10), slice(10, 50), slice(50, 200), slice(200, 1000))
    groups = [name_list[window] for window in windows]

    combined = Counter()
    # Groups are numbered from 1, matching calc_rate's group_index.
    for group_index, group in enumerate(groups, start=1):
        combined = combined + Counter(calc_rate(group, group_index))
    # Merge the graph-derived weights; identical names are summed.
    combined = combined + Counter(calc_group_rate(labels))
    return dict(combined)
if __name__ == '__main__':
    import os

    # Connection settings exported before app.neo4j is imported below.
    # NOTE(review): app.neo4j is also imported at the top of this file; if it
    # reads these variables at import time they are set too late — confirm.
    os.environ.update({
        'NEO4J_HOST': "172.18.116.129",
        'NEO4J_PORT': "8687",
        'NEO4J_USER': "neo4j",
        'NEO4J_PASSWORD': "neo4j123",
    })
    from app.neo4j import graph

    # Sample input: entity names in ranked order, with repeats.
    _name_list = [
        "里根号航空母舰", "蓝岭号两栖指挥舰", "莱特湾号巡洋舰", "南海", "拉森号驱逐舰", "仁川号护卫舰", "护身符军刀演习", "B-52轰炸机", "B-52轰炸机", "P-8反潜机",
        "护身符军刀演习", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机",
        "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机",
        "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "罗马尼亚海", "罗马尼亚海",
        "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海",
    ]
    # Sample node names for the graph lookup.
    _labels = ['惠特贝岛号船坞登陆舰', '梅波', '约旦海军基地']

    boost = calc_boost(_name_list, _labels)
    print(boost)