有关新闻推荐

开始代码:

from neo4j import GraphDatabase
from collections import Counter
# driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "password"))
# Module-level Neo4j driver; the search functions in this script open their
# sessions on it via driver.session().
# NOTE(review): the server address and credentials are hard-coded in source —
# consider loading them from configuration or environment variables.
driver = GraphDatabase.driver("bolt://172.18.116.129:8687", auth=("neo4j", "neo4j123"))


def read_from_file(file_name):
    """Read the entity names stored on the first line of a text file.

    The names are expected to be separated by the ideographic comma "、".
    Only the first line is used; any further lines are ignored, which matches
    what the previous implementation returned.

    Args:
        file_name: Path of the UTF-8 encoded text file.

    Returns:
        The names from the first line, in file order. An empty list is
        returned when the file is empty or its first line is blank.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_name, "r", encoding='utf-8') as f:
        first_line = f.readline()

    # Strip the line terminator so the last name does not end with "\n".
    first_line = first_line.rstrip("\r\n")
    if not first_line:
        return []
    return first_line.split('、')

# Share of the final score that comes from entity-name frequency.
NAME_SHARE = 0.7
# Share of the final score that comes from graph neighbours.
GRAPH_SHARE = 0.3
# (start, stop, share) of each positional group inside the entity list.
NAME_GROUPS = (
    (0, 10, 0.4),
    (10, 50, 0.3),
    (50, 200, 0.2),
    (200, 1000, 0.1),
)


def group_weights(group, group_share, name_share=NAME_SHARE):
    """Weight every distinct entity of one group by its relative frequency.

    Args:
        group: Entity names of the group; duplicates count as occurrences.
        group_share: Fraction of the name score assigned to this group.
        name_share: Fraction of the total score assigned to entity names.

    Returns:
        dict mapping each distinct name to
        ``round(name_share * group_share * count / len(group), 5)``.
        An empty group yields an empty dict.
    """
    counts = Counter(group)
    total = sum(counts.values())
    return {
        name: round(name_share * group_share * (count / total), 5)
        for name, count in counts.items()
    }


def graph_weights(all_neighbors, priority_neighbors, graph_share=GRAPH_SHARE):
    """Assign a fixed weight to every graph neighbour.

    Args:
        all_neighbors: Names reached through any relationship.
        priority_neighbors: Names reached through a "协同" or "补给"
            relationship.
        graph_share: Fraction of the total score assigned to the graph.

    Returns:
        dict mapping priority neighbours to ``graph_share * 0.6`` and every
        other neighbour to ``graph_share * 0.4``.
    """
    priority = set(priority_neighbors)
    weights = {}
    for name in all_neighbors:
        if name not in priority:
            weights[name] = graph_share * 0.4
    for name in priority_neighbors:
        weights[name] = graph_share * 0.6
    return weights


def merge_weights(*weight_maps):
    """Add up the weights of several name -> weight mappings.

    A name present in more than one mapping receives the sum of its weights
    (earlier code overwrote the weight of a name that appeared in several
    groups). Sums are rounded to 5 decimals to hide float noise.
    """
    totals = Counter()
    for weights in weight_maps:
        totals.update(weights)
    return {name: round(score, 5) for name, score in totals.items()}


def search_neighbors(labels, rel_types=None):
    """Return the names of the nodes adjacent to the nodes named in labels.

    Args:
        labels: Names of the start nodes.
        rel_types: Optional list of relationship types to restrict the
            search to; None or empty means any relationship.

    Returns:
        List of ``p.name`` values, one per matched relationship, grouped in
        the order of ``labels``.
    """
    if rel_types:
        query = ("MATCH (n)-[r]-(p) WHERE n.name = $name "
                 "AND type(r) IN $rel_types RETURN p.name")
    else:
        query = "MATCH (n)-[]-(p) WHERE n.name = $name RETURN p.name"

    names = []
    with driver.session() as session:
        for label in labels:
            # Names are passed as query parameters instead of being pasted
            # into the Cypher text, so quotes in a name cannot break the query.
            parameters = {"name": label}
            if rel_types:
                parameters["rel_types"] = list(rel_types)
            for record in session.run(query, parameters):
                names.append(record.get('p.name'))
    return names


def search_node(labels):
    """Return the neighbours of ``labels`` over any relationship."""
    return search_neighbors(labels)


def search_xietong(labels):
    """Return the neighbours of ``labels`` over "协同" relationships."""
    return search_neighbors(labels, ['协同'])


def search_buji(labels):
    """Return the neighbours of ``labels`` over "补给" relationships."""
    return search_neighbors(labels, ['补给'])


def main():
    """Rank entities by combined name-frequency and graph-neighbour weight."""
    file_name = r'C:\Users\43512\Desktop\test(2).txt'
    all_list = read_from_file(file_name)

    # Name score: each positional group distributes its share over its
    # entities in proportion to how often they occur in that group.
    name_weights = merge_weights(
        *(group_weights(all_list[start:stop], share)
          for start, stop, share in NAME_GROUPS)
    )

    labels = ['惠特贝岛号船坞登陆舰', '梅波', '约旦海军基地']
    try:
        neighbors = search_node(labels)
        both_list = search_xietong(labels) + search_buji(labels)
    finally:
        # Release the connection pool once all queries are done.
        driver.close()
    relation_weights = graph_weights(neighbors, both_list)

    scores = merge_weights(name_weights, relation_weights)
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    print("-------------按权重大小输出结果:-------------")
    for entry in ranked:
        print(entry)


if __name__ == '__main__':
    main()

修改之后:

import math
from collections import Counter
from app.neo4j import graph

# Weight of the entity-name score: 70%
TAG_WEIGHT = 7
# Entity-name weight for the first 10 items: 40%
TAG_G1 = 4
# Entity-name weight for items 10-50: 30%
TAG_G2 = 3
# Entity-name weight for items 50-200: 20%
TAG_G3 = 2
# Entity-name weight for items 200-1000: 10%
TAG_G4 = 1
# Weight of the graph-relationship score: 30%
GRAPH_WEIGHT = 3
# "协同" and "补给" relationships: 60%
GRAPH_G1 = 6
# All other relationships: 40%
GRAPH_G2 = 4


def calc_rate(group_list, group_index=1):
    """Score each entity of one name group by its relative frequency.

    Args:
        group_list: Entity names of the group; duplicates count as repeated
            occurrences.
        group_index: Group number 1-4 selecting TAG_G1..TAG_G4. Any other
            value falls back to TAG_G1.

    Returns:
        dict mapping each distinct name to
        ``ceil(count * TAG_WEIGHT * group weight / total occurrences)``.
    """
    occurrences = Counter(group_list)
    occurrence_total = sum(occurrences.values())

    # Pick the weight of the requested group; unknown indexes keep TAG_G1.
    group_weight = TAG_G1
    candidates = (TAG_G1, TAG_G2, TAG_G3, TAG_G4)
    for position, candidate in enumerate(candidates, start=1):
        if group_index == position:
            group_weight = candidate
            break

    scores = {}
    for name, count in occurrences.items():
        scores[name] = math.ceil(count * TAG_WEIGHT * group_weight / occurrence_total)
    return scores


def calc_group_rate(labels):
    """Score the first-level graph neighbours of the nodes named in labels.

    Every relationship attached to a labelled node contributes one
    occurrence of the node at its other end. Occurrences reached through a
    "协同" or "补给" relationship share GRAPH_WEIGHT * GRAPH_G1; occurrences
    reached through any other relationship share GRAPH_WEIGHT * GRAPH_G2.
    A neighbour found in both sets receives the sum of both scores.

    The previous query returned the labelled node itself rather than its
    neighbour, so the result only ever contained the input labels.

    Args:
        labels: Names of the start nodes. None or an empty collection
            returns an empty dict without querying the database.

    Returns:
        dict mapping neighbour name to its integer score (rounded up).
    """
    if not labels:
        return {}

    # Labels travel as a query parameter instead of being formatted into the
    # Cypher text, so quotes inside a name cannot break or alter the query.
    _cypher = (
        "MATCH (s)-[r]-(n) WHERE s.name IN $labels "
        "RETURN n.name AS name, type(r) AS r_type"
    )
    priority_counter = Counter()  # neighbours via "协同" / "补给"
    other_counter = Counter()     # neighbours via any other relationship
    with graph.session() as session:
        for record in session.run(_cypher, labels=list(labels)):
            name = record.get("name")
            if record.get("r_type") in ("协同", "补给"):
                priority_counter[name] += 1
            else:
                other_counter[name] += 1

    priority_total = sum(priority_counter.values())
    other_total = sum(other_counter.values())
    result = {
        key: math.ceil(value * GRAPH_WEIGHT * GRAPH_G1 / priority_total)
        for key, value in priority_counter.items()
    }
    other_result = {
        key: math.ceil(value * GRAPH_WEIGHT * GRAPH_G2 / other_total)
        for key, value in other_counter.items()
    }

    # Add the scores of names that appear in both dictionaries.
    return dict(Counter(result) + Counter(other_result))


def calc_boost(name_list=None, labels=None):
    """Combine entity-name scores and graph-neighbour scores per name.

    The name list is cut into four positional groups (items 0-10, 10-50,
    50-200 and 200-1000), each scored by calc_rate with group numbers 1-4.
    The scores returned by calc_group_rate for ``labels`` are then added.

    Args:
        name_list: Ordered entity names; None is treated as an empty list.
        labels: Node names passed to calc_group_rate; None is treated as an
            empty list.

    Returns:
        dict mapping each name to the sum of all scores it received.
    """
    names = [] if name_list is None else name_list
    graph_labels = [] if labels is None else labels

    # Consecutive pairs of these bounds delimit the four groups.
    bounds = (0, 10, 50, 200, 1000)
    combined = Counter()
    for group_number, (start, stop) in enumerate(zip(bounds, bounds[1:]), start=1):
        combined = combined + Counter(calc_rate(names[start:stop], group_number))

    # Merge in the graph scores; names present on both sides are summed.
    combined = combined + Counter(calc_group_rate(graph_labels))
    return dict(combined)

if __name__ == '__main__':
    import os

    # Demo run: export the Neo4j connection settings, then score a sample
    # entity list together with three graph labels and print the result.
    # NOTE(review): presumably app.neo4j reads these variables when it is
    # imported — confirm, and check whether an earlier import of the same
    # module at the top of the file already happened before they were set.
    os.environ.update({
        'NEO4J_HOST': "172.18.116.129",
        'NEO4J_PORT': "8687",
        'NEO4J_USER': "neo4j",
        'NEO4J_PASSWORD': "neo4j123",
    })
    from app.neo4j import graph

    _labels = ['惠特贝岛号船坞登陆舰', '梅波', '约旦海军基地']
    _name_list = [
        "里根号航空母舰", "蓝岭号两栖指挥舰", "莱特湾号巡洋舰", "南海", "拉森号驱逐舰", "仁川号护卫舰", "护身符军刀演习", "B-52轰炸机", "B-52轰炸机", "P-8反潜机",
        "护身符军刀演习", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机",
        "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机",
        "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "P-8反潜机", "罗马尼亚海", "罗马尼亚海",
        "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海", "罗马尼亚海",
    ]
    boost = calc_boost(_name_list, _labels)
    print(boost)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值