在知识图谱相关的工作中,难免会接触到图数据库,而Neo4j是这个领域排名领先的厂商,因此在这里整理一些自己在工作中常用的数据处理函数。此处主要面向使用Python开发语言的同仁们,欢迎copy。
1. 写入数据
直接代码奉上:
import json
from neo4j import GraphDatabase
def add_symptom_info(tx, symptom_info):
    """
    Upsert one ``Symptom`` node per entry of ``symptom_info``.

    The node is merged on ``name`` only and its attribute lists are then
    written with ``SET``. Merging on the full property map would create a
    second node whenever any attribute list differs from the stored one,
    while other queries look symptoms up by ``name`` alone.

    :param tx: transaction object exposing ``run(query, **parameters)``
    :param symptom_info: mapping of symptom name to an attribute dict, e.g.
        {"name": {"features": ["aaa"], "degrees": ["ccc"],
                  "triggers": [], "frequencies": []}}
    :return: None
    """
    # Attribute lists stored on every Symptom node.
    attribute_keys = ("features", "degrees", "triggers", "frequencies")
    query = (
        "MERGE (s:Symptom {name: $name}) "
        "SET s.features = $features, "
        "s.degrees = $degrees, "
        "s.triggers = $triggers, "
        "s.frequencies = $frequencies"
    )
    for symptom_name, object_dict in symptom_info.items():
        # A missing/None attribute dict or attribute falls back to an empty list.
        attributes = object_dict or {}
        params = {key: attributes.get(key) or [] for key in attribute_keys}
        tx.run(query, name=symptom_name, **params)
def add_disease_info(tx, disease_info):
    """
    Upsert a ``Disease`` node and link it to its body parts and symptoms.

    The node is merged on ``name`` and the remaining properties are written
    with ``SET`` so that absent (``None``) values do not break the MERGE.
    Every parameter referenced by the query, including ``cost``, is supplied.

    :param tx: transaction object exposing ``run(query, **parameters)``
    :param disease_info: dict read with the keys ``name``, ``genetic``,
        ``cost``, ``infectivity``, ``symptoms`` (list of symptom names) and
        ``part`` (list of body-part names); missing lists are treated as empty
    :return: None
    """
    disease_name = disease_info.get("name")
    # Missing lists are treated as "nothing to link" rather than iterating None.
    symptoms = disease_info.get("symptoms") or []
    part_list = disease_info.get("part") or []

    # Insert or update the disease node itself.
    tx.run(
        "MERGE (a:Disease {name: $name}) "
        "SET a.genetic = $genetic, "
        "a.cost = $cost, "
        "a.infectivity = $infectivity",
        name=disease_name,
        genetic=disease_info.get("genetic"),
        cost=disease_info.get("cost"),
        infectivity=disease_info.get("infectivity"),
    )

    # Create each body part on demand and relate the disease to it.
    for part in part_list:
        tx.run(
            "MATCH (d:Disease {name: $disease_name}) "
            "MERGE (a:Part {name: $name}) "
            "MERGE (a)<-[:发病部位]-(d)",
            name=part.strip(),
            disease_name=disease_name,
        )

    # Relate the disease to symptoms that already exist; unknown names match nothing.
    for symptom in symptoms:
        tx.run(
            "MATCH (d:Disease {name: $disease_name}) "
            "MATCH (s:Symptom {name: $name}) "
            "MERGE (s)-[:病症]-(d)",
            name=symptom.strip(),
            disease_name=disease_name,
        )
def import_data_to_kg():
    """
    Import symptom and disease data from local JSON files into Neo4j.

    Reads ``symptom_info_list_dict.json`` and ``disease_info_list.json``
    from the current working directory. Symptoms are written in a single
    write transaction; each disease gets its own write transaction.

    :return: None
    """
    driver = GraphDatabase.driver("bolt://localhost:7687", auth=("username", "password"))
    try:
        with driver.session() as session:
            # Add symptom data first so diseases can be linked to existing symptoms.
            # Files are read as UTF-8 explicitly and closed as soon as they are parsed.
            with open("symptom_info_list_dict.json", encoding="utf8") as symptom_file:
                symptom_info_dict = json.load(symptom_file)
            session.write_transaction(add_symptom_info, symptom_info_dict)

            # Add disease data.
            with open("disease_info_list.json", encoding="utf8") as disease_file:
                disease_info_list = json.load(disease_file)
            for disease_info in disease_info_list:
                session.write_transaction(add_disease_info, disease_info)
    finally:
        # Release the driver's connections even when the import fails.
        driver.close()
2. 查询数据
同样直接上代码:
import json
from neo4j import GraphDatabase
def load_disease_list(tx):
    """
    Read every ``Disease`` node and dump the result to a JSON file.

    Nodes are fetched ordered by name. For each node the properties
    ``name``, ``genetic``, ``therapies``, ``crowd``, ``part``, ``alias`` and
    ``infectivity`` are collected (``None`` when absent), and the list is
    written to ``disease_info_list.json`` in the current working directory
    as UTF-8 JSON.

    :param tx: transaction object exposing ``run(query)``
    :return: the list of disease dicts that was written to the file
    """
    # Output keys, in the order they appear in the JSON file.
    property_keys = (
        "name",         # disease name
        "genetic",      # whether it is hereditary
        "therapies",    # treatment options
        "crowd",        # most affected population
        "part",         # affected body part
        "alias",        # alternative names
        "infectivity",  # whether it is infectious
    )
    disease_info_list = []
    for record in tx.run("MATCH (d:Disease) RETURN d ORDER BY d.name"):
        disease_info = record["d"]
        disease_info_list.append({key: disease_info.get(key) for key in property_keys})

    # Write the data to a JSON file; the context manager closes and flushes it.
    with open("disease_info_list.json", mode="w", encoding="utf8") as output_file:
        json.dump(disease_info_list, output_file, ensure_ascii=False, indent=4)
    return disease_info_list
def load_node_data():
    """
    Export knowledge-graph node data from the local Neo4j instance.

    Opens a session and runs ``load_disease_list`` in a read transaction.

    :return: None
    """
    driver = GraphDatabase.driver("bolt://localhost:7687", auth=("username", "password"))
    try:
        with driver.session() as session:
            # Fetch the disease information.
            session.read_transaction(load_disease_list)
    finally:
        # Release the driver's connections even when the query fails.
        driver.close()
参考
- 官方github地址->【Neo4j Bolt Driver for Python】