Datawhale 知识图谱组队学习 之 Task 3 Neo4j图数据库导入数据

Neo4j环境配置及安装

这里可以参考网上的资料自行进行配置环境及安装

Neo4j数据导入

1. Neo4j 账号密码设置

要将数据导入 Neo4j 图数据库,首先需要打开 build_graph.py 文件,在类 MedicalGraph 中填入本地 Neo4j 图数据库的账号和密码。

2. 导入数据

python build_graph.py

3. 知识图谱展示

运行上述代码后,打开http://localhost:7474/browser/,可以看到导入数据的知识图谱

4. 主体类 MedicalGraph 介绍

class MedicalGraph:
    """Outline of the class that builds the medical knowledge graph.

    Every method in this outline is a placeholder: it does nothing and
    returns ``None``. The outline only lists the methods of the class.
    """

    def __init__(self):
        pass

    # Read the data file and collect the entities and the entity relations.
    def read_file(self):
        pass

    # Create the nodes that carry the given label.
    def create_node(self, label, nodes):
        pass

    # Create the disease nodes together with their attributes.
    def create_diseases_nodes(self, disease_info):
        pass

    # Create all entity nodes of the knowledge graph.
    def create_graphNodes(self):
        pass

    # Create all relations between the entities of the knowledge graph.
    def create_graphRels(self):
        pass

    # Create the edges of a single relation type.
    def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
        pass

5. 主体类 MedicalGraph 中关键代码讲解

  • 获取数据路径
    # Locate the data file next to this source file. os.path.dirname is used
    # instead of splitting on '/' so the directory is also found when the
    # absolute path uses another separator or the file sits directly under
    # the filesystem root.
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    self.data_path = os.path.join(cur_dir, 'data', 'disease.csv')
  • 连接 Neo4j 图数据库
   # Connect to the Neo4j server on localhost; replace the password
   # placeholder with the password of your own database.
   self.graph = Graph(
       "http://localhost:7474",
       username="neo4j",
       password="自己设定的密码",
   )
  • 读取文件,获得实体,实体关系(代码核心)
   def read_file(self):
       """
       Read the disease CSV and collect entities, relations and disease attributes.

       The file at ``self.data_path`` is read with the ``gb18030`` encoding and
       its columns are accessed by position, in this order: name, alias, part,
       age, infection, insurance, department, checklist, symptom, complication,
       treatment, drug, period, rate, money.

       :return: a 14-tuple ``(diseases, symptoms, aliases, parts, departments,
           complications, drugs, disease_to_alias, disease_to_symptom,
           diseases_to_part, disease_to_department, disease_to_complication,
           disease_to_drug, diseases_infos)``. The first seven items are sets
           of entity names, the next six are lists of ``[disease, other]``
           pairs, and the last is a list with one attribute dict per row.
       """
       # Entities
       diseases = []
       aliases = []
       symptoms = []
       parts = []
       departments = []
       complications = []
       drugs = []

       # Disease attributes: name, age, infection, insurance, checklist,
       # treatment, period, rate, money
       diseases_infos = []

       # Relations, each stored as [disease, other entity]
       disease_to_symptom = []
       disease_to_alias = []
       diseases_to_part = []
       disease_to_department = []
       disease_to_complication = []
       disease_to_drug = []

       all_data = pd.read_csv(self.data_path, encoding='gb18030').values
       for data in all_data:
           disease_dict = {}

           # Disease name. It is also recorded as an entity; without this
           # the returned set of diseases would always be empty.
           disease = str(data[0]).replace("...", " ").strip()
           disease_dict["name"] = disease
           diseases.append(disease)

           # Aliases: separators are turned into spaces, then split.
           line = re.sub("[,、;,.;]", " ", str(data[1])) if str(data[1]) else "未知"
           for alias in line.strip().split():
               aliases.append(alias)
               disease_to_alias.append([disease, alias])

           # Body parts. The fallback is a one-element list so that the loop
           # yields the whole placeholder instead of its single characters.
           part_list = str(data[2]).strip().split() if str(data[2]) else ["未知"]
           for part in part_list:
               parts.append(part)
               diseases_to_part.append([disease, part])

           # Age group
           disease_dict["age"] = str(data[3]).strip()
           # Infectiousness
           disease_dict["infection"] = str(data[4]).strip()
           # Medical insurance
           disease_dict["insurance"] = str(data[5]).strip()

           # Departments
           for department in str(data[6]).strip().split():
               departments.append(department)
               disease_to_department.append([disease, department])

           # Checklist
           disease_dict["checklist"] = str(data[7]).strip()

           # Symptoms. The last token is dropped - presumably a trailing
           # marker in the data; TODO confirm against the CSV.
           symptom_list = str(data[8]).replace("...", " ").strip().split()[:-1]
           for symptom in symptom_list:
               symptoms.append(symptom)
               disease_to_symptom.append([disease, symptom])

           # Complications. The last token is dropped as for symptoms; the
           # fallback is a list for the same reason as for body parts.
           complication_list = str(data[9]).strip().split()[:-1] if str(data[9]) else ["未知"]
           for complication in complication_list:
               complications.append(complication)
               disease_to_complication.append([disease, complication])

           # Treatment. The last four characters are cut off - presumably a
           # fixed suffix in the data; TODO confirm against the CSV.
           disease_dict["treatment"] = str(data[10]).strip()[:-4]

           # Drugs. The last token is dropped as for symptoms.
           drug_string = str(data[11]).replace("...", " ").strip()
           for drug in drug_string.split()[:-1]:
               drugs.append(drug)
               disease_to_drug.append([disease, drug])

           # Cure period
           disease_dict["period"] = str(data[12]).strip()
           # Cure rate
           disease_dict["rate"] = str(data[13]).strip()
           # Cost
           disease_dict["money"] = str(data[14]).strip() if str(data[14]) else "未知"

           diseases_infos.append(disease_dict)

       return set(diseases), set(symptoms), set(aliases), set(parts), set(departments), set(complications), \
               set(drugs), disease_to_alias, disease_to_symptom, diseases_to_part, disease_to_department, \
               disease_to_complication, disease_to_drug, diseases_infos
  • 实体信息:
    - diseases 疾病
    - aliases 别名
    - symptoms 症状
    - parts 部位
    - departments 科室
    - complications 并发症
    - drugs 药品
  • 实体关系:
    - disease_to_symptom 疾病与症状关系
    - disease_to_alias 疾病与别名关系
    - diseases_to_part 疾病与部位关系
    - disease_to_department 疾病与科室关系
    - disease_to_complication 疾病与并发症关系
    - disease_to_drug 疾病与药品关系
  • disease 实体 属性信息:
    - name
    - age 年龄
    - infection 传染性
    - insurance 医保
    - checklist 检查项
    - treatment 治疗方法
    - period 治愈周期
    - rate 治愈率
    - money 费用
  • 创建知识图谱实体
    def create_graphNodes(self):
        """
        Create all entity nodes of the knowledge graph.

        The data is obtained from ``read_file``. Disease nodes are created
        first from the per-disease attribute dicts; every other entity kind
        is then created with one ``create_node`` call per label.

        :return: None
        """
        (_diseases, symptoms, aliases, parts, departments, complications, drugs,
         _rel_alias, _rel_symptom, _rel_part, _rel_department, _rel_complication,
         _rel_drug, disease_infos) = self.read_file()

        self.create_diseases_nodes(disease_infos)

        # Label of the node kind paired with the names to create under it.
        labelled_names = (
            ("Symptom", symptoms),
            ("Alias", aliases),
            ("Part", parts),
            ("Department", departments),
            ("Complication", complications),
            ("Drug", drugs),
        )
        for label, names in labelled_names:
            self.create_node(label, names)
  • 创建知识图谱关系
    def create_graphRels(self):
        """
        Create all relations of the knowledge graph.

        The edge lists are obtained from ``read_file``; each list is handed
        to ``create_relationship`` together with the label of the target
        node, the relation type and the relation's display name. Every
        relation starts at a ``Disease`` node.

        :return: None
        """
        (_diseases, _symptoms, _aliases, _parts, _departments, _complications, _drugs,
         alias_edges, symptom_edges, part_edges, department_edges, complication_edges,
         drug_edges, _disease_infos) = self.read_file()

        # (target label, edges, relation type, relation name)
        relation_specs = (
            ("Alias", alias_edges, "ALIAS_IS", "别名"),
            ("Symptom", symptom_edges, "HAS_SYMPTOM", "症状"),
            ("Part", part_edges, "PART_IS", "发病部位"),
            ("Department", department_edges, "DEPARTMENT_IS", "所属科室"),
            ("Complication", complication_edges, "HAS_COMPLICATION", "并发症"),
            ("Drug", drug_edges, "HAS_DRUG", "药品"),
        )
        for target_label, edge_list, relation_type, relation_name in relation_specs:
            self.create_relationship("Disease", target_label, edge_list, relation_type, relation_name)
  • 创建实体关系边
    def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
        """
        Create one relationship in the graph for every distinct edge.

        :param start_node: label of the nodes the edges start from
        :param end_node: label of the nodes the edges point to
        :param edges: iterable of ``[start_name, end_name]`` pairs; duplicates
            are created only once
        :param rel_type: relationship type used in the ``create`` clause
        :param rel_name: value stored in the ``name`` property of the relationship
        :return: None
        """
        # De-duplicate on the pair itself (no separator string that a name
        # could contain) and keep the first-seen order so runs are repeatable.
        unique_edges = list(dict.fromkeys((edge[0], edge[1]) for edge in edges))
        total = len(unique_edges)

        # Labels and the relationship type cannot be passed as Cypher
        # parameters, so they are formatted into the text. The names are
        # passed as parameters so that quotes inside a name cannot break
        # the query.
        query = "match(p:%s),(q:%s) where p.name=$p_name and q.name=$q_name " \
                "create (p)-[rel:%s{name:$rel_name}]->(q)" % (start_node, end_node, rel_type)

        count = 0
        for p_name, q_name in unique_edges:
            try:
                self.graph.run(query, {"p_name": p_name, "q_name": q_name, "rel_name": rel_name})
            except Exception as e:
                # Best effort: report the failure and continue with the next edge.
                print(e)
            else:
                count += 1
                print(rel_type, count, total)
        return

参考资料

  1. QASystemOnMedicalGraph
  2. Datawhale知识图谱组队学习Task
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值