知识图谱构建与查询-CSDN博客

本文链接：https://blog.csdn.net/weifuliu/article/details/104720238

概要
知识图谱是近些年比较热门的一项技术，能够应用于人工智能的多个领域。笔者近期的项目与知识图谱有关，也是第一次接触这一领域，特将自己学习的内容做个分析。
我们知道原始知识图谱数据集一般是一些rdf、ttl、owl、json等格式的数据。在存储知识图谱前我们需要将有用的内容从中抽取出来。查询语句使用SPARQL。

自己构建一个简单的RDF文件

import rdflib

def creat():
    """Build a small example graph of countries and save it as ``g.rdf``.

    The graph contains two ``has_border_with`` triples and four
    ``located_in`` triples. It is written to ``g.rdf`` (relative to the
    current working directory) as RDF/XML.
    """
    g = rdflib.Graph()

    # Predicates.
    has_border_with = rdflib.URIRef('http://www.example.org/has_border_with')
    located_in = rdflib.URIRef('http://www.example.org/located_in')

    # Countries.
    germany = rdflib.URIRef('http://www.example.org/germany')
    france = rdflib.URIRef('http://www.example.org/france')
    mongolia = rdflib.URIRef('http://www.example.org/mongolia')
    china = rdflib.URIRef('http://www.example.org/china')

    # Continents.
    # NOTE(review): these two use the host "example.org" while every other
    # URI above uses "www.example.org" -- confirm whether that is intended.
    europa = rdflib.URIRef('http://example.org/europa')
    asia = rdflib.URIRef('http://example.org/asia')

    g.add((germany, has_border_with, france))
    g.add((china, has_border_with, mongolia))
    g.add((germany, located_in, europa))
    g.add((france, located_in, europa))
    g.add((china, located_in, asia))
    g.add((mongolia, located_in, asia))

    # Pin the output format explicitly: the default serialization format is
    # RDF/XML in older rdflib releases but Turtle from rdflib 6.0 on, and the
    # readers of g.rdf parse it with format="xml".
    g.serialize(destination="g.rdf", format="xml")


def query():
    """Load ``g.rdf`` as RDF/XML into a fresh graph (no query is run yet)."""
    graph = rdflib.Graph()
    graph.parse(source="g.rdf", format='xml')
    

# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    creat()

RDF文件简单查询
RDF里的内容都是一些有一定语义的三元组。我们可以通过SPARQL抽取出其中的主语、谓语、宾语。

def _print_matches(g, q):
    """Run SPARQL query ``q`` on graph ``g`` and print a short summary.

    Prints the number of result rows and, when there is at least one,
    the first row. Returns the rows as a list so callers can index them
    as ``rows[i][j]`` (row ``i``, selected column ``j``).
    """
    rows = list(g.query(q))
    print(len(rows))  # 0 when nothing matches
    if rows:
        # Guard the index: an empty result would raise IndexError here.
        print(rows[0])
    return rows


def query():
    """Load ``g.rdf`` (RDF/XML) and run a few example SPARQL SELECT queries.

    For each query the number of matches and the first matching row (if
    any) are printed.
    """
    g = rdflib.Graph()
    g.parse("g.rdf", format="xml")

    ################################
    # Pattern <a,?,?>: select relation and object.
    # NOTE: the subject is left as the variable ?s, so every triple matches.
    _print_matches(g, "select ?relation ?part where { ?s ?relation ?part}")

    # Pattern <?,b,?>: subject and object for a fixed predicate.
    _print_matches(
        g,
        "select ?country ?part where {?country <http://www.example.org/located_in> ?part}",
    )

    # Pattern <?,?,c>: select subject and relation.
    # NOTE: the object is left as the variable ?part, so every triple matches.
    _print_matches(g, "select ?country ?relation where {?country ?relation ?part}")

    ################################
    # Pattern <a,b,?>: object for a fixed subject and predicate (n x 1 result).
    # IRIs are compared exactly, so the subject must be spelled ".../china"
    # (lower case) to match the URI stored in the graph.
    _print_matches(
        g,
        "select ?part where {<http://www.example.org/china> <http://www.example.org/located_in> ?part}",
    )

    # Patterns <a,?,c> and <?,a,b> are not demonstrated here.

# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    query()

Turtle文件读取

import rdflib


# Load the Turtle file into a fresh graph.
graph = rdflib.Graph()
graph.parse(source='163musichyhot.ttl', format='ttl')

# Unconditional query: select every (subject, predicate, object) triple.
results = graph.query("select ?s ?p ?o where{?s ?p ?o}")
print(len(results))

# Print each statement, one component per line.
for subject, predicate, obj in results:
    print("*******一条陈述******")
    print("主语：", subject)
    print('谓语：', predicate)
    print('宾语: ', obj)