来源:https://towardsdatascience.com/neo4j-cypher-python-7a919a372be7
有一个问题,如何用python包去调用neo4j的查询及算法包呢?
从本质上来思考问题,python只需要与Neo4j的接口打通就行了,将所有要执行的命令传给neo4j即可。
neo4j要启动才行,bin/neo4j start
session = self.__driver.session(database=db) if db is not None else self.__driver.session()
response = list(session.run(query))
只要将query语句传递过去就行了,调用的命令就一个session.run。
from neo4j import GraphDatabase
class Neo4jConnection:
    """Thin convenience wrapper around a Neo4j driver.

    The driver is created eagerly in ``__init__``. Errors raised while
    creating the driver or while running a query are printed to stdout
    instead of being propagated (best-effort behaviour), so callers must be
    prepared for :meth:`query` to return ``None``.

    Instances can be used as context managers; leaving the ``with`` block
    closes the driver.
    """

    def __init__(self, uri, user, pwd):
        """Store the connection settings and try to create the driver.

        Args:
            uri: Connection URI passed to ``GraphDatabase.driver``.
            user: User name used for authentication.
            pwd: Password used for authentication.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(
                self.__uri, auth=(self.__user, self.__pwd)
            )
        except Exception as e:
            # Non-fatal on purpose: the driver stays None and query() will
            # refuse to run until a driver exists.
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return this connection so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver when leaving the ``with`` block."""
        self.close()

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, db=None, parameters=None):
        """Run one Cypher statement and return all of its records.

        Args:
            query: Cypher text to execute.
            db: Optional database name; when ``None`` the session is opened
                without a ``database`` argument.
            parameters: Optional mapping of Cypher parameters forwarded to
                ``session.run``. Prefer this over formatting values into
                ``query``.

        Returns:
            A list with every record yielded by ``session.run``, or ``None``
            if opening the session or running the query raised.

        Raises:
            AssertionError: If no driver is available.
        """
        # Explicit raise instead of ``assert`` so the guard still works when
        # Python runs with -O; the exception type and message are unchanged.
        if self.__driver is None:
            raise AssertionError("Driver not initialized!")
        session = None
        response = None
        try:
            if db is not None:
                session = self.__driver.session(database=db)
            else:
                session = self.__driver.session()
            # Materialise the result before the session is closed below.
            response = list(session.run(query, parameters))
        except Exception as e:
            # Best-effort: report the failure and fall through to return None.
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response
还可以直接将整个csv文档导入到neo4j数据库。之前还担心先用Python读取csv文档、再逐条存储到数据库中,速度会非常慢;相比于直接导入csv文档确实会慢很多,而且随着数据增多,差异会越来越明显。
# Bulk-load the paper list with a single Cypher statement: LOAD CSV reads the
# file at the given URL (first row is the header), and one :Paper node is
# created per row with id <- paper_id and class <- label.
# USING PERIODIC COMMIT 500 asks Neo4j to commit in batches of 500 rows.
query_string = '''
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS FROM
'https://raw.githubusercontent.com/ngshya/datasets/master/cora/cora_content.csv'
AS line FIELDTERMINATOR ','
CREATE (:Paper {id: line.paper_id, class: line.label})
'''
# NOTE(review): `conn` is not defined in this excerpt; presumably a
# Neo4jConnection created earlier -- confirm. The statement targets 'coradb'.
conn.query(query_string, db='coradb')
调用算法包
# Graph Data Science steps, executed strictly in the order listed below.

# Step 1: register a named graph 'coraGraph' built from 'Paper' and 'CITES'.
graph_projection_query = '''
CALL gds.graph.create(
'coraGraph',
'Paper',
'CITES'
)
'''

# Step 2: run PageRank on 'coraGraph' and write the result to `pagerank`.
pagerank_query = '''
CALL gds.pageRank.write('coraGraph', {
writeProperty: 'pagerank'
})
YIELD nodePropertiesWritten, ranIterations
'''

# Step 3: run betweenness on 'coraGraph' and write the result to `betweenness`.
betweenness_query = '''
CALL gds.betweenness.write('coraGraph', {
writeProperty: 'betweenness' })
YIELD minimumScore, maximumScore, scoreSum, nodePropertiesWritten
'''

# The loop variable is deliberately named `query_string` so that, once the
# loop finishes, it holds the last statement that was executed.
for query_string in (graph_projection_query, pagerank_query, betweenness_query):
    conn.query(query_string, db='coradb')
from pandas import DataFrame

# Fetch the stored properties of every :Paper node.
query_string = '''
MATCH (p:Paper)
RETURN DISTINCT p.id, p.class, p.pagerank, p.betweenness
'''
# Neo4jConnection.query returns None when the query fails; fall back to an
# empty list so the DataFrame construction does not raise TypeError.
records = conn.query(query_string, db='coradb') or []
# Each record is converted to a dict so its keys become the column names.
dtf_data = DataFrame([dict(record) for record in records])
# Preview up to 10 random rows; the size is capped at the row count because
# DataFrame.sample raises ValueError when asked for more rows than exist.
dtf_data.sample(min(10, len(dtf_data)))