class carrier(object):
    """Open a connection to the local Neo4j instance and execute Cypher text.

    Args:
        value: Optional payload stored on the instance as ``self.value``.
            Nothing in this class reads it.
    """

    def __init__(self, value=None):
        # The original signature had no ``self`` and only rebound a local
        # name, so nothing was stored and passing an argument raised
        # TypeError. ``carrier()`` keeps working exactly as before.
        self.value = value

    def get_driver(self):
        """Create and return a Neo4j driver for the local bolt endpoint.

        The caller owns the returned driver and must close it.
        """
        # NOTE(review): endpoint and credentials are hard-coded for a local
        # development instance - consider loading them from configuration.
        return GraphDatabase.driver('bolt://localhost:7687', auth=('test', '11'))

    def run_cypher(self, cypher):
        """Run one Cypher statement inside an explicit transaction.

        Args:
            cypher: Complete Cypher statement text.

        Returns:
            The result object produced by ``tx.run``. The transaction,
            session and driver are already closed when it is handed back,
            so treat it as a completion handle rather than a live cursor.
        """
        driver = self.get_driver()
        try:
            with driver.session() as session:
                with session.begin_transaction() as tx:
                    return tx.run(cypher)
        finally:
            # A new driver is created on every call; without this close each
            # call would leave its connection pool open.
            driver.close()
class jigsaw(object):
    """Build APOC/Cypher import statements and execute them through ``carrier``.

    Every public method interpolates its arguments into a Cypher template,
    executes the resulting statement and returns the statement text.

    NOTE(review): arguments are spliced into the Cypher text verbatim, so they
    must come from trusted code. A quote character inside ``source`` or
    ``query`` will terminate the surrounding Cypher string literal early.

    Args:
        value: Optional payload stored on the instance as ``self.value``.
            Nothing in this class reads it.
    """

    # Options map passed as the last argument of apoc.periodic.iterate.
    _BATCH_CONFIG = '{batchsize:10000,parallel:false,iteratelist:true}'

    def __init__(self, value=None):
        # The original signature had no ``self`` and only rebound a local
        # name; ``jigsaw()`` keeps working exactly as before.
        self.value = value

    @staticmethod
    def _statement(*lines):
        """Join ``lines`` with newlines, framed by a leading and trailing newline."""
        return "\n" + "\n".join(lines) + "\n"

    @staticmethod
    def _load_rows(source, query):
        """Return the ``apoc.load.jdbc`` call that yields one ``row`` per record."""
        return f'call apoc.load.jdbc("{source}","{query}") YIELD row'

    def _run(self, cypher):
        """Execute ``cypher`` through a fresh ``carrier`` and return the text."""
        carrier().run_cypher(cypher)
        return cypher

    def create_index(self, label, attri):
        """Create an index on ``label(attri)`` and return the statement.

        Args:
            label: Node label to index.
            attri: Property name (placed between the parentheses as given).
        """
        return self._run(f"CREATE INDEX ON:{label}({attri})")

    def load_node(self, source, query, label, check):
        """Load node data: merge one node per JDBC row and copy the row onto it.

        Args:
            source: JDBC connection URL (data source).
            query: SQL statement or table name handed to ``apoc.load.jdbc``.
            label: Node label.
            check: Property-map literal used as the MERGE key,
                e.g. ``{name:row.attachment}``.

        Returns:
            The executed Cypher text.
        """
        cypher = self._statement(
            self._load_rows(source, query),
            f"merge(n:{label} {check}) set n=row",
        )
        return self._run(cypher)

    def load_set_entity_and_relationship(self, source, query, label, check, set_attri,
                                         label_e, check_e, relation1, relation2):
        """Merge two nodes per row, SET properties on the first, link them both ways.

        Args:
            source: JDBC connection URL.
            query: SQL statement or table name.
            label: Label of the start node ``n``.
            check: Property-map literal identifying ``n``.
            set_attri: Body of the SET clause applied after merging ``n``.
            label_e: Label of the end node ``n_e``.
            check_e: Property-map literal identifying ``n_e``.
            relation1: Relationship type for ``(n)->(n_e)``.
            relation2: Relationship type for ``(n_e)->(n)``.

        Returns:
            The executed Cypher text.
        """
        cypher = self._statement(
            self._load_rows(source, query),
            f"merge (n:{label} {check}) set {set_attri} with*",
            f"merge(n_e:{label_e} {check_e}) with *",
            f"merge (n)-[:{relation1}]->(n_e)",
            f"merge (n_e)-[:{relation2}]->(n)",
        )
        return self._run(cypher)

    def load_entity_and_2_relationship(self, source, query, label, check,
                                       label_e, check_e, relation1, relation2):
        """Load relationship data: merge two nodes per row and link them both ways.

        Args:
            source: JDBC connection URL (data source).
            query: SQL statement or table name.
            label: Label of the start node ``n``.
            check: Property-map literal identifying ``n``.
            label_e: Label of the end node ``n_e``.
            check_e: Property-map literal identifying ``n_e``.
            relation1: Relationship type for ``(n)->(n_e)``.
            relation2: Relationship type for ``(n_e)->(n)``.

        Returns:
            The executed Cypher text.
        """
        cypher = self._statement(
            self._load_rows(source, query),
            f"merge (n:{label} {check}) with *",
            f"merge (n_e:{label_e} {check_e}) with *",
            f"merge (n)-[:{relation1}]->(n_e)",
            f"merge (n_e)-[:{relation2}]->(n)",
        )
        return self._run(cypher)

    def load_entity_and_1_relationship(self, source, query, label, check,
                                       label_e, check_e, relation):
        """Merge two nodes per row and link them with one ``(n)->(n_e)`` relationship.

        Args:
            source: JDBC connection URL.
            query: SQL statement or table name.
            label: Label of the start node ``n``.
            check: Property-map literal identifying ``n``.
            label_e: Label of the end node ``n_e``.
            check_e: Property-map literal identifying ``n_e``.
            relation: Relationship type for ``(n)->(n_e)``.

        Returns:
            The executed Cypher text.
        """
        cypher = self._statement(
            self._load_rows(source, query),
            f"merge (n:{label} {check}) with *",
            f"merge (n_e:{label_e} {check_e}) with *",
            f"merge (n)-[:{relation}]->(n_e)",
        )
        return self._run(cypher)

    def batch_entity_and_2_relationship(self, source, query, label, check,
                                        label_e, check_e, relation1, relation2):
        """Batched variant of ``load_entity_and_2_relationship``.

        The row source and the merge statement are handed to
        ``apoc.periodic.iterate`` together with ``_BATCH_CONFIG``.

        Parameters have the same meaning as in
        ``load_entity_and_2_relationship``.

        Returns:
            The executed Cypher text.
        """
        # The earlier text put a bare ``characterEncoding=utf-8`` between the
        # URL and the SQL text. That is not a valid Cypher argument and it
        # shifted the SQL out of the second parameter slot. Encoding options
        # belong in the JDBC URL passed as ``source``.
        cypher = self._statement(
            "CALL apoc.periodic.iterate(",
            f"'{self._load_rows(source, query)}'",
            f",'merge (n:{label} {check}) with *",
            f"merge (n_e:{label_e} {check_e}) with *",
            f"merge (n)-[:{relation1}]->(n_e)",
            f"merge (n_e)-[:{relation2}]->(n)'",
            f",{self._BATCH_CONFIG}",
            ")",
        )
        return self._run(cypher)

    def load_use_unwind_entity_and_2_relationship(self, source, query, label, check,
                                                  check_split, label_e, check_e,
                                                  relation1, relation2):
        """Split a comma-separated value per row and link every piece both ways.

        Args:
            source: JDBC connection URL.
            query: SQL statement or table name.
            label: Label of the start node ``n``.
            check: Cypher expression whose string value is split on ``,``;
                each piece is bound to the variable ``filename``.
            check_split: Property-map literal identifying ``n``
                (presumably written in terms of ``filename`` - confirm
                against callers).
            label_e: Label of the end node ``n_e``.
            check_e: Property-map literal identifying ``n_e``.
            relation1: Relationship type for ``(n)->(n_e)``.
            relation2: Relationship type for ``(n_e)->(n)``.

        Returns:
            The executed Cypher text.
        """
        cypher = self._statement(
            self._load_rows(source, query),
            f"UNWIND split({check}, ',') AS filename",
            f"merge (n:{label} {check_split}) with *",
            f"merge (n_e:{label_e} {check_e}) with *",
            f"merge (n)-[r:{relation1}]->(n_e)",
            f"merge (n_e)-[:{relation2}]->(n)",
        )
        return self._run(cypher)

    def batch_use_unwind_entity_and_2_relationship(self, source, query, label, check,
                                                   check_split, label_e, check_e,
                                                   relation1, relation2):
        """Batched variant of ``load_use_unwind_entity_and_2_relationship``.

        The row source and the UNWIND/merge statement are handed to
        ``apoc.periodic.iterate`` together with ``_BATCH_CONFIG``.

        Parameters have the same meaning as in
        ``load_use_unwind_entity_and_2_relationship``.

        Returns:
            The executed Cypher text.
        """
        cypher = self._statement(
            "CALL apoc.periodic.iterate(",
            f"'{self._load_rows(source, query)}',",
            # Double quotes around the separator: this line sits inside a
            # single-quoted Cypher string.
            f"""'UNWIND split({check}, ",") AS filename""",
            f"merge (n:{label} {check_split}) with *",
            f"merge (n_e:{label_e} {check_e}) with *",
            f"merge (n)-[r:{relation1}]->(n_e)",
            f"merge (n_e)-[:{relation2}]->(n)'",
            f",{self._BATCH_CONFIG}",
            ")",
        )
        return self._run(cypher)
def main():
    """Import document/employee nodes and their relationships, then print the elapsed seconds."""
    loader = jigsaw()
    # NOTE(review): database credentials are embedded in this URL - consider
    # moving them to configuration or environment variables.
    source = "jdbc:postgresql://10.100.200.191:5433/postdb1?user=postuser1&password=postuser123"
    table_info = '''select * from pmart.dm_skill_file_info'''
    # Start the clock before the load; it was previously started after the
    # load had already finished, so the printed duration was always ~0.
    starttime = time.time()
    loader.load_entity_and_2_relationship(
        source,
        table_info,
        '''文檔''',
        '''{name:row.attachment}''',
        '''員工''',
        '''{工號:row.author_no}''',
        '''作者是''',
        '''上傳的文檔有''',
    )
    endtime = time.time()
    print(endtime - starttime)
# Run the import only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()