利用py2neo建立金融知识图谱(1)

数据来源

选择tushare的公募基金管理人接口,获取所需要的数据

import tushare as ts
import pandas as pd
import time
# Tushare Pro API token; left empty here and must be filled in before running.
token = ''
# Register the token with tushare so the pro_api() call below can use it.
ts.set_token(token)
pro = ts.pro_api()
# Fetch the public fund management company table as a DataFrame.
df = pro.fund_company()
# Optionally persist the result to the CSV consumed by the graph-building script:
# df.to_csv("jijin.csv",encoding="utf_8_sig",index=None)

数据展示:

name,shortname,province,city,address,phone,office,website,chairman,manager,reg_capital,setup_date,end_date,employees,main_business,org_code,credit_code
北京广能投资基金管理有限公司,广能基金,北京,北京市,北京市朝阳区北四环中路27号院5号楼2712-2715A,,北京市朝阳区北四环中路27号院5号楼2712-2715A,www.gnfund.cn,刘锡潜,杨运成,10000.0,20111031,,10.0,,584419680,

设计流程

首先建立节点

选择name、province、manager、chairman几个字段建立节点,将shortname、city等字段作为节点属性添加,以节省空间。

其次建立关系

以name(公司名称)节点为中心,分别与所在地、总经理、法人代表节点建立关系。

代码

# coding:utf-8
import os
import pandas as pd
import re
from py2neo import Graph, Node, Relationship
'''
MATCH (n)
OPTIONAL MATCH (n)-[r]-()
DELETE n,r
#删库demo
'''

def _get_or_create_node(graph, label, name, **properties):
    """Return the node labelled ``label`` whose ``name`` matches, creating it if absent.

    Extra ``properties`` are only applied when a new node is created; an
    existing node is returned unchanged.
    """
    existing = graph.find_one(label=label, property_key='name', property_value=name)
    if existing:
        return existing
    node = Node(label, name=name, **properties)
    graph.create(node)
    return node


def creat_node(file, graph):
    """Load fund-company rows from a CSV file into the graph.

    For every row a company node ('名字'), a location node ('所在地'), a
    general-manager node ('总经理') and a legal-representative node
    ('法人代表') are looked up by ``name`` and created when missing. The
    company node is then linked to the other three.

    Args:
        file: Path to a CSV file with at least the columns ``name``,
            ``shortname``, ``province``, ``city``, ``manager`` and ``chairman``.
        graph: A py2neo ``Graph`` exposing ``find_one`` and ``create``.

    Returns:
        None. If ``file`` does not exist a message is printed and nothing
        is written to the graph.
    """
    if not os.path.exists(file):
        print('{} 文件不存在'.format(file))
        # Stop here: reading a missing file would only raise FileNotFoundError.
        return

    df = pd.read_csv(file)
    # Missing cells become a placeholder string so every node has a name value.
    df = df.fillna(value='不存在')

    for name, shortname, province, city, manager, chairman in zip(
            df.name, df.shortname, df.province, df.city, df.manager, df.chairman):
        name_node = _get_or_create_node(graph, '名字', name, shortname=shortname)
        province_node = _get_or_create_node(graph, '所在地', province, city=city)
        manager_node = _get_or_create_node(graph, '总经理', manager)
        chairman_node = _get_or_create_node(graph, '法人代表', chairman)
        # Printed for every row, whether the nodes were new or already present.
        print('创建了新的结点:{}{}{}{}'.format(name_node, province_node, manager_node, chairman_node))

        # Link the company to its location, general manager and legal representative.
        for rel_type, target_node in (('地址', province_node),
                                      ('经理人', manager_node),
                                      ('法人', chairman_node)):
            relationship = Relationship(name_node, rel_type, target_node)
            graph.create(relationship)
            print('新建关系: {}'.format(relationship))


if __name__ == '__main__':
    # Alternative connection relying on the default address: Graph(password="")
    neo4j_graph = Graph('http://:7474', username='neo4j', password='')
    # Import the fund-company CSV exported from tushare into the graph.
    creat_node('jijin.csv', neo4j_graph)

效果

在这里插入图片描述

以下是使用neo4j官方Python驱动构建股票知识图谱的补充示例,转载自知乎。

作者:光明与黑暗
链接:https://zhuanlan.zhihu.com/p/143175875
来源:知乎
著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。


# -*- coding: UTF-8 -*-
from neo4j import GraphDatabase
import pandas as pd
import tushare as ts
# Add a stock entity.
def add_stock(tx, ts_code, name, area, industry):
    """Create one ``Stock`` node carrying the given properties by running Cypher on ``tx``."""
    query = "CREATE (n:Stock { ts_code: $ts_code, name: $name,area:$area,industry:$industry })"
    params = {
        "ts_code": ts_code,
        "name": name,
        "area": area,
        "industry": industry,
    }
    tx.run(query, **params)
# Add a concept entity.
def add_concept(tx, code, name):
    """Create one ``Concept`` node with ``code`` and ``name`` by running Cypher on ``tx``."""
    query = "CREATE (n:Concept { code: $code, name: $name})"
    params = {"code": code, "name": name}
    tx.run(query, **params)
# Add the relationship between a concept and a stock.
def add_concept_relation(tx, code, ts_code):
    """Link the ``Stock`` with ``ts_code`` to the ``Concept`` with ``code`` via a BELONG edge."""
    query = "MATCH (c_temp:Concept),(s_temp:Stock) WHERE c_temp.code=$code and s_temp.ts_code=$ts_code CREATE (s_temp)-[r:BELONG]->(c_temp) return c_temp,s_temp"
    params = {"code": code, "ts_code": ts_code}
    tx.run(query, **params)
# Add a Shenwan industry classification entity.
def add_index_classify(tx, index_code, industry_name):
    """Create one ``Classify`` node with ``index_code`` and ``industry_name`` via ``tx``."""
    query = "CREATE (n:Classify { index_code: $index_code, industry_name: $industry_name})"
    params = {"index_code": index_code, "industry_name": industry_name}
    tx.run(query, **params)
# Add the relationship between a Shenwan classification and a stock.
def add_index_relation(tx, index_code, con_code):
    """Link the ``Stock`` whose ``ts_code`` equals ``con_code`` to the ``Classify`` with ``index_code``."""
    query = "MATCH (c_temp:Classify),(s_temp:Stock) WHERE c_temp.index_code=$index_code and s_temp.ts_code=$con_code CREATE (s_temp)-[r:BELONG]->(c_temp) return c_temp,s_temp"
    params = {"index_code": index_code, "con_code": con_code}
    tx.run(query, **params)
if __name__ == "__main__":
    # Build the stock graph: Stock, Concept and Classify nodes plus the
    # BELONG edges from stocks to their concepts and Shenwan classifications.
    driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "你的neo4j 密码"))
    try:
        with driver.session() as session:
            ts.set_token('你的Tushare Token')
            pro = ts.pro_api()

            # Listed stocks -> Stock nodes.
            df_stock_basic = pro.stock_basic(list_status='L')
            for ts_code, name, area, industry in zip(df_stock_basic['ts_code'],
                                                     df_stock_basic['name'],
                                                     df_stock_basic['area'],
                                                     df_stock_basic['industry']):
                session.write_transaction(add_stock, ts_code, name, area, industry)

            # Concepts -> Concept nodes, then one BELONG edge per member stock.
            df_stock_concept = pro.concept()
            for code, name in zip(df_stock_concept['code'], df_stock_concept['name']):
                session.write_transaction(add_concept, code, name)
                df_concept_detail = pro.concept_detail(id=code)
                # The relation uses the id reported by the detail table, as before,
                # but no longer rebinds the outer loop variable.
                for concept_id, ts_code in zip(df_concept_detail['id'],
                                               df_concept_detail['ts_code']):
                    session.write_transaction(add_concept_relation, concept_id, ts_code)

            # Shenwan classifications -> Classify nodes, then edges to constituents.
            df_index_classify = pro.index_classify()
            for index_code, industry_name in zip(df_index_classify['index_code'],
                                                 df_index_classify['industry_name']):
                session.write_transaction(add_index_classify, index_code, industry_name)
                df_index_classify_member = pro.index_member(index_code=index_code)
                for member_index_code, con_code in zip(df_index_classify_member['index_code'],
                                                       df_index_classify_member['con_code']):
                    session.write_transaction(add_index_relation, member_index_code, con_code)
    finally:
        # Release the driver even when a Tushare or Neo4j call raises.
        driver.close()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值