数据来源
选择tushare的公募基金管理人接口,获取所需要的数据
import tushare as ts
import pandas as pd
import time
# Tushare API token; it is blank here, so supply your own before running.
token = ''
ts.set_token(token)
# Build the Tushare Pro client and query the fund_company endpoint.
pro = ts.pro_api()
df = pro.fund_company()
# Optional: uncomment to save the result as CSV (utf_8_sig encoding, no index column).
# df.to_csv("jijin.csv",encoding="utf_8_sig",index=None)
数据展示:
name,shortname,province,city,address,phone,office,website,chairman,manager,reg_capital,setup_date,end_date,employees,main_business,org_code,credit_code
北京广能投资基金管理有限公司,广能基金,北京,北京市,北京市朝阳区北四环中路27号院5号楼2712-2715A,,北京市朝阳区北四环中路27号院5号楼2712-2715A,www.gnfund.cn,刘锡潜,杨运成,10000.0,20111031,,10.0,,584419680,
设计流程
首先建立节点
选择name、province、manager、chairman这几个字段建立节点,将其他字段添加为属性以节省空间。
其次建立关系
name字段为主键,关联其他字段
代码
# coding:utf-8
import os
import pandas as pd
import re
from py2neo import Graph, Node, Relationship
'''
MATCH (n)
OPTIONAL MATCH (n)-[r]-()
DELETE n,r
#删库demo
'''
def creat_node(file, graph):
    """Load fund-company rows from a CSV file and write them into a graph.

    For every row, four nodes are looked up by label and ``name`` and created
    when absent: the company ('名字'), its location ('所在地'), its general
    manager ('总经理') and its chairman ('法人代表'). The company node is then
    linked to the other three with '地址', '经理人' and '法人' relationships.

    Args:
        file: Path of the CSV file. It must provide the columns ``name``,
            ``shortname``, ``province``, ``city``, ``manager`` and
            ``chairman``.
        graph: Graph handle exposing ``find_one`` and ``create`` (the
            py2neo ``Graph`` built by the caller).

    Returns:
        None. When ``file`` does not exist a message is printed and nothing
        is imported.
    """
    if not os.path.exists(file):
        print('{} 文件不存在'.format(file))
        # Stop here: falling through would only crash inside read_csv.
        return

    df = pd.read_csv(file)
    # Empty cells become a placeholder text so every node gets a name.
    df = df.fillna(value=str('不存在'))

    rows = zip(df['name'], df['shortname'], df['province'],
               df['city'], df['manager'], df['chairman'])
    for name, shortname, province, city, manager, chairman in rows:
        name_node = Node('名字', name=name, shortname=shortname)
        province_node = Node('所在地', name=province, city=city)
        manager_node = Node('总经理', name=manager)
        chairman_node = Node('法人代表', name=chairman)

        # Create a node only when no node with the same label and name is
        # already stored, so repeated values do not produce duplicates.
        candidates = (
            ('名字', name, name_node),
            ('所在地', province, province_node),
            ('总经理', manager, manager_node),
            ('法人代表', chairman, chairman_node),
        )
        for label, key, node in candidates:
            if not graph.find_one(label=label, property_key='name', property_value=key):
                graph.create(node)
        print('创建了新的结点:{}{}{}{}'.format(name_node, province_node, manager_node, chairman_node))

        # Fetch the stored nodes so the relationships attach to the nodes
        # that are actually in the graph (which may predate this row).
        name_node = graph.find_one(label='名字', property_key='name', property_value=name)
        province_node = graph.find_one(label='所在地', property_key='name', property_value=province)
        manager_node = graph.find_one(label='总经理', property_key='name', property_value=manager)
        chairman_node = graph.find_one(label='法人代表', property_key='name', property_value=chairman)

        links = (
            ('地址', province_node),
            ('经理人', manager_node),
            ('法人', chairman_node),
        )
        for rel_type, target in links:
            relationship = Relationship(name_node, rel_type, target)
            graph.create(relationship)
            print('新建关系: {}'.format(relationship))
if __name__ == '__main__':
    # Alternative connection using defaults: Graph(password="")
    # Import the fund-company CSV into the Neo4j instance behind the HTTP endpoint.
    creat_node(
        'jijin.csv',
        Graph('http://:7474', username='neo4j', password=''),
    )
效果
以下是neo4j补充,在知乎中看到的。
作者:光明与黑暗
链接:https://zhuanlan.zhihu.com/p/143175875
来源:知乎
著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。
# -*- coding: UTF-8 -*-
from neo4j import GraphDatabase
import pandas as pd
import tushare as ts
# Add a stock entity
def add_stock(tx, ts_code, name, area, industry):
    """Run a CREATE statement for one ``Stock`` node on ``tx``.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        ts_code: Value stored in the node's ``ts_code`` property.
        name: Value stored in the node's ``name`` property.
        area: Value stored in the node's ``area`` property.
        industry: Value stored in the node's ``industry`` property.
    """
    cypher = "CREATE (n:Stock { ts_code: $ts_code, name: $name,area:$area,industry:$industry })"
    params = {
        "ts_code": ts_code,
        "name": name,
        "area": area,
        "industry": industry,
    }
    tx.run(cypher, **params)
# Add a concept entity
def add_concept(tx, code, name):
    """Run a CREATE statement for one ``Concept`` node on ``tx``.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        code: Value stored in the node's ``code`` property.
        name: Value stored in the node's ``name`` property.
    """
    cypher = "CREATE (n:Concept { code: $code, name: $name})"
    params = {"code": code, "name": name}
    tx.run(cypher, **params)
# Add the relationship between a concept and a stock
def add_concept_relation(tx, code, ts_code):
    """Run a statement linking a ``Stock`` to a ``Concept`` with ``BELONG``.

    The query matches the ``Concept`` whose ``code`` equals ``code`` and the
    ``Stock`` whose ``ts_code`` equals ``ts_code``, then creates a
    ``(stock)-[:BELONG]->(concept)`` relationship between them.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        code: Concept code to match.
        ts_code: Stock code to match.
    """
    cypher = "MATCH (c_temp:Concept),(s_temp:Stock) WHERE c_temp.code=$code and s_temp.ts_code=$ts_code CREATE (s_temp)-[r:BELONG]->(c_temp) return c_temp,s_temp"
    params = {"code": code, "ts_code": ts_code}
    tx.run(cypher, **params)
# Add a Shenwan industry classification
def add_index_classify(tx, index_code, industry_name):
    """Run a CREATE statement for one ``Classify`` node on ``tx``.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        index_code: Value stored in the node's ``index_code`` property.
        industry_name: Value stored in the node's ``industry_name`` property.
    """
    cypher = "CREATE (n:Classify { index_code: $index_code, industry_name: $industry_name})"
    params = {"index_code": index_code, "industry_name": industry_name}
    tx.run(cypher, **params)
# Add the relationship between a Shenwan classification and a stock
def add_index_relation(tx, index_code, con_code):
    """Run a statement linking a ``Stock`` to a ``Classify`` with ``BELONG``.

    The query matches the ``Classify`` whose ``index_code`` equals
    ``index_code`` and the ``Stock`` whose ``ts_code`` equals ``con_code``,
    then creates a ``(stock)-[:BELONG]->(classify)`` relationship.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        index_code: Classification index code to match.
        con_code: Constituent stock code, matched against ``Stock.ts_code``.
    """
    cypher = "MATCH (c_temp:Classify),(s_temp:Stock) WHERE c_temp.index_code=$index_code and s_temp.ts_code=$con_code CREATE (s_temp)-[r:BELONG]->(c_temp) return c_temp,s_temp"
    params = {"index_code": index_code, "con_code": con_code}
    tx.run(cypher, **params)
if __name__ == "__main__":
    # Populate Neo4j from Tushare. Stocks are written first because the
    # relation queries MATCH existing Stock nodes; each concept and each
    # classification is written right before its own member relations.
    driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "你的neo4j 密码"))
    try:
        with driver.session() as session:
            ts.set_token('你的Tushare Token')
            pro = ts.pro_api()

            # Listed stocks -> Stock nodes.
            df_stock_basic = pro.stock_basic(list_status='L')
            for ts_code, name, area, industry in zip(
                    df_stock_basic['ts_code'], df_stock_basic['name'],
                    df_stock_basic['area'], df_stock_basic['industry']):
                session.write_transaction(add_stock, ts_code, name, area, industry)

            # Concepts -> Concept nodes, then one BELONG edge per member stock.
            df_stock_concept = pro.concept()
            for code, name in zip(df_stock_concept['code'], df_stock_concept['name']):
                session.write_transaction(add_concept, code, name)
                df_concept_detail = pro.concept_detail(id=code)
                # Separate names keep the outer loop variables intact.
                for detail_code, member_ts_code in zip(
                        df_concept_detail['id'], df_concept_detail['ts_code']):
                    session.write_transaction(add_concept_relation, detail_code, member_ts_code)

            # Shenwan classifications -> Classify nodes, then member edges.
            df_index_classify = pro.index_classify()
            for index_code, industry_name in zip(
                    df_index_classify['index_code'], df_index_classify['industry_name']):
                session.write_transaction(add_index_classify, index_code, industry_name)
                df_index_classify_member = pro.index_member(index_code=index_code)
                for member_index_code, con_code in zip(
                        df_index_classify_member['index_code'],
                        df_index_classify_member['con_code']):
                    session.write_transaction(add_index_relation, member_index_code, con_code)
    finally:
        # Release the driver even when an API call or a transaction fails.
        driver.close()