import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
from fake_useragent import UserAgent
import json
# Random User-Agent generator used for every request's headers.
ua = UserAgent( )
# Account id sent as USER_ID with every API call. Looks like a placeholder
# (all zeros) — presumably a real simuwang account id is required; confirm.
user_id = 00000
# Session cookie copied verbatim from a logged-in browser session.
# NOTE(review): this expires and must be refreshed manually for the
# scrapers below to keep working.
Cookie = "focus-certification-pop=-1; certification=1; qualified_investor=1; evaluation_result=4; guest_id=1570936362; regsms=1624171892000; http_tK_cache=dbbf8f84c525a7160aaef4d21219b1bed2ecf512; cur_ck_time=1624694453; ck_request_key=WBEyAxdP1u8mGhbq4tWjWHNx4YhYSex9RQY6ddGS0hA%3D; passport=680546%09u9155843295513%09VAcAUwpQA1ZRBlxaVFAGUQICBlACC19TC1YDAgADVAk%3D3ca5bd2f94; rz_rem_u_p=LaeVx12Q0nYPk82vmZjilhpMv68x5o1b7fR0IN4%2FD5k%3D%24kyMzWHKQYikw%2FlAcPA3UxGpNGHTm4lbhwn2a7DpPr78%3D; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22680546%22%2C%22first_id%22%3A%22680546%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_utm_source%22%3A%2220048%22%2C%22%24latest_utm_campaign%22%3A%22NRYY-media%22%2C%22_latest_utm_sign%22%3A%22zimeiti%22%7D%2C%22%24device_id%22%3A%22179e5ae4c4c135-0119d6070e1a0f-f7f1939-921600-179e5ae4c4d567%22%7D; Hm_lvt_c3f6328a1a952e922e996c667234cdae=1624171597,1624709592,1624894510,1624895262; Hm_lpvt_c3f6328a1a952e922e996c667234cdae=1624896177"
def get_company(scale):
    """Fetch the full company ranking for one AUM scale bucket.

    Pages through simuwang's mobile ranking API (sorted by return,
    descending) and stacks every page's company list into one DataFrame.

    Parameters
    ----------
    scale : int or str
        Scale-bucket code inserted into the ``condition`` filter
        (callers pass 3/4/5/6 — see ``get_quant_fund``'s ``scale_dict``).

    Returns
    -------
    pandas.DataFrame
        One row per company, columns as returned by the API's ``list`` field.
    """
    headers = {
        "Host": 'ppwapi.simuwang.com',
        "Referer": 'https://mobile.simuwang.com/',
        "User-Agent": ua.random,
        "Cookie": Cookie,
    }
    main_url = "https://ppwapi.simuwang.com/ranking/company"

    def _fetch_page(page):
        # The API wraps its JSON payload in HTML, so strip the markup with
        # BeautifulSoup (payload sits in the first <p>) before json.loads.
        payload = {
            "page": page,
            "condition": "ret:4;scale:%s;" % scale,
            "sort_name": "ret",
            "sort_asc": "desc",
            'USER_ID': user_id,
        }
        text = requests.post(main_url, headers=headers, data=payload,
                             timeout=30).text
        soup = BeautifulSoup(text, features="lxml")
        return json.loads(soup.find('p').text)['data']

    first = _fetch_page(1)
    page_num = first['pages']
    # Collect every page's frame first and concat once at the end —
    # concatenating inside the loop (as before) is quadratic.
    frames = [pd.DataFrame(first['list'])]
    for page in range(2, page_num + 1):
        frames.append(pd.DataFrame(_fetch_page(page)['list']))
    # ignore_index avoids the duplicated 0..n indexes the old code produced.
    return pd.concat(frames, ignore_index=True)
def get_strategy(id_list, name_list):
    """Count running funds per strategy bucket for each company.

    For every company id, downloads its full fund list, keeps only funds
    with ``fund_status == 1`` (running), labels each as
    ``"<strategy>+<substrategy>"`` and tallies the labels.

    Parameters
    ----------
    id_list : iterable of str
        simuwang company ids (``CO...`` codes).
    name_list : iterable of str
        Display names used as the result's column labels; must align
        one-to-one with ``id_list``.

    Returns
    -------
    pandas.DataFrame
        Strategy labels as the index, one column of counts per company.
    """
    headers = {
        "Host": 'sppwapi.simuwang.com',
        "Referer": 'https://mobile.simuwang.com/',
        "User-Agent": ua.random,
        "Cookie": Cookie,
    }
    main_url = "https://sppwapi.simuwang.com/sun/company/allFundList"
    strategy_counts = []
    for company_id in id_list:  # renamed from `id` — don't shadow the builtin
        text = requests.post(main_url, headers=headers,
                             data={"id": company_id, 'type': 'all',
                                   'USER_ID': user_id},
                             timeout=30).text
        soup = BeautifulSoup(text, features="lxml")
        # JSON payload is wrapped in a <p> tag by the API response.
        fund_list_json = json.loads(soup.find('p').contents[0])
        fund_info = fund_list_json['data'][0]['list']
        fund_df = pd.DataFrame(fund_info)[['strategy', 'substrategy',
                                           'fund_status']]
        # Keep only funds still running (fund_status == 1).
        fund_df = fund_df.loc[fund_df['fund_status'] == 1,
                              ['strategy', 'substrategy']]
        fund_strategy = fund_df.apply(lambda row: "+".join(row), axis=1)
        strategy_counts.append(fund_strategy.value_counts())
    result = pd.concat(strategy_counts, axis=1)
    result.columns = name_list
    return result
def get_nav(id_list, fund_list, company_name, fund_name):
    """Download NAV trend curves per company and flagship fund, export to Excel.

    For each company id, pulls (a) the flagship fund's NAV trend and (b) the
    company-level NAV trend, both benchmarked against index ``IN00000008``
    (labelled 沪深300), then writes one sheet per company into
    ``HF_fund.xlsx`` and ``HF_company.xlsx`` in the working directory.

    Parameters
    ----------
    id_list : sequence of str
        Company ids (``CO...`` codes).
    fund_list : sequence of str
        Flagship fund ids (``HF...`` codes), aligned with ``id_list``.
    company_name : sequence of str
        Company display names, used as sheet names and column labels.
    fund_name : sequence of str
        Fund display names, used as column labels.

    Side effects: network requests and two Excel files written to the CWD.
    """
    headers = {
        "Host": 'sppwapi.simuwang.com',
        "Referer": 'https://mobile.simuwang.com/',
        "User-Agent": ua.random,
        'Cookie': Cookie,
    }

    def _fetch_trend(url, payload):
        # Shared request/parse step: the API wraps its JSON in a <p> tag.
        text = requests.post(url, headers=headers, data=payload,
                             timeout=30).text
        soup = BeautifulSoup(text, features="lxml")
        return json.loads(soup.find('p').text)['data']

    fund_result = []
    company_result = []
    for i, company_id in enumerate(id_list):  # was `id` — shadowed builtin
        print(company_id)
        delegate_id = fund_list[i]
        data = _fetch_trend(
            "https://sppwapi.simuwang.com/sun/chart/fundNavTrend",
            {"fund_id": delegate_id, 'compare_id': 'IN00000008',
             'nav_flag': 1, 'period': 0, 'USER_ID': user_id})
        ret = data['data']
        fund_result.append(pd.DataFrame(
            index=data['categories'],
            data={'沪深300': ret['IN00000008'],
                  fund_name[i]: ret[delegate_id]['ret'],
                  'compare': ret['compare']}))
        data = _fetch_trend(
            "https://sppwapi.simuwang.com/sun/chart/companyNavTrend",
            {"company_id": company_id, 'compare_id': 'IN00000008',
             'nav_flag': 1, 'period': 0, 'USER_ID': user_id})
        company_result.append(pd.DataFrame(
            index=data['categories'], data=data['data'],
            columns=['沪深300', company_name[i], 'compare']))
    # One sheet per company in each workbook; zip keeps names and frames
    # aligned without index bookkeeping.
    with pd.ExcelWriter('HF_fund.xlsx') as writer:
        for name, frame in zip(company_name, fund_result):
            frame.to_excel(writer, sheet_name=name)
    with pd.ExcelWriter('HF_company.xlsx') as writer:
        for name, frame in zip(company_name, company_result):
            frame.to_excel(writer, sheet_name=name)
def get_quant_fund(scale):
    """Select the quant-dominated managers within one AUM scale bucket.

    A company counts as "quant" when at least half of its running funds
    fall into the quantitative strategy buckets listed below.

    Parameters
    ----------
    scale : str
        One of ``'10-20'``, ``'20-50'``, ``'50-100'``, ``'100+'``
        (亿元 AUM buckets).

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Company info (indexed by short name) and the per-strategy fund
        counts for the selected companies.
    """
    scale_dict = {'10-20': 3, "20-50": 4, '50-100': 5, '100+': 6}
    company_result = get_company(scale_dict[scale])
    strategy_result = get_strategy(company_result['company_id'],
                                   company_result['company_short_name'])
    # Per-company share of running funds in each strategy bucket.
    strategy_result_percent = strategy_result / strategy_result.sum(axis=0)
    quant_strategies = ["股票策略+量化多头", "相对价值+股票市场中性", "相对价值+套利",
                        "相对价值+相对价值复合", "管理期货+量化趋势", "管理期货+量化套利",
                        "管理期货+管理期货复合", "复合策略+量化多策略"]
    is_quant = (strategy_result_percent.transpose()[quant_strategies]
                .sum(axis=1) >= 0.5)
    # Boolean-mask filter instead of the old `== True` comparison.
    quant_fund = is_quant[is_quant]
    quant_fund_info = (company_result.set_index('company_short_name')
                       .loc[quant_fund.index, :])
    quant_fund_strategy = strategy_result[quant_fund.index]
    return quant_fund_info, quant_fund_strategy
def get_funds(id_list):
    """Download the complete fund list of every company in ``id_list``.

    Parameters
    ----------
    id_list : iterable of str
        simuwang company ids (``CO...`` codes).

    Returns
    -------
    pandas.DataFrame
        All companies' funds stacked, with a ``company_id`` column added
        so rows remain attributable to their company.
    """
    headers = {
        "Host": 'sppwapi.simuwang.com',
        "Referer": 'https://mobile.simuwang.com/',
        "User-Agent": ua.random,
        "Cookie": Cookie,
    }
    main_url = "https://sppwapi.simuwang.com/sun/company/allFundList"
    frames = []
    for company_id in id_list:  # renamed from `id` — don't shadow the builtin
        text = requests.post(main_url, headers=headers,
                             data={"id": company_id, 'type': 'all',
                                   'USER_ID': user_id},
                             timeout=30).text
        soup = BeautifulSoup(text, features="lxml")
        # JSON payload is wrapped in a <p> tag by the API response.
        fund_list_json = json.loads(soup.find('p').contents[0])
        fund_info = pd.DataFrame(fund_list_json['data'][0]['list'])
        fund_info['company_id'] = company_id
        frames.append(fund_info)
    # ignore_index avoids the duplicated 0..n indexes the old code produced.
    return pd.concat(frames, ignore_index=True)
# Hand-picked "ideal" quant managers: simuwang company ids (CO... codes).
# Order must match name_ideal below one-to-one.
id_ideal = [ 'CO00000HWR' ,
'CO00000HZI' ,
'CO00000BB6' ,
'CO000001N7' ,
'CO000001CM' ,
'CO000001G0' ,
'CO000001IQ' ,
'CO000000S4' ,
'CO000003KY' ,
'CO00000FNC' ,
'CO000001HJ' ,
'CO00000DOE' ,
'CO00000VQC' ,
'CO0000080D' ,
'CO000003HY' ,
'CO00000F35' ,
'CO00000AMA' ,
'CO0000014J' ,
'CO00000E66' ]
# Display names for the same managers, aligned one-to-one with id_ideal;
# used for DataFrame lookups and Excel sheet/column labels.
name_ideal = [ '思勰投资' ,
'宁波幻方量化' ,
'幻方量化' ,
'灵均投资' ,
'进化论资产' ,
'明汯投资' ,
'九坤投资' ,
'金锝资产' ,
'因诺资产' ,
'白鹭资管' ,
'千象资产' ,
'锐天投资' ,
'珠海致诚卓远' ,
'喜岳投资' ,
'宽德投资' ,
'华澄投资' ,
'念空数据科技' ,
'富善投资' ,
'平方和投资' ]
# Script entry: rebuild the fund universe from a previously exported
# HF_statistics.xlsx (one "fund_info <scale>" sheet per AUM bucket), look
# up each ideal manager's flagship fund id, then download and export the
# NAV histories via get_nav.
fund_info_list = [ pd. read_excel( "HF_statistics.xlsx" , sheet_name= 'fund_info %s' % scale) for scale in [ '10-20' , '20-50' , '50-100' , '100+' ] ]
fund_info = pd. concat( fund_info_list)
fund_info = fund_info. set_index( "company_short_name" )
fund_id = fund_info. loc[ name_ideal, [ 'fund_id' ] ]
# Manual override of one manager's flagship fund id — presumably the
# spreadsheet value is wrong/stale; TODO confirm.
fund_id. loc[ '念空数据科技' ] = "HF00005L7N"
get_nav( id_ideal, fund_id. values. flatten( ) , name_ideal, fund_info. loc[ name_ideal, [ 'fund_short_name' ] ] . values. flatten( ) )