# Fund return ranking (基金收益排名)
import xlwings as xw
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import json
import xlwt
import xlwings as xw
from selenium import webdriver
import time
import pandas as pd
import csv
import re
from selenium.webdriver import Chrome, ChromeOptions, ActionChains
import multiprocessing
def web(url):
    """Load ``url`` in the shared browser and return the parsed page.

    Uses the module-level Selenium ``driver``: navigates to ``url``,
    refreshes the page, waits one second, then parses the page source with
    BeautifulSoup's ``html.parser``.  The raw page source is also written to
    ``FundRank.html`` (overwriting any previous snapshot).

    Returns:
        The ``BeautifulSoup`` document built from the page source.
    """
    driver.get(url)
    driver.refresh()
    time.sleep(1)

    # Grab the rendered markup once and reuse it for parsing and the snapshot.
    page_source = driver.page_source
    parsed_page = BeautifulSoup(page_source, 'html.parser')

    with open('FundRank.html', 'w', encoding='utf-8-sig') as snapshot:
        snapshot.write(page_source)
    print('写入网页')

    return parsed_page
def data(html):
    """Extract a fund's basic information from its overview page.

    Looks up every ``<div class="ti-left">`` in ``html``, takes the second
    one and collects the text of each of its direct children, ignoring
    children whose text is exactly a newline.  The first four collected
    texts are used, in order, as fund name, fund code, fund type and fund
    risk.

    Args:
        html: Parsed page (BeautifulSoup document) of the fund overview.

    Returns:
        A one-row ``DataFrame`` with the columns 基金名称, 基金代码,
        基金类型 and 基金风险.

    Raises:
        IndexError: If the page has fewer than two ``ti-left`` blocks or the
            block yields fewer than four text entries.
    """
    # attrs must be a dict; the previous set literal {'class', 'ti-left'}
    # was interpreted as "class is either 'class' or 'ti-left'".
    info_block = html.find_all('div', attrs={'class': 'ti-left'})[1]

    fund_name = []
    for child in info_block:
        try:
            text = child.text
        except AttributeError:
            # Child node that exposes no text; report it and move on.
            print('基本信息错误')
            continue
        if text != '\n':
            fund_name.append(text)

    df = pd.DataFrame({
        '基金名称': [fund_name[0]],
        '基金代码': [fund_name[1]],
        '基金类型': [fund_name[2]],
        '基金风险': [fund_name[3]],
    })
    print(df)
    return df
def data_b(html):
    """Extract fund-manager information from the overview page.

    Collects the text of every ``<p>`` found inside each
    ``<div class="all">`` of ``html`` (newlines, tabs and spaces removed)
    and picks fixed positions out of that list.

    Args:
        html: Parsed page (BeautifulSoup document) of the fund overview.

    Returns:
        A one-row ``DataFrame`` with the columns 基金经理 (entry 0),
        从业年均回报 (entry 3), 从业时间 (entry 2 without its
        ``从业时间:`` label), 最大盈利 (entry 5) and 最大回撤 (entry 7).

    Raises:
        IndexError: If fewer than eight paragraphs are found.
    """
    # One pass removes newlines, tabs and spaces from a paragraph's text.
    strip_ws = str.maketrans('', '', '\n\t ')

    fund_manage = []
    # attrs must be a dict; the previous set literal {'class', 'all'} also
    # matched elements whose class is literally "class".
    for block in html.find_all('div', attrs={'class': 'all'}):
        for paragraph in block.find_all('p'):
            cleaned = paragraph.text.translate(strip_ws)
            print(cleaned)
            fund_manage.append(cleaned)

    df = pd.DataFrame({
        '基金经理': [fund_manage[0]],
        '从业年均回报': [fund_manage[3]],
        '从业时间': [fund_manage[2].replace('从业时间:', '')],
        '最大盈利': [fund_manage[5]],
        '最大回撤': [fund_manage[7]],
    })
    print(df)
    return df
def data_c(html):
    """Extract the fund's period returns from the return-ranking page.

    Takes the first ``<tbody id="tableCtn">`` in ``html``, walks its direct
    children and collects the text of every ``<td>`` below them into one
    flat list.  Fixed positions of that list are then mapped to columns.

    Args:
        html: Parsed page (BeautifulSoup document) of the ranking page.

    Returns:
        A one-row ``DataFrame`` with the columns 近1周, 近1月, 近3月, 近6月,
        近1年, 年度排名, 近2年, 近3年, 近5年 and 成立以来, taken from cell
        positions 1, 5, 9, 13, 17, 19, 21, 25, 29 and 33 respectively.

    Raises:
        IndexError: If the table body is missing or holds fewer than 34
            cells.
    """
    table_body = html.find_all('tbody', attrs={'id': 'tableCtn'})[0]

    fund_increase = []
    for row in table_body:
        # Direct children may include bare text nodes, which cannot be
        # searched; skip them instead of failing the whole extraction.
        find_cells = getattr(row, 'find_all', None)
        if find_cells is None:
            continue
        fund_increase.extend(cell.text for cell in find_cells('td'))

    df = pd.DataFrame({
        '近1周': [fund_increase[1]],
        '近1月': [fund_increase[5]],
        '近3月': [fund_increase[9]],
        '近6月': [fund_increase[13]],
        '近1年': [fund_increase[17]],
        '年度排名': [fund_increase[19]],
        '近2年': [fund_increase[21]],
        '近3年': [fund_increase[25]],
        '近5年': [fund_increase[29]],
        '成立以来': [fund_increase[33]],
    })
    print(df)
    return df
def data_d(html):
    """Extract net value, company, size and inception date of a fund.

    Works on every ``<div class="t-right">`` of ``html``:

    * net-value entries are the texts of all ``<b>`` elements inside the
      panel's ``<div class="data">`` blocks;
    * company entries are the texts of the direct children of the panel's
      ``<ul class="company">`` (empty texts are kept, so positions are
      stable).

    All texts have newlines, tabs and spaces removed.

    Args:
        html: Parsed page (BeautifulSoup document) of the net-value page.

    Returns:
        A one-row ``DataFrame`` with the columns 当前净值 (net-value entry
        0), 基金公司 (company entry 4 without ``管理人:``), 基金规模
        (company entry 0 without ``基金规模:``) and 成立时间 (company entry
        1 without ``成立时间:``).

    Raises:
        IndexError: If no net-value entry or fewer than five company
            entries are found.
    """
    # One pass removes newlines, tabs and spaces from an element's text.
    strip_ws = str.maketrans('', '', '\n\t ')
    fund_share_a = []
    fund_share_b = []

    # attrs must be dicts; the previous set literals also matched elements
    # whose class is literally "class".
    right_panels = html.find_all('div', attrs={'class': 't-right'})

    # Net-value figures.
    for panel in right_panels:
        for data_div in panel.find_all('div', attrs={'class': 'data'}):
            for bold in data_div.find_all('b'):
                fund_share_a.append(bold.text.translate(strip_ws))

    # Fund-company details.
    for panel in right_panels:
        company = panel.find('ul', attrs={'class': 'company'})
        if company is None:
            # Panel without a company list: nothing to collect here.
            continue
        for child in company:
            try:
                fund_share_b.append(child.text.translate(strip_ws))
            except AttributeError:
                # Child node that exposes no text; report it and move on.
                print('公司信息错误')

    df = pd.DataFrame({
        '当前净值': [fund_share_a[0]],
        '基金公司': [fund_share_b[4].replace('管理人:', '')],
        '基金规模': [fund_share_b[0].replace('基金规模:', '')],
        '成立时间': [fund_share_b[1].replace('成立时间:', '')],
    })
    print(df)
    return df
def data_e(html):
    """Extract the dividend amount and date from the net-value page.

    Collects the non-empty texts (newlines, tabs and spaces removed) of the
    direct children of the 32nd ``<ul class="data">`` in ``html`` and reads
    positions 3 and 4 of the collected list.

    The collection is repeated once per child of the third
    ``<div class="s-list">`` for every ``<div class="fundshare
    fund-module">`` on the page, always re-reading the same ``<ul>``.
    NOTE(review): this repetition looks accidental, but it is kept as-is
    because positions 3 and 4 can fall into a repeated pass when the
    ``<ul>`` has fewer than five entries — confirm against the live page
    before simplifying.

    Args:
        html: Parsed page (BeautifulSoup document) of the net-value page.

    Returns:
        A one-row ``DataFrame`` with the columns 分红金额 (entry 3) and
        分红时间 (entry 4).

    Raises:
        IndexError: If the expected blocks are missing or fewer than five
            entries are collected.
    """
    # One pass removes newlines, tabs and spaces from an element's text.
    strip_ws = str.maketrans('', '', '\n\t ')
    fundshare = []

    # attrs must be dicts; the previous set literals also matched elements
    # whose class is literally "class".
    for _module in html.find_all('div', attrs={'class': 'fundshare fund-module'}):
        third_list = html.find_all('div', attrs={'class': 's-list'})[2]
        for _child in third_list:
            dividend_row = html.find_all('ul', attrs={'class': 'data'})[31]
            for entry in dividend_row:
                try:
                    cleaned = entry.text.translate(strip_ws)
                except AttributeError:
                    # Child node that exposes no text; report and move on.
                    print('分红信息错误')
                    continue
                if cleaned != '':
                    fundshare.append(cleaned)

    df = pd.DataFrame({
        '分红金额': [fundshare[3]],
        '分红时间': [fundshare[4]],
    })
    print(df)
    return df
def data_f(html):
    """Extract the held stocks and the holding structure of a fund.

    * Held stocks: the texts of all ``<a>`` elements inside the
      ``<ul class="data">`` lists of the first ``<div class="s-list">``,
      each followed by ``/`` and concatenated into one string.
    * Holding structure: the text of the first
      ``<div class="highcharts-legend">`` on the page; legends are only
      read when at least one ``<div class="highcharts-container">`` exists.

    All texts have newlines, tabs and spaces removed.

    Args:
        html: Parsed page (BeautifulSoup document) of the portfolio page.

    Returns:
        A one-row ``DataFrame`` with the columns 持仓股票 and 持仓结构.

    Raises:
        IndexError: If there is no ``s-list`` block or no legend text could
            be collected.
    """
    # One pass removes newlines, tabs and spaces from an element's text.
    strip_ws = str.maketrans('', '', '\n\t ')

    # Held stocks.
    fundband = []
    stock_lists = html.find_all('div', attrs={'class': 's-list'})[0].find_all(
        'ul', attrs={'class': 'data'})
    for stock_list in stock_lists:
        for link in stock_list.find_all('a'):
            fundband.append(link.text.translate(strip_ws))
    # Every name keeps a trailing '/', including the last one.
    fundband_a = ''.join(name + '/' for name in fundband)

    # Holding structure (chart legend text).
    fundstructure = []
    for _container in html.find_all('div', attrs={'class': 'highcharts-container'}):
        legends = html.find_all('div', attrs={'class': 'highcharts-legend'})
        for position, legend in enumerate(legends):
            cleaned = legend.text.translate(strip_ws)
            print(cleaned, position, sep=',')
            fundstructure.append(cleaned)

    df = pd.DataFrame({
        '持仓股票': [fundband_a],
        '持仓结构': [fundstructure[0]],
    })
    print(df)
    return df
def data_g(html):
    """Extract the individual-investor holding summary of a fund.

    For every ``<div class="owner">`` in ``html`` the texts of all
    ``<p class="o-summary">`` elements on the page are collected (newlines,
    tabs and spaces removed, empty results dropped).  Only the first
    collected text is used.

    Args:
        html: Parsed page (BeautifulSoup document) of the holder page.

    Returns:
        A one-row ``DataFrame`` with the single column 个人持仓占比.

    Raises:
        IndexError: If no non-empty summary text is found.
    """
    # One pass removes newlines, tabs and spaces from an element's text.
    strip_ws = str.maketrans('', '', '\n\t ')
    fundperson = []

    # attrs must be dicts; the previous set literals also matched elements
    # whose class is literally "class".
    for _owner in html.find_all('div', attrs={'class': 'owner'}):
        for summary in html.find_all('p', attrs={'class': 'o-summary'}):
            cleaned = summary.text.translate(strip_ws)
            if cleaned != '':
                fundperson.append(cleaned)

    df = pd.DataFrame({'个人持仓占比': [fundperson[0]]})
    print(df)
    return df
# Accumulated result table: one row per successfully scraped fund.
df = pd.DataFrame()

# (zero-based worksheet column, record key) pairs written for every fund.
# Column 0 receives the row number; columns 2 and 5 are left untouched.
EXCEL_COLUMNS = (
    (1, '基金类型'),
    (3, '基金名称'),
    (4, '基金代码'),
    (6, '基金风险'),
    (7, '基金公司'),
    (8, '基金经理'),
    (9, '从业年均回报'),
    (10, '从业时间'),
    (11, '个人持仓占比'),
    (12, '分红时间'),
    (13, '分红金额'),
    (14, '当前净值'),
    (15, '持仓结构'),
    (16, '最大盈利'),
    (17, '最大回撤'),
    (18, '近1周'),
    (19, '近1月'),
    (20, '近3月'),
    (21, '近6月'),
    (22, '近1年'),
    (23, '近2年'),
    (24, '近3年'),
    (25, '近5年'),
    (26, '成立以来'),
    (27, '年度排名'),
    (28, '成立时间'),
    (29, '基金规模'),
    (30, '持仓股票'),
)


def normalize_fund_code(raw):
    """Return the six-character fund code for a worksheet cell value.

    Numeric cells come back from Excel as numbers (``3298.0``), which loses
    the leading zeros of a code such as ``003298``; those are converted to
    an integer and left-padded with zeros.  Any other value is turned into
    a string, its dots are removed and the first six characters are kept.
    """
    if isinstance(raw, (int, float)) and not isinstance(raw, bool):
        return str(int(raw)).zfill(6)
    return str(raw).replace('.', '')[0:6]


if __name__ == "__main__":
    # ``driver`` has to stay a module-level name: web() reads it as a global.
    driver = None
    app = xw.App(visible=True, add_book=False)
    try:
        # Open the workbook and read the fund codes from column E; the
        # first non-empty cell is treated as the header.
        wb = app.books.open('FundRank.xlsx')
        sh = wb.sheets['FundRank']
        rng = [cell for cell in sh.range('E:E').value if cell is not None]
        app.display_alerts = False
        app.screen_updating = False

        # Start the browser used by web().
        opt = ChromeOptions()
        opt.headless = False
        driver = Chrome(options=opt)

        frames = []        # one single-row frame per scraped fund
        scraped_rows = []  # zero-based worksheet row of each scraped fund

        for i, raw_code in enumerate(rng[1:]):
            try:
                time1 = time.time()  # start timing this fund
                fund_code = normalize_fund_code(raw_code)
                print(fund_code)
                base_url = 'http://fund.10jqka.com.cn/' + fund_code
                url = base_url + '/'  # basic information
                url_increase = ('https://fund.10jqka.com.cn/public/newfund/'
                                'syrank.html#' + fund_code)  # period returns
                url_share = base_url + '/historynet.html#dividends'  # dividends, size
                url_band = base_url + '/portfolioindex.html'  # held stocks
                url_person = base_url + '/holder.html#holder'  # holder structure

                # Basic and manager information share one page.
                print(url)
                html = web(url)
                df_a = data(html)
                time.sleep(0.5)
                df_b = data_b(html)
                time.sleep(1)

                # Period returns.
                print(url_increase)
                html_increase = web(url_increase)
                df_c = data_c(html_increase)
                time.sleep(1)

                # Net value, size and dividends share one page.
                print(url_share)
                html_share = web(url_share)
                df_d = data_d(html_share)
                time.sleep(0.5)
                df_e = data_e(html_share)
                time.sleep(1)

                # Held stocks and holding structure.
                print(url_band)
                html_band = web(url_band)
                df_f = data_f(html_band)
                time.sleep(1)

                # Individual-investor holding share.
                print(url_person)
                html_person = web(url_person)
                df_g = data_g(html_person)
                time.sleep(1)

                print('总耗时{}'.format(time.time() - time1))
            except Exception as exc:
                # Skip this fund entirely: combining frames left over from
                # the previous fund would store wrong data under its name.
                print(str(i), '错误', exc)
                continue

            try:
                fund_row = pd.concat(
                    [df_a, df_b, df_c, df_d, df_e, df_f, df_g], axis=1)
                frames.append(fund_row)
                scraped_rows.append(i + 1)
                df = pd.concat(frames, axis=0, ignore_index=True)
                print(df)
                # Append only the new row; appending the whole accumulated
                # table each time duplicated every earlier fund in the CSV.
                fund_row.to_csv('FundRank.csv', mode='a', header=False,
                                index=False, encoding='utf-8-sig', sep=',')
                # The JSON file always holds the full table collected so far.
                df.to_json('FundRank.json', orient='records', indent=1,
                           force_ascii=False)
                print('写入jason正常')
            except Exception as exc:
                print('写入jason错误', exc)

        # Write the collected records back into the worksheet, each into
        # the row its fund code was read from.
        if scraped_rows:
            with open('FundRank.json', 'r', encoding='utf-8') as f:
                records = json.load(f)
            for sheet_row, record in zip(scraped_rows, records):
                try:
                    sh.cells[sheet_row, 0].value = sheet_row
                    for column, key in EXCEL_COLUMNS:
                        sh.cells[sheet_row, column].value = record[key]
                    print(str(sheet_row - 1), 'excel写入正常')
                except Exception as exc:
                    print(str(sheet_row - 1), 'excel写入错误', exc)

        try:
            wb.save('FundRank.xlsx')
            wb.close()
        except Exception as exc:
            print('有错误代码', exc)
    finally:
        # Always release the browser and Excel, even after a failure.
        try:
            if driver is not None:
                driver.quit()
        finally:
            app.quit()