# 基金收益排名 (fund return ranking)

import xlwings as xw
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import json
import xlwt
import xlwings as xw
from selenium import webdriver
import time
import pandas as pd
import csv
import re
from selenium.webdriver import Chrome, ChromeOptions, ActionChains
import multiprocessing

def web(url):
    """Load *url* in the shared browser and return the rendered page as soup.

    The page is opened with the module-level ``driver``, refreshed once and
    given one second before its source is read. The rendered markup is also
    written to ``FundRank.html``, overwriting the file on every call.
    """
    driver.get(url)
    driver.refresh()
    time.sleep(1)
    # Grab the rendered markup and parse it before saving the snapshot.
    markup = driver.page_source
    soup = BeautifulSoup(markup, 'html.parser')
    with open('FundRank.html', mode='w', encoding='utf-8-sig') as snapshot:
        snapshot.write(markup)
    print('写入网页')
    return soup
def data(html):
    """Extract a fund's basic details from its overview page.

    Iterates the children of the second ``div.ti-left`` element and keeps
    the text of every child whose text is not a lone newline. The first four
    kept texts become, in order, the fund name, code, type and risk level.

    Args:
        html: Parsed page (BeautifulSoup object) as returned by ``web``.

    Returns:
        A one-row DataFrame with the columns 基金名称, 基金代码, 基金类型
        and 基金风险.

    Raises:
        IndexError: If the page has fewer than two ``div.ti-left`` elements
            or fewer than four text entries.
    """
    # ``attrs`` has to be a dict mapping attribute name to value; a set
    # literal would be interpreted as a list of alternative class names.
    header = html.find_all('div', attrs={'class': 'ti-left'})[1]
    fields = []
    for child in header:
        try:
            text = child.text
        except AttributeError:
            # A child without a ``text`` attribute carries no field value.
            print('基本信息错误')
            continue
        if text != '\n':
            fields.append(text)
    df = pd.DataFrame({
        '基金名称': [fields[0]],
        '基金代码': [fields[1]],
        '基金类型': [fields[2]],
        '基金风险': [fields[3]],
    })
    print(df)
    return df

def data_b(html):  # fund management
    """Extract fund-manager details from the overview page.

    Collects the text of every ``p`` inside every ``div.all`` element, with
    newlines, tabs and spaces removed, and picks values by fixed position in
    that list.

    Args:
        html: Parsed page (BeautifulSoup object) as returned by ``web``.

    Returns:
        A one-row DataFrame with the columns 基金经理 (entry 0),
        从业年均回报 (entry 3), 从业时间 (entry 2 without its
        ``从业时间:`` label), 最大盈利 (entry 5) and 最大回撤 (entry 7).

    Raises:
        IndexError: If fewer than eight paragraph texts were found.
    """
    strip_ws = str.maketrans('', '', '\n\t ')
    entries = []
    # ``attrs`` has to be a dict mapping attribute name to value.
    for block in html.find_all('div', attrs={'class': 'all'}):
        for para in block.find_all('p'):
            try:
                text = para.text.translate(strip_ws)
            except AttributeError:
                print('管理信息错误')
                continue
            print(text)
            entries.append(text)
    df = pd.DataFrame({
        '基金经理': [entries[0]],
        '从业年均回报': [entries[3]],
        '从业时间': [entries[2].replace('从业时间:', '')],
        '最大盈利': [entries[5]],
        '最大回撤': [entries[7]],
    })
    print(df)
    return df

def data_c(html):  # fund returns
    """Extract the fund's period returns from the ranking page.

    Flattens every ``td`` below the first ``tbody#tableCtn`` into one list of
    cell texts (document order) and picks values by fixed position in it.

    Args:
        html: Parsed page (BeautifulSoup object) as returned by ``web``.

    Returns:
        A one-row DataFrame with the columns 近1周, 近1月, 近3月, 近6月,
        近1年, 年度排名, 近2年, 近3年, 近5年 and 成立以来.

    Raises:
        IndexError: If the table body is missing or holds fewer than 34
            cells.
    """
    tbody = html.find_all('tbody', attrs={'id': 'tableCtn'})[0]
    # Searching from the table body visits the same cells, in the same order,
    # as searching each row in turn, and it cannot trip over text nodes
    # between rows (those have no ``find_all``).
    cells = [td.text for td in tbody.find_all('td')]
    # (column name, position of its value in the flattened cell list)
    layout = (
        ('近1周', 1),
        ('近1月', 5),
        ('近3月', 9),
        ('近6月', 13),
        ('近1年', 17),
        ('年度排名', 19),
        ('近2年', 21),
        ('近3年', 25),
        ('近5年', 29),
        ('成立以来', 33),
    )
    df = pd.DataFrame({name: [cells[pos]] for name, pos in layout})
    print(df)
    return df

def data_d(html):  # net value and fund size
    """Extract net value and fund-company details from the history page.

    Two lists are built from the ``div.t-right`` elements, with newlines,
    tabs and spaces removed from every text:

    * the text of every ``b`` inside their ``div.data`` children;
    * the text of every direct child of their ``ul.company`` list.

    Values are then picked by fixed position in those lists.

    Args:
        html: Parsed page (BeautifulSoup object) as returned by ``web``.

    Returns:
        A one-row DataFrame with the columns 当前净值 (first ``b`` text),
        基金公司 (company entry 4 without ``管理人:``), 基金规模 (entry 0
        without ``基金规模:``) and 成立时间 (entry 1 without ``成立时间:``).

    Raises:
        IndexError: If no ``b`` text or fewer than five company entries
            were found.
    """
    strip_ws = str.maketrans('', '', '\n\t ')
    # ``attrs`` has to be a dict mapping attribute name to value.
    panels = html.find_all('div', attrs={'class': 't-right'})

    # Net value figures.
    nav_values = []
    for panel in panels:
        for box in panel.find_all('div', attrs={'class': 'data'}):
            for bold in box.find_all('b'):
                try:
                    nav_values.append(bold.text.translate(strip_ws))
                except AttributeError:
                    print('净值信息错误')

    # Fund company entries. Every child is kept (even when its cleaned text
    # is empty) because the values below are addressed by position.
    company_entries = []
    for panel in panels:
        company = panel.find('ul', attrs={'class': 'company'})
        if company is None:
            # This panel has no company list; ``find`` returned nothing.
            continue
        for child in company:
            try:
                company_entries.append(child.text.translate(strip_ws))
            except AttributeError:
                print('公司信息错误')

    df = pd.DataFrame({
        '当前净值': [nav_values[0]],
        '基金公司': [company_entries[4].replace('管理人:', '')],
        '基金规模': [company_entries[0].replace('基金规模:', '')],
        '成立时间': [company_entries[1].replace('成立时间:', '')],
    })
    print(df)
    return df

def data_e(html):  # dividends
    """Extract the dividend amount and date from the history page.

    Reads the children of the 32nd ``ul.data`` element of the whole page
    (index 31), removes newlines, tabs and spaces from each child's text and
    keeps the non-empty results. The row is read once, and only when the page
    has a ``div.fundshare.fund-module`` element and its third ``div.s-list``
    section has at least one child.

    NOTE(review): presumably index 31 is the first row of the dividend table;
    the index depends on the page layout — confirm against a live page.

    Args:
        html: Parsed page (BeautifulSoup object) as returned by ``web``.

    Returns:
        A one-row DataFrame with the columns 分红金额 (kept entry 3) and
        分红时间 (kept entry 4).

    Raises:
        IndexError: If the page has fewer than three ``div.s-list`` sections
            or fewer than 32 ``ul.data`` elements, or if the row yields
            fewer than five non-empty entries.
    """
    strip_ws = str.maketrans('', '', '\n\t ')
    fields = []
    # ``attrs`` has to be a dict mapping attribute name to value.
    if html.find_all('div', attrs={'class': 'fundshare fund-module'}):
        third_section = html.find_all('div', attrs={'class': 's-list'})[2]
        # Iterating a section yields its children; skip the row when the
        # section is empty.
        if any(True for _ in third_section):
            row = html.find_all('ul', attrs={'class': 'data'})[31]
            for cell in row:
                try:
                    text = cell.text.translate(strip_ws)
                except AttributeError:
                    print('分红信息错误')
                    continue
                if text != '':
                    fields.append(text)
    df = pd.DataFrame({
        '分红金额': [fields[3]],
        '分红时间': [fields[4]],
    })
    print(df)
    return df

def data_f(html):  # holdings and allocation
    """Extract the held stocks and the allocation legend from the portfolio page.

    Holdings: the text of every ``a`` link inside the ``ul.data`` rows of the
    first ``div.s-list`` element, each followed by ``/`` and joined into one
    string. Allocation: the text of the first ``div.highcharts-legend`` on
    the page, read only when at least one ``div.highcharts-container``
    exists. Newlines, tabs and spaces are removed from every text.

    Args:
        html: Parsed page (BeautifulSoup object) as returned by ``web``.

    Returns:
        A one-row DataFrame with the columns 持仓股票 and 持仓结构.

    Raises:
        IndexError: If the page has no ``div.s-list`` element, or no chart
            container / legend to read the allocation from.
    """
    strip_ws = str.maketrans('', '', '\n\t ')

    # Held stocks.
    stock_names = []
    first_section = html.find_all('div', attrs={'class': 's-list'})[0]
    for row in first_section.find_all('ul', attrs={'class': 'data'}):
        for link in row.find_all('a'):
            try:
                stock_names.append(link.text.translate(strip_ws))
            except AttributeError:
                print('持仓信息错误')
    # Every name is followed by a slash, including the last one.
    holdings = ''.join(name + '/' for name in stock_names)

    # Allocation legend. The legend lookup is page-wide, so it is done once
    # rather than once per chart container.
    legend_texts = []
    if html.find_all('div', attrs={'class': 'highcharts-container'}):
        legends = html.find_all('div', attrs={'class': 'highcharts-legend'})
        for idx, legend in enumerate(legends):
            try:
                text = legend.text.translate(strip_ws)
            except AttributeError:
                print('持仓结构错误')
                continue
            print(text, idx, sep=',')
            legend_texts.append(text)

    df = pd.DataFrame({
        '持仓股票': [holdings],
        '持仓结构': [legend_texts[0]],
    })
    print(df)
    return df
def data_g(html):  # share held by individual investors
    """Extract the holder summary from the holder-structure page.

    Takes the first non-empty text among the page's ``p.o-summary`` elements
    (newlines, tabs and spaces removed). The summaries are read only when the
    page has at least one ``div.owner`` element.

    Args:
        html: Parsed page (BeautifulSoup object) as returned by ``web``.

    Returns:
        A one-row DataFrame with the single column 个人持仓占比.

    Raises:
        IndexError: If there is no ``div.owner`` element or no non-empty
            summary text.
    """
    strip_ws = str.maketrans('', '', '\n\t ')
    summaries = []
    # ``attrs`` has to be a dict mapping attribute name to value. The summary
    # lookup is page-wide, so it is done once rather than once per owner div.
    if html.find_all('div', attrs={'class': 'owner'}):
        for para in html.find_all('p', attrs={'class': 'o-summary'}):
            try:
                text = para.text.translate(strip_ws)
            except AttributeError:
                print('个人持仓信息错误')
                continue
            if text != '':
                summaries.append(text)
    df = pd.DataFrame({'个人持仓占比': [summaries[0]]})
    print(df)
    return df

def fund_code_from_cell(value):
    """Return the six-character fund code for a value read from the sheet.

    Numeric values are converted to an integer and left-padded with zeros,
    so ``3298.0`` becomes ``'003298'`` rather than ``'32980'``. For any other
    value, dots are removed from its string form and the first six characters
    are kept.
    """
    if isinstance(value, (int, float)):
        return str(int(value)).zfill(6)[0:6]
    return str(value).replace('.', '')[0:6]


df = pd.DataFrame()
if __name__ == "__main__":
    # Script flow: read the fund codes from column E of FundRank.xlsx, scrape
    # five pages per fund, append each scraped fund to FundRank.csv, rewrite
    # FundRank.json with everything scraped so far, then fill the sheet.

    # (zero-based sheet column, record key) for every value written per fund.
    EXCEL_COLUMNS = (
        (1, '基金类型'),
        (3, '基金名称'),
        (4, '基金代码'),
        (6, '基金风险'),
        (7, '基金公司'),
        (8, '基金经理'),
        (9, '从业年均回报'),
        (10, '从业时间'),
        (11, '个人持仓占比'),
        (12, '分红时间'),
        (13, '分红金额'),
        (14, '当前净值'),
        (15, '持仓结构'),
        (16, '最大盈利'),
        (17, '最大回撤'),
        (18, '近1周'),
        (19, '近1月'),
        (20, '近3月'),
        (21, '近6月'),
        (22, '近1年'),
        (23, '近2年'),
        (24, '近3年'),
        (25, '近5年'),
        (26, '成立以来'),
        (27, '年度排名'),
        (28, '成立时间'),
        (29, '基金规模'),
        (30, '持仓股票'),
    )

    # Open the workbook and read the non-empty cells of column E.
    app = xw.App(visible=True, add_book=False)
    wb = app.books.open('FundRank.xlsx')
    sh = wb.sheets['FundRank']
    rng = [cell for cell in sh.range('E:E').value if cell is not None]
    app.display_alerts = False
    app.screen_updating = False

    # Start the browser. ``web`` reads this module-level ``driver``.
    opt = ChromeOptions()
    opt.headless = False
    driver = Chrome(options=opt)

    frames = []   # one single-row frame per successfully scraped fund
    records = []  # (position in the code list, scraped values) per fund
    try:
        # The first non-empty cell of column E is skipped (header).
        for i, cell in enumerate(rng[1:]):
            try:
                time1 = time.time()  # start timing this fund
                Fundname = fund_code_from_cell(cell)
                print(Fundname)
                url = 'http://fund.10jqka.com.cn/'+Fundname+'/'  # basic details
                url_increase = 'https://fund.10jqka.com.cn/public/newfund/syrank.html#'+Fundname  # returns
                url_share = 'http://fund.10jqka.com.cn/'+Fundname+'/historynet.html#dividends'  # dividends, size
                # e.g. http://fund.10jqka.com.cn/006039/portfolioindex.html
                url_band = 'http://fund.10jqka.com.cn/'+Fundname+'/portfolioindex.html'  # holdings
                # e.g. http://fund.10jqka.com.cn/002965/holder.html#holder
                url_person = 'http://fund.10jqka.com.cn/'+Fundname+'/holder.html#holder'  # holder structure

                # Basic and manager details.
                print(url)
                html = web(url)
                df_a = data(html)
                time.sleep(0.5)
                df_b = data_b(html)
                time.sleep(1)

                # Period returns.
                print(url_increase)
                html_increase = web(url_increase)
                df_c = data_c(html_increase)
                time.sleep(1)

                # Net value, size and dividends.
                print(url_share)
                html_share = web(url_share)
                df_d = data_d(html_share)
                time.sleep(0.5)
                df_e = data_e(html_share)
                time.sleep(1)

                # Holdings.
                print(url_band)
                html_band = web(url_band)
                df_f = data_f(html_band)
                time.sleep(1)

                # Holder structure.
                print(url_person)
                html_person = web(url_person)
                df_g = data_g(html_person)
                time.sleep(1)

                row = pd.concat([df_a, df_b, df_c, df_d, df_e, df_f, df_g], axis=1)
                time2 = time.time()
                print('总耗时{}'.format(time2-time1))
            except Exception as exc:
                # Skip this fund entirely; frames left over from the previous
                # fund must not be written under this fund's position.
                print(str(i), '错误', exc)
                continue

            frames.append(row)
            records.append((i, row.iloc[0].to_dict()))
            try:
                df = pd.concat(frames, axis=0)
                print(df)
                # The file is opened in append mode, so only the new row is
                # written; writing the cumulative frame would repeat every
                # earlier row.
                row.to_csv('FundRank.csv', mode='a+', header=False, index=False,
                           encoding='utf-8-sig', sep=',')
                # The JSON file is rewritten with everything scraped so far.
                df.to_json('FundRank.json', orient='records', indent=1, force_ascii=False)
                print('写入jason正常')
            except Exception as exc:
                print('写入jason错误', exc)

        # Fill the sheet from the values scraped in this run. The fund at
        # position ``i`` of the code list goes to zero-based sheet row i + 1.
        for position, record in records:
            try:
                sh.cells[position + 1, 0].value = position + 1
                for column, key in EXCEL_COLUMNS:
                    sh.cells[position + 1, column].value = record[key]
                print(str(position), 'excel写入正常')
            except Exception as exc:
                print(str(position), 'excel写入错误', exc)
    finally:
        # Always release Excel and the browser, even when interrupted.
        try:
            wb.save('FundRank.xlsx')
            wb.close()
            app.quit()
        except Exception as exc:
            print('有错误代码', exc)
        try:
            # ``quit`` closes every window and ends the session; the driver
            # must not be used afterwards.
            driver.quit()
        except Exception as exc:
            print('有错误代码', exc)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

品尚公益团队

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值