用 Python 爬取且慢(qieman.com)基金的夏普比率、最大回撤、波动率等指标

# -*- coding:utf-8 -*-

import os
import sys
import time
import traceback
import json
import bs4
import pandas as pd
from selenium import webdriver

'''
https://www.jianshu.com/p/4b89c92ff9b4
https://cuiqingcai.com/2577.html
'''

# Print the interpreter's default string encoding (diagnostic output only).
print(sys.getdefaultencoding())

# Module-level browser instance shared by every func_load_webpage call.
# The PhantomJS executable is looked up at ./bin/phantomjs.exe, i.e. relative
# to the current working directory.
# NOTE(review): webdriver.PhantomJS is believed to be unavailable in
# Selenium 4+ — confirm the pinned selenium version before running.
driver = webdriver.PhantomJS(executable_path="./bin/phantomjs.exe")


def func_load_webpage(code):
    """Fetch one fund page from qieman.com and return its key metrics.

    The page is loaded through the module-level ``driver`` and its
    ``<span>`` / ``<h1>`` elements are walked in document order.  When an
    element's text equals one of the known metric labels, the text of the
    element that follows it is stored as that metric's value.  The text of
    an ``<h1>`` element is stored as the fund name.  Scanning stops as soon
    as the volatility value has been captured.

    Args:
        code: Fund code; must be an all-digit string of length 6.

    Returns:
        ``False`` if ``code`` fails validation; otherwise a JSON string
        (non-ASCII characters kept as-is) with the keys ``fundname``,
        ``scale``, ``withdrawal``, ``sharp``, ``volatility`` and ``code``.
        Metrics not found on the page stay as empty strings.
    """
    if not (code.isdigit() and len(code) == 6):
        return False

    # Label text shown on the page -> key used in the result record.
    label_to_field = {
        u'最新规模': 'scale',
        u'最大回撤': 'withdrawal',
        u'夏普比率': 'sharp',
        u'波动率': 'volatility',
    }

    record = {
        'fundname': '',     # fund name
        'scale': '',        # latest fund size
        'withdrawal': '',   # maximum drawdown
        'sharp': '',        # Sharpe ratio
        'volatility': '',   # volatility
        'code': code,       # fund code
    }

    driver.get('https://qieman.com/funds/%s' % code)
    html = driver.page_source.encode('utf-8')
    parsed = bs4.BeautifulSoup(html, "html.parser")

    pending = ''  # field waiting for its value from the next element
    for node in parsed.find_all(['span', 'h1']):
        text = node.text

        if pending != '':
            record[pending] = text
            if pending == 'volatility':
                # Volatility is the last metric collected; stop scanning.
                break

        if node.name == 'h1':
            record['fundname'] = text

        # Arm the capture when this element is a known label, else disarm.
        pending = label_to_field.get(text, '')

    return json.dumps(record, ensure_ascii=False)


if __name__ == "__main__":
    # Fund codes to scrape; func_load_webpage only accepts six-digit strings.
    codelist = ['000216', '000961', '001071', '001513', '001549', '001550',
                '001632', '006751', '003095', '040046', '110011', '110023',
                '150303', '161033', '161122', '161130', '161725', '161903',
                '163406', '163415', '519674']

    records = []  # one decoded metrics dict per successfully scraped fund

    try:
        for item in codelist:
            payload = func_load_webpage(item)
            if payload is False:
                # False means the code failed validation; passing it on to
                # json.loads would raise TypeError, so skip it instead.
                print('skip invalid fund code: %s' % item)
                continue
            records.append(json.loads(payload))
            time.sleep(1)  # pause between page loads
    finally:
        # Always shut the browser down so no PhantomJS process is left behind.
        driver.quit()

    # Build the frame in one step; DataFrame.append no longer exists in
    # pandas 2.x, and growing a frame row by row copies it on every call.
    df = pd.DataFrame(records)

    # Write starting at the first column of sheet "Data", without the index.
    df.to_excel('Fund.xlsx', sheet_name='Data', startcol=0, index=False)

如图:

在这里插入图片描述

代码下载

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值