Python获取基金收益计算

用 Python 获取基金的历史收益率,估算出哪些基金可能比较好。这只是自己的一些愚见,纯粹是为了学习 Python,里面的内容不能作为任何投资参考。刚开始学习 Python,写得不好,希望各位大牛能帮忙指出不足,谢谢。

1、获取所有基金的代码(保存为 GetFundCode.py,第 3 步会以 `import GetFundCode as gd` 的方式导入)

import requests
import re


def getAllFundCode():
    """Download the Eastmoney fund search script and return the fund codes in it.

    Returns:
        list[str]: every double-quoted numeric token found in the downloaded
        text, in order of appearance, with the surrounding quotes removed.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "http://fund.eastmoney.com/js/fundcode_search.js"
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of regex-scanning an error page.
    response.raise_for_status()
    # The capture group returns the digits directly, so no second pass is
    # needed to strip the quotes.
    return re.findall(r'"(\d+\.?\d*)"', response.text)

2、获取基金信息(保存为 GetFundInfo.py,第 3 步会以 `import GetFundInfo as gf` 的方式导入)

import requests
import re
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import logging


# Fetch one page of a fund's historical net-value listing
def getHtml(code, start_date, end_date, page=1, per=20):
    """Request one page of the `lsjz` listing for a fund and return the body text.

    Args:
        code: fund code placed in the `code` query parameter.
        start_date: value for the `sdate` query parameter.
        end_date: value for the `edate` query parameter.
        page: page number to request (`page` query parameter).
        per: page size to request (`per` query parameter).

    Returns:
        str: the response body as text.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "http://fund.eastmoney.com/f10/F10DataApi.aspx?type=lsjz&code={0}&page={1}&sdate={2}&edate={3}&per={4}".format(
        code, page, start_date, end_date, per)
    # Without a timeout a stalled connection would block the calling thread
    # indefinitely.
    rsp = requests.get(url, timeout=30)
    rsp.raise_for_status()
    return rsp.text


# Download a fund's net-value history as a DataFrame
def getFund(name, code, start_date, end_date, page=1, per=20):
    """Fetch every page of a fund's net-value history and build a DataFrame.

    Args:
        name: label of the calling worker; only used in log messages.
        code: fund code to download.
        start_date: start of the date range, forwarded to getHtml.
        end_date: end of the date range, forwarded to getHtml.
        page: accepted for interface compatibility; every page from 1 to the
            reported total is always downloaded.
        per: number of rows requested per page.

    Returns:
        pandas.DataFrame indexed by '净值日期' in ascending order, with
        '单位净值' and '累计净值' converted to float and '日增长率' converted
        to float after stripping the trailing '%'. Returns None when no rows
        were found or when any step raised an exception (the exception is
        printed and an error line is logged).
    """
    log_format = "%(asctime)s - %(message)s"
    # basicConfig does nothing once the root logger already has handlers, so
    # calling it on every invocation is harmless.
    logging.basicConfig(filename="exec.log", format=log_format, level=logging.INFO)
    try:
        # Probe page 1 with the requested page size. Passing `per` as the
        # fourth positional argument would put it into getHtml's `page` slot
        # and make the reported page count disagree with the loop below.
        html = getHtml(code, start_date, end_date, 1, per)
        soup = BeautifulSoup(html, "html.parser")
        # Total number of pages
        pattern = re.compile(r'pages:(.*),')
        total_page = int(re.search(pattern, html).group(1))
        # Column headers
        heads = [head.contents[0] for head in soup.find_all("th")]
        # Collected table rows
        records = []
        for current_page in range(1, total_page + 1):
            # Page 1 is already downloaded and parsed above; only fetch the
            # following pages.
            if current_page > 1:
                html = getHtml(code, start_date, end_date, current_page, per)
                soup = BeautifulSoup(html, 'html.parser')
            for row in soup.find_all("tbody")[0].find_all("tr"):
                row_records = []
                for record in row.find_all('td'):
                    val = record.contents
                    # An empty cell becomes NaN
                    row_records.append(val[0] if val else np.nan)
                records.append(row_records)
        if not records:
            return None
        # Convert the rows into a DataFrame, one column per header
        np_records = np.array(records)
        fund_df = pd.DataFrame()
        for col, col_name in enumerate(heads):
            fund_df[col_name] = np_records[:, col]
        # Sort by date.
        # NOTE(review): assumes the endpoint returns dates as YYYY/MM/DD —
        # confirm against a live response.
        fund_df['净值日期'] = pd.to_datetime(fund_df['净值日期'], format='%Y/%m/%d')
        fund_df = fund_df.sort_values(by='净值日期', axis=0, ascending=True).reset_index(drop=True)
        fund_df = fund_df.set_index('净值日期')
        # Convert the numeric columns
        fund_df['单位净值'] = fund_df['单位净值'].astype(float)
        fund_df['累计净值'] = fund_df['累计净值'].astype(float)
        fund_df['日增长率'] = fund_df['日增长率'].str.strip('%').astype(float)
        finish_info = "thread_name: {} code: {} finish!".format(name, code)
        logging.info(finish_info)
        return fund_df
    except Exception as e:
        # Best effort: a fund that cannot be downloaded or parsed is reported
        # and skipped by the caller via the None return value.
        print(e)
        error_info = "thread_name: {} code: {} error!".format(name, code)
        logging.error(error_info)
        return None

3、开始分析基金数据并排序。排序指标为 $\dfrac{\sigma^{2}\cdot\sigma}{\mu^{2}}$,即「日增长率方差 × 日增长率标准差 ÷ 日增长率均值的平方」,其中 $\mu$ 是日增长率的均值,$\sigma$ 是日增长率的标准差。算出来的值越低,说明收益的偏离和离散程度越低、平均收益率越高。

import GetFundInfo as gf
import GetFundCode as gd
import datetime
import numpy as np
import threading as tr
import os

# Module-level lock.
# NOTE(review): nothing in the visible code acquires it — confirm whether it
# is still needed.
threadLock = tr.Lock()


# Pick out the third field of a record
def takeSecond(elem):
    """Return the item at index 2 of *elem* (the third item, despite the name)."""
    third = elem[2]
    return third


# A fund qualifies only if its history reaches back to 2015-01-01 or earlier;
# later funds do not have enough data for back-testing.
def compareTime(time):
    """Return True when *time* is on or before 2015-01-01 00:00:00, else False.

    Args:
        time: a datetime-like value comparable with `datetime.datetime`.
    """
    cutoff = datetime.datetime.strptime("2015-01-01", "%Y-%m-%d")
    return bool(time <= cutoff)


# Score every fund in one batch of codes
def analysis(name, codes, analysis_list):
    """Compute the ranking statistics for each fund code and collect them.

    For each code the history from 2014-01-01 to 2021-01-01 is requested via
    `gf.getFund`. A fund is skipped when no data came back, when
    `compareTime` rejects its earliest date, when the mean of '日增长率' is
    not positive, or when the resulting score is not a finite number.

    Args:
        name: worker label, forwarded to `gf.getFund`.
        codes: iterable of fund codes to process.
        analysis_list: list that receives one tuple per accepted fund:
            (code, mean, variance, standard deviation, score), where
            score = variance * standard deviation / mean ** 2.

    Returns:
        None. Results are appended to *analysis_list*.
    """
    for code in codes:
        # Handle failures per code so that one bad fund does not discard the
        # rest of the batch.
        try:
            fund_df = gf.getFund(name, code, start_date='2014-01-01', end_date='2021-01-01')
            if fund_df is None:
                continue
            if not compareTime(fund_df.index.min()):
                continue
            growth = fund_df['日增长率']
            # Mean
            df_mean = growth.mean(axis=0)
            if df_mean <= 0:
                continue
            # Variance
            df_var = growth.var(axis=0)
            # Standard deviation
            df_td = growth.std(axis=0)
            # Ranking score: variance * standard deviation / mean squared
            df_calculate = df_var * df_td / np.square(df_mean)
            # A NaN/inf score (e.g. a single-row history has no sample
            # variance) would make the later sort order unreliable.
            if not np.isfinite(df_calculate):
                continue
            analysis_list.append((code, df_mean, df_var, df_td, df_calculate))
        except Exception as e:
            print("analysis error: " + str(e))


# Split all fund codes into batches, one batch per worker thread
def getTheCode():
    """Fetch every fund code and split the list into at most 20 batches.

    Returns:
        list[list]: consecutive slices of the code list, in original order.
        Empty when no codes were returned.
    """
    codes = gd.getAllFundCode()
    if not codes:
        return []
    target_batches = 20
    # Ceiling division: the batch size is never 0 (which would make range()
    # raise for fewer than 20 codes) and the batch count never exceeds the
    # target.
    batch_size = -(-len(codes) // target_batches)
    return [codes[i:i + batch_size] for i in range(0, len(codes), batch_size)]


# Append one line to the result log
def writeDocument(data):
    """Append `str(data)` plus a newline to `my_info.log` in the current directory.

    Args:
        data: any object; it is converted with `str()` before writing.
    """
    file_name = os.path.join(os.getcwd(), 'my_info.log')
    # The context manager closes the file even if the write fails. UTF-8 is
    # set explicitly because the messages contain Chinese text and the
    # platform default encoding may not be able to encode it.
    with open(file_name, "a", encoding="utf-8") as log_file:
        log_file.write(str(data) + '\n')


# Run the whole analysis: one thread per batch, then sort and write results
def start():
    """Analyse all fund batches in parallel threads and log the sorted results.

    One thread per batch from `getTheCode()` runs `analysis`, all appending
    to a shared list. After every thread has finished, the collected tuples
    are sorted in ascending order of their fifth field and written one per
    line with `writeDocument`. Any exception is printed rather than raised.
    """
    try:
        analysis_list = []
        workers = []
        for number, batch in enumerate(getTheCode(), start=1):
            worker = tr.Thread(
                target=analysis,
                args=("thread{}".format(number), batch, analysis_list),
            )
            worker.start()
            workers.append(worker)
        writeDocument("开始运行!")

        # Wait until every worker thread has finished.
        for worker in workers:
            worker.join()
        writeDocument("完成添加开始排序-------------")

        ranked = sorted(analysis_list, key=lambda item: item[4])
        for entry in ranked:
            writeDocument(entry)
        writeDocument("所有线程任务完成")
    except Exception as e:
        print("start error: " + str(e))

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值