tushare多因子选股

借助tushare,计算各个因子,然后根据OLS回归,计算各股票因子分值,排序进行股票购买

导包:

# Notebook setup: data/plotting imports, matplotlib font settings, and the
# tushare pro client (`pro`) that the functions below call.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# IPython magic (only valid inside a notebook/IPython session).
%matplotlib inline
from pylab import mpl
# Use the SimHei font so the Chinese column labels used below can be rendered
# in plots (assumes the font is installed on the machine).
mpl.rcParams['font.sans-serif'] = ['SimHei']
# Draw the minus sign as an ASCII hyphen rather than the Unicode minus glyph.
mpl.rcParams['axes.unicode_minus'] = False
import tushare as ts
# Placeholder: the right-hand side must be replaced with your own tushare API
# token; as written, `token` is not defined before this line.
token = token
pro = ts.pro_api(token)
import statsmodels.api as sm
import talib as ta

获取数据

def get_HS300(start, end, listed_before=20190601):
    """Return the index constituents that pass the listing-age and ST filters.

    Constituents of index ``399300.SZ`` are read from ``pro.index_weight`` for
    the given date range and intersected with the currently listed stocks from
    ``pro.stock_basic`` that were listed early enough and are not ST-flagged.

    Args:
        start: Passed to ``pro.index_weight`` as ``start_date``.
        end: Passed to ``pro.index_weight`` as ``end_date``.
        listed_before: Keep only stocks whose ``list_date``, read as an integer
            YYYYMMDD, is strictly smaller than this value. Defaults to
            20190601, the cutoff that was previously hard-coded.

    Returns:
        A list of ``ts_code`` strings, each appearing once, in the order in
        which they first appear in the index-weight table.
    """
    weights = pro.index_weight(index_code='399300.SZ', start_date=start, end_date=end)
    # A date range covering several weight snapshots repeats each constituent,
    # so de-duplicate while preserving first-seen order; otherwise every
    # downstream loop would process the same stock several times.
    constituents = list(dict.fromkeys(weights['con_code']))

    basics = pro.stock_basic(exchange='', list_status='L')
    seasoned = basics['list_date'].astype(int) < listed_before
    # Exclude both 'ST' and '*ST' names; matching only '*ST' let plain 'ST'
    # stocks through although the intent is to remove ST stocks.
    names = basics['name'].str
    is_st = names.startswith('ST', na=False) | names.startswith('*ST', na=False)

    # A set gives O(1) membership tests instead of scanning a list per code.
    eligible = set(basics.loc[seasoned & ~is_st, 'ts_code'])
    return [code for code in constituents if code in eligible]
def get_index(start, end):
    """Fetch the daily ``pct_chg`` series of index ``399300.SZ``.

    Args:
        start: Passed to ``pro.index_daily`` as ``start_date``.
        end: Passed to ``pro.index_daily`` as ``end_date``.

    Returns:
        The ``pct_chg`` column as a Series whose index is the parsed
        ``trade_date`` values, sorted in ascending date order.
    """
    quotes = pro.index_daily(ts_code='399300.SZ', start_date=start, end_date=end)
    # Index the rows by parsed trade date (the column itself is kept).
    dated = quotes.set_index(pd.to_datetime(quotes['trade_date']))
    return dated.sort_index(ascending=True)['pct_chg']
              

计算因子值

def get_change(lists, start, end):
    """Build the raw factor table for every stock code in ``lists``.

    For each code the daily quotes (``pro.daily``) are joined with the
    ``ps_ttm`` column of ``pro.daily_basic`` and the following columns are
    derived:

    * ``EMAC12``: 12-period EMA of the close divided by the close.
    * ``ARBR``: ``AR - BR``, where AR and BR are 26-period sum ratios
      (scaled by 100) of high-open / open-low and of
      high-previous close / previous close-low respectively.

    Args:
        lists: Iterable of ``ts_code`` strings.
        start: Passed to both tushare calls as ``start_date``.
        end: Passed to both tushare calls as ``end_date``.

    Returns:
        A list with one DataFrame per code, in the same order as ``lists``.
        Each frame is indexed by trade date (ascending), has the columns
        ``pct_chg``, ``EMAC12``, ``ARBR`` and ``ps_ttm``, and contains only
        rows that had no missing values.
    """
    stock_list = []
    for ts_code in lists:
        quotes = pro.daily(ts_code=ts_code, start_date=start, end_date=end)
        basics = pro.daily_basic(ts_code=ts_code, start_date=start, end_date=end)
        # Join on the trading date, not on the positional row index: the two
        # endpoints are independent queries, and pairing row k with row k
        # attaches ps_ttm to the wrong day whenever their rows differ.
        df = pd.merge(quotes, basics[['trade_date', 'ps_ttm']], on='trade_date')
        df.index = pd.to_datetime(df['trade_date'])
        df = df.sort_index(ascending=True)

        df['EMAC12'] = ta.EMA(df.close, timeperiod=12) / df.close

        prev_close = df.close.shift(1)
        df['HO'] = df.high - df.open
        df['OL'] = df.open - df.low
        df['HCY'] = df.high - prev_close
        df['CYL'] = prev_close - df.low
        df['AR'] = ta.SUM(df.HO, timeperiod=26) / ta.SUM(df.OL, timeperiod=26) * 100
        df['BR'] = ta.SUM(df.HCY, timeperiod=26) / ta.SUM(df.CYL, timeperiod=26) * 100
        df['ARBR'] = df['AR'] - df['BR']

        # The rolling indicators are NaN during their warm-up window; drop
        # those rows before keeping only the factor columns.
        df = df.dropna()
        stock_list.append(df[['pct_chg', 'EMAC12', 'ARBR', 'ps_ttm']])
    return stock_list

去极值与标准化(MAD 法):

def MAD(data):
    """Standardize, winsorize by median absolute deviation, and min-max scale.

    Each column is processed independently in three steps:

    1. z-score: subtract the column mean and divide by the column standard
       deviation;
    2. clip to ``median +/- 3 * 1.4826 * MAD``, where MAD is the median of the
       absolute deviations from the column median;
    3. rescale linearly so the column minimum maps to 0 and the maximum to 1.

    Args:
        data: Anything accepted by ``pd.DataFrame`` (Series, DataFrame, list,
            array). The input object is not modified.

    Returns:
        A DataFrame of the same shape with every column scaled to [0, 1].
        A column whose standard deviation or clipped range is zero comes out
        as NaN because of the division by zero.
    """
    data = pd.DataFrame(data)
    data = (data - data.mean()) / data.std()
    for pos in range(data.shape[1]):
        column = data.iloc[:, pos]
        center = np.median(column)
        spread = 3 * 1.4826 * np.median(np.abs(column - center))
        # Assign the clipped column back explicitly. The previous chained
        # form `data.iloc[:, i][mask] = value` writes through a temporary
        # object and is silently a no-op under pandas copy-on-write.
        data.iloc[:, pos] = column.clip(lower=center - spread, upper=center + spread)
    return (data - data.min()) / (data.max() - data.min())

获取股票数据并计算标准化因子:

# Stock universe, then the raw factor table for each stock in it.
codes = get_HS300('20200601', '20231203')
# Dates are passed as 'YYYYMMDD' strings, consistent with every other
# tushare call in this file (they were previously passed as bare integers).
x = get_change(codes, '20200401', '20200619')

# Daily risk-free rate derived from a 4% annual rate over 360 days.
# tushare reports pct_chg in percent (1.5 means 1.5%), so the rate is scaled
# by 100 to put both sides of the subtraction in the same unit.
rf = (1.04 ** (1 / 360) - 1) * 100

for factors in x:
    # Excess return of the stock over the risk-free rate.
    factors['change'] = factors['pct_chg'] - rf
    # Replace each raw factor with its MAD-winsorized, [0, 1]-scaled version.
    for raw_name in ('EMAC12', 'ARBR', 'ps_ttm'):
        factors[raw_name + '_normal'] = MAD(factors[raw_name])
    factors.drop(['pct_chg', 'EMAC12', 'ARBR', 'ps_ttm'], axis=1, inplace=True)
x[0].head()

获取基准指数数据,并与各股票因子合并:

# Index daily change minus the risk-free rate: the market factor series.
HS300_index = get_index('20200513', '20231209') - rf
HS300_index.head()

# Labels applied positionally to the four per-stock columns followed by the
# index column that the merge appends.
factor_labels = ['日涨跌', 'EMAC12因子', 'ARBR因子', '滚动市销率因子', '市场风险溢价因子']

stocks = []
for factors in x:
    # Inner join on the date index keeps only days present in both tables.
    stock = factors.merge(HS300_index, left_index=True, right_index=True)
    stock.columns = factor_labels
    stocks.append(stock)
stocks[0].head()

ols计算回归

# Regress each stock's excess return on its four factors plus an intercept.
# `results` gets one column per stock code and one row per fitted parameter;
# the intercept row is labelled 'const' (the factor rows are presumably
# labelled x1..x4 in column order because a bare array is passed; verify).
factor_columns = ['EMAC12因子', 'ARBR因子', '滚动市销率因子', '市场风险溢价因子']
params_by_code = {}
for code, stock in zip(codes, stocks):
    try:
        exog = sm.add_constant(stock[factor_columns].values)
        fitted = sm.OLS(stock['日涨跌'], exog).fit()
    except Exception as exc:
        # Keep the best-effort behaviour (one bad stock must not abort the
        # run) but report which stock was skipped and why, instead of
        # silently swallowing every error including KeyboardInterrupt.
        print(f'skipping {code}: {exc!r}')
        continue
    params_by_code[code] = fitted.params

# Build the frame once rather than inserting columns one at a time.
results = pd.DataFrame(params_by_code)
results.head()

根据常数项,判断离散程度,选择最大的,构建股票池:

# Reorder the stock columns by their fitted intercept (the 'const' row),
# smallest first, so the largest intercepts end up in the last columns.
z = results.sort_values(by='const', axis=1, ascending=True)
z.head()

  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

神出鬼没,指的就是我!

必须花钱,数据超好

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值