通常情况下,我们使用线性回归来拟合因子数据,用来解释因子对于收益的贡献程度。但实际情况下,此类方法过于简单,且市场收益并不一定与各类因子线性相关,难以达到预期效果。众所周知,树模型是机器学习中的一类算法,决策树算法则是其中的基础,以下将详述决策树的概念与应用。
决策树的概念与运用
作者:邱吉尔
1. 导入库包
In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
# matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later dropped the old names; pick whichever name this install provides
# so the notebook runs on both old and new matplotlib versions.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
2. 获取数据和数据合并处理
In [2]:
# Sample window, parsed from 'YYYY-MM-DD' strings into datetime objects.
start_date = datetime.datetime.strptime('2017-01-01', '%Y-%m-%d')
end_date = datetime.datetime.strptime('2019-01-21', '%Y-%m-%d')

# Constituents of index '000300.SH' (presumably CSI 300 -- confirm against
# the platform docs) at the start and at the end of the window.
stock_list1 = get_index_stocks('000300.SH', start_date)
stock_list2 = get_index_stocks('000300.SH', end_date)

# Keep only stocks present in the index on both dates. Sets have no defined
# iteration order, so sort the result to make the per-stock processing order
# (and everything built from it) repeatable between runs.
stock_list = sorted(set(stock_list1) & set(stock_list2))

# Not read within this block; presumably consumed by later code -- verify.
flag = True
for stock in stock_list:
trade_days = get_trade_days(start_date, end_date).strftime('%Y-%m-%d')
q = query(
factor.date,
factor.symbol,
#趋势性
factor.bbi,factor.ma,factor.expma,factor.priceosc,factor.ddi,
#反趋势
factor.bias, factor.cci,factor.dbcd,factor.dpo,factor.kdj,
#能量型
factor.arbr,factor.cr,factor.psy,factor.vr_rate,factor.wad,
#量价指标
factor.mfi,factor.obv,factor.pvt,factor.wvad,
#压力支撑型
factor.bbiboll,factor.boll,factor.cdp,factor.env,factor.mike,
#成交量
factor.vr,factor.vma,factor.vmacd,factor.vosc,factor.vstd,factor.tapi,
#超买超卖
factor.adtm,
#摆动类
factor.mi,factor.micd,factor.rc,factor.rccd,factor.srmi,
#强弱型
factor.dptb,factor.jdqs,factor.jdrs,factor.zdzb,factor.atr,factor.mass,
#日期类
factor.up_n,factor.down_n
).filter(
factor.symbol==stock,
factor.date.in_(trade_days)
)
df = get_factors(q)
for i in range(len(df['factor_symbol'])):
try:
df.ix[i,'price']=get_price(df.ix[i,'factor_symbol'], None,df.ix[i,'factor_date'], '1d', ['close'], True, None,1, is_panel=1)['close&#