# ===== 研究平台代码:准备因子数据并训练回归系数 =====
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
# Build the training table: for every month-end in 2014-01-01 ~ 2016-01-01,
# take a factor snapshot of the CSI 300 constituents and attach the return
# realised up to the next month-end as `next_month_return`.
dates = get_trading_dates(start_date="2014-01-01", end_date="2016-01-01")

# Daily calendar -> last trading day of each month: a day is a month-end when
# the following trading day falls in a different (year, month).
month_date = [
    day
    for day, following in zip(dates[:-1], dates[1:])
    if (day.year, day.month) != (following.year, following.month)
]
# The final trading day has no successor to compare with; append it so the
# last period has an end date.
month_date.append(dates[-1])

stocks = index_components("000300.XSHG")

monthly_frames = []
# Pair each month-end with the next one. Iterating over the pairs directly
# also covers the last available period, which the previous index arithmetic
# skipped (it subtracted one from the length twice).
for date, date_next in zip(month_date[:-1], month_date[1:]):
    fund = get_factor(stocks, ["pe_ratio_lyr","pb_ratio_lyr","market_cap","ev_lyr","return_on_asset_net_profit_lyr","du_return_on_equity_lyr","basic_earnings_per_share","net_profit_to_revenue_lyr","total_expense"]
                      ,start_date=date,end_date=date)
    # After reset_index the row labels are 0..n-1, so label == position below.
    fund.reset_index(inplace=True)
    fund.sort_index(inplace=True)
    fund['next_month_return'] = np.nan
    for row in range(fund.shape[0]):
        stock = fund.iloc[row]["order_book_id"]
        price1 = get_price(stock, start_date=date, end_date=date, fields='close')
        price2 = get_price(stock, start_date=date_next, end_date=date_next, fields='close')
        # Leave the return as NaN when either price is unavailable; such rows
        # are dropped after concatenation.
        if price1 is None or price2 is None:
            continue
        if len(price1) == 0 or len(price2) == 0:
            continue
        # NOTE(review): assumes get_price returns a frame with a "close"
        # column holding one row for a single-day query - confirm on the
        # platform version in use.
        close_start = float(price1["close"].iloc[0])
        close_end = float(price2["close"].iloc[0])
        fund.loc[row, 'next_month_return'] = close_end / close_start - 1
    # Collect the monthly snapshots; they are concatenated once at the end.
    monthly_frames.append(fund)

all_data = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()
# Drop rows with any missing value (factor or return).
all_data = all_data.dropna()
def mad(factor):
    """Winsorize a factor with the 3x median-absolute-deviation rule.

    Values above ``median + 3 * 1.4826 * MAD`` or below
    ``median - 3 * 1.4826 * MAD`` are replaced by the respective bound.

    Args:
        factor: array-like of factor values supporting ``factor - scalar``.

    Returns:
        The clipped values as produced by ``np.where``.
    """
    # Median of the factor values.
    center = np.median(factor)
    # Median of the absolute deviations from that median.
    spread = np.median(np.abs(factor - center))
    # Half-width of the accepted band.
    half_width = 3 * 1.4826 * spread
    upper = center + half_width
    lower = center - half_width
    # Cap the high side first, then the low side.
    capped = np.where(factor > upper, upper, factor)
    return np.where(capped < lower, lower, capped)
def stand(factor):
    """Standardize a factor: subtract ``np.mean`` and divide by ``np.std``.

    Args:
        factor: array-like of factor values.

    Returns:
        ``(factor - mean) / std``, in whatever container the arithmetic on
        ``factor`` yields.
    """
    center = np.mean(factor)
    scale = np.std(factor)
    centered = factor - center
    return centered / scale
# Shuffle the rows and keep a random 70% sample; the index order is scrambled
# afterwards.
all_data = all_data.sample(frac=0.7)

# Feature matrix: the nine factor columns.
x = all_data[["pe_ratio_lyr","pb_ratio_lyr","market_cap","ev_lyr","return_on_asset_net_profit_lyr","du_return_on_equity_lyr","basic_earnings_per_share","net_profit_to_revenue_lyr","total_expense"]].copy()
# Target: next month's return, kept as a one-column DataFrame.
y = all_data[['next_month_return']].copy()

# 1. Feature processing: winsorize, then standardize, every factor column.
for name in x.columns:
    x[name] = mad(x[name])
    x[name] = stand(x[name])

# 2. Market-cap neutralization: regress each remaining factor on the processed
# market cap and keep the residual as the new factor value.
x_market_cap = x["market_cap"]
cap_column = x_market_cap.values.reshape(-1, 1)
for name in [col for col in x.columns if col != "market_cap"]:
    y_factor = x[name]
    lr = LinearRegression()
    lr.fit(cap_column, y_factor)
    y_predict = lr.predict(cap_column)
    # Residual = actual value minus the part explained by market cap.
    x[name] = y_factor - y_predict

# 3. Standardize the return target and fit the final regression.
y = stand(y)
lr = LinearRegression().fit(x, y)
# Bare expression so a notebook cell displays the fitted coefficients.
lr.coef_
# 以上得出回归系数(lr.coef_),作为下面策略中的因子权重
# ===== 我的策略实现 =====
# 可以自己import我们平台支持的第三方python模块,比如pandas、numpy等。
# 回测区间:2014-01-01~2018-01-01
# 选股:
# 范围:沪深300
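# 因子:pe_ratio_lyr、pb_ratio_lyr、market_cap、ev_lyr、return_on_asset_net_profit_lyr、du_return_on_equity_lyr、basic_earnings_per_share、net_profit_to_revenue_lyr、total_expense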
# 方法:回归法,利用系数相乘(矩阵相乘运算)得出结果排序
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
def init(context):
    """Initialize strategy state and register the monthly selection job.

    The ``context`` object is passed between all strategy callbacks.

    Args:
        context: strategy context; this function sets ``hs300`` and
            ``stock_num`` on it.
    """
    # Universe: constituents of the CSI 300 index (000300.XSHG).
    context.hs300 = index_components("000300.XSHG")
    # Presumably the target number of holdings - confirm against the
    # selection routine.
    context.stock_num = 20
    # Run regression_select once a month (tradingday=1).
    scheduler.run_monthly(regression_select, tradingday=1)
def regression_select(context, bar_dict):
    """Pick stocks with the regression method, then rebalance.

    Steps:
      1. Fetch the nine factor columns for the stocks in ``context.hs300``.
      2. Clean them: drop rows with missing values, winsorize each factor
         with ``mad``, standardize, and neutralize against raw market cap.
      3. Score each stock as the dot product of its factor vector with the
         pre-trained regression coefficients, keep the highest scores in
         ``context.stocklist`` and call ``rebalance``.

    Args:
        context: strategy context. Reads ``hs300`` and, when present,
            ``stock_num`` (falls back to 20); writes ``stocklist``.
        bar_dict: unused here; kept because the function is registered as a
            scheduler callback.

    Returns:
        None. When no stock has complete factor data the function returns
        without touching ``context.stocklist`` or trading.
    """
    factor_names = ["pe_ratio_lyr", "pb_ratio_lyr", "market_cap", "ev_lyr",
                    "return_on_asset_net_profit_lyr", "du_return_on_equity_lyr",
                    "basic_earnings_per_share", "net_profit_to_revenue_lyr",
                    "total_expense"]
    # Pre-trained regression coefficients, one per entry of factor_names and
    # in the same order.
    weights = np.array([ 0.05082123, -0.00411096, -0.09166143, 0.03656969, -0.07487511,
                        -0.04669774, 0.07647528, 0.02942259, 0.10650444])

    # 1. Factor data.
    fund = get_factor(context.hs300, factor_names)

    # 2. Cleaning.
    # Select the columns explicitly so their order always matches `weights`,
    # then drop rows with any missing factor.
    factors_data = fund[factor_names].dropna()
    if factors_data.empty:
        # Nothing to score; keep the current holdings rather than failing
        # inside the scaler.
        return

    # Winsorize each factor; the raw frame is kept for the neutralization.
    clipped = pd.DataFrame(
        {name: mad(factors_data[name]) for name in factor_names},
        index=factors_data.index,
        columns=factor_names,
    )
    # Standardize; the scaler returns a bare array, so rebuild the frame.
    factors1 = pd.DataFrame(
        StandardScaler().fit_transform(clipped),
        index=factors_data.index,
        columns=factor_names,
    )

    # Neutralization: regress every processed factor on the raw market cap
    # and keep the residual as the new factor value.
    market_cap = factors_data["market_cap"].values.reshape(-1, 1)
    for name in factor_names:
        if name == "market_cap":
            continue
        target = factors1[name]
        lr = LinearRegression()
        lr.fit(market_cap, target.values)
        factors1[name] = target - lr.predict(market_cap)

    # 3. Scoring: (m, 9) . (9,) -> one score per row, used as the predicted
    # return for ranking.
    scores = factors1.dot(weights).dropna()

    # NOTE(review): assumes the index returned by get_factor carries an
    # "order_book_id" level, as the column lookup after reset_index did before.
    stock_ids = scores.index.get_level_values("order_book_id")
    stock_return = dict(zip(stock_ids, scores))

    # Highest predicted return first; keep the configured number of stocks.
    top_n = getattr(context, "stock_num", 20)
    ranked = sorted(stock_return.items(), key=lambda item: item[1], reverse=True)
    context.stocklist = [stock for stock, _ in ranked[:top_n]]

    rebalance(context)
def rebalance(context):
    """Move the portfolio to an equal-weight holding of ``context.stocklist``.

    Held positions (quantity > 0) that are not in the target list are sold
    down to zero, then every target stock is ordered to a weight of
    ``1 / len(context.stocklist)``.

    Args:
        context: strategy context; reads ``portfolio.positions`` and
            ``stocklist``.

    Returns:
        None. With an empty target list only the sell leg runs.
    """
    targets = context.stocklist
    wanted = set(targets)
    positions = context.portfolio.positions

    # Sell: snapshot the keys first so placing orders cannot disturb the
    # iteration over the positions mapping.
    for stock in list(positions.keys()):
        if positions[stock].quantity > 0 and stock not in wanted:
            order_target_percent(stock, 0)

    # Nothing to buy; also avoids dividing by zero below.
    if not targets:
        return

    # Buy: equal weight across the targets.
    weight = 1.0 / len(targets)
    for stock in targets:
        order_target_percent(stock, weight)
def before_trading(context):
    """Hook called once per day before the trading session starts.

    Intentionally does nothing.
    """
def handle_bar(context, bar_dict):
    """Hook triggered when data for the selected securities updates.

    Per the platform template, this fires on daily or minute historical bar
    slices or on real-time slice updates; ``bar_dict[order_book_id]`` gives
    the bar of a security and ``context.portfolio`` the current portfolio.

    Intentionally does nothing.
    """
def after_trading(context):
    """Hook called once per day after the trading session ends.

    Intentionally does nothing.
    """
def mad(factor):
    """Clip extreme factor values using 3x the median absolute deviation.

    The accepted band is ``median +/- 3 * 1.4826 * MAD``; anything outside
    is replaced by the nearer bound.

    Args:
        factor: array-like of factor values supporting ``factor - scalar``.

    Returns:
        The clipped values as produced by ``np.where``.
    """
    # Median of the factor values.
    median_value = np.median(factor)
    # Median of the absolute distances to that median.
    deviation = np.median(np.abs(factor - median_value))
    # Upper and lower bounds of the accepted band.
    band = 3 * 1.4826 * deviation
    ceiling = median_value + band
    floor = median_value - band
    # Replace values beyond the ceiling, then values beneath the floor.
    trimmed = np.where(factor > ceiling, ceiling, factor)
    trimmed = np.where(trimmed < floor, floor, trimmed)
    return trimmed
# 测试结果:
# 回测获得超过市场预期1倍的收益