前言:
谈及BackTrader的回测速度优化,最常见的说法是从底层使用numpy等计算库来替换,但这种优化对新手无疑非常不友好。因此本文着眼于如何用最简单的方式优化多股票情况下回测慢的问题。考虑到测试效率,本文使用100只股票回测。经过测试,优化后策略执行耗时减少约59%(V2的93秒->V3的38.4秒)。
策略描述:
前一天非一字涨停的股票进入候选池。
第二天10~11点若涨幅大于4%买入。
持仓股在14:30时若未涨停卖出。
V1策略及运行时间:
v1代码设计思路:
使用5分钟数据进行交易,而使用日线数据进行候选池判断及涨幅判断。添加定时器,只在每天15:00筛选候选池,然后在next中根据时间与涨幅判断是否需要买入或卖出。策略部分代码如下:
class MyStrategy(bt.Strategy):
    """V1: trade on 5-minute bars, screen candidates on daily bars.

    Every stock is expected to appear twice in ``self.datas``: an intraday
    feed named ``<code>`` and a daily feed named ``<code>_day``.  The daily
    feeds must expose the extra lines ``pctChg`` and ``high_limit``.

    State (all three lists hold row labels of ``self.st_df``):
        zt_list:   candidates selected by the most recent 15:00 timer.
        new_hold:  stocks whose buy completed since the most recent timer.
        last_hold: stocks bought before the most recent timer; only these
                   are considered for selling.
    """

    params = dict(
        when=bt.timer.SESSION_START,
        end=bt.timer.SESSION_END,
        timer=True,
        cheat=False,
        offset=timedelta(),
        repeat=timedelta(),
        weekdays=[],
        period=3,
    )

    def log(self, txt, dt=None):
        """Print ``txt`` prefixed with ``dt`` (default: current bar of data 0)."""
        dt = dt or self.datas[0].datetime.datetime(0)
        print('%s, %s' % (dt.isoformat(), txt))

    def __init__(self):
        """Register the 15:00 timer and build the code -> feed-index table."""
        self.order = None
        self.add_timer(
            when=time(15, 0),
            offset=self.p.offset,
            repeat=self.p.repeat,
            weekdays=self.p.weekdays,
        )

        # One row per intraday feed:
        # [feed name, index of the 5-minute feed, index of the daily feed].
        rows = [
            [d._name, i, None]
            for i, d in enumerate(self.datas)
            if not d._name.endswith('_day')
        ]
        self.st_df = pd.DataFrame(data=rows, columns=['code', 'min', 'day'])

        # Attach each daily feed to the row of the matching intraday feed.
        for i, d in enumerate(self.datas):
            if d._name.endswith('_day'):
                code = d._name.split('_')[0]
                self.st_df.loc[self.st_df.code == code, 'day'] = i

        self.zt_list = []
        self.last_hold = []
        self.new_hold = []
        self.zt_num = 0

    def notify_order(self, order):
        """Update the holding lists when the broker reports an order change."""
        if order.status in (order.Submitted, order.Accepted):
            # Buy/Sell order submitted/accepted to/by broker - nothing to do.
            return

        # Row label in ``st_df`` of the stock this order belongs to.
        matches = self.st_df.loc[self.st_df.code == order.data._name]
        idx = int(matches.index.values[0])

        if order.status == order.Completed:
            if order.isbuy():
                self.log(
                    'BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
                    (order.executed.price,
                     order.executed.value,
                     order.executed.comm))
                self.new_hold.append(idx)
                # Guarded: the candidate list may already have been rebuilt.
                if idx in self.zt_list:
                    self.zt_list.remove(idx)
            else:  # Sell
                self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
                         (order.executed.price,
                          order.executed.value,
                          order.executed.comm))
                if idx in self.last_hold:
                    self.last_hold.remove(idx)
        elif order.status in (order.Canceled, order.Expired,
                              order.Margin, order.Rejected):
            # Attention: the broker can reject an order if cash is insufficient.
            # Nothing to undo here: a stock is only recorded in ``new_hold``
            # once its buy has completed, so a failed order never reached the
            # list.  (Removing it unconditionally raised ValueError.)
            self.log('Order Canceled/Expired/Margin/Rejected')

        # Write down: no pending order
        self.order = None

    def next(self):
        """Place limit buys for rising candidates and close holdings at 14:35."""
        now = self.datetime.time(0)

        # 1. Buy candidates.  The window actually enforced is 09:40-14:30.
        # NOTE(review): the original comment said 10:05-11:05 - confirm which
        # window is intended.
        free_slots = 10 - len(self.last_hold) - len(self.new_hold)
        if self.zt_list and free_slots > 0 and time(9, 40) <= now <= time(14, 30):
            for i in self.zt_list:
                if i in self.last_hold:
                    continue
                if free_slots <= 0:
                    break
                d = self.datas[self.st_df.loc[i, 'min']]
                last_close = self.datas[self.st_df.loc[i, 'day']].close[0]
                # Buy only when the 5-minute close is 4.5%-9% above the
                # latest daily close.
                if 1.045 * last_close < d.close[0] < 1.09 * last_close:
                    free_slots -= 1
                    # Budget 10% of the portfolio per stock, in lots of 100.
                    targetvalue = 0.1 * self.broker.getvalue()
                    size = targetvalue / (last_close * 1.09) // 100 * 100
                    self.buy(
                        data=d,
                        size=size,
                        price=last_close * 1.09,
                        exectype=bt.Order.Limit,
                        valid=self.datetime.datetime(0) + timedelta(minutes=5),
                    )

        # 2. Sell at the 14:35 bar.
        if self.last_hold and now == time(14, 35):
            for i in self.last_hold:
                m = self.datas[self.st_df.loc[i, 'min']]
                d = self.datas[self.st_df.loc[i, 'day']]
                # Author's note: at this time the newest daily bar is still
                # the previous day's.
                if m.close[0] < d.high_limit[0]:
                    print('sell 平仓', m._name, self.getposition(m).size)
                    self.close(data=m)

    def notify_timer(self, timer, when, *args, **kwargs):
        """On the 15:00 timer: merge today's buys and rebuild the candidates."""
        # Stocks bought since the previous timer become regular holdings.
        self.last_hold += self.new_hold
        self.new_hold = []

        # Pre-select the pool: daily close above the daily low and a daily
        # change above 9.9%.
        candidates = []
        for i, row in self.st_df.iterrows():
            d = self.datas[row['day']]
            if d.close[0] > d.low[0] and d.pctChg[0] > 9.9:
                self.log('zhangting ' + str(d.close[0]) + d._name)
                candidates.append(i)

        # Drop stocks that are already held (keeps row order).
        held = set(self.last_hold)
        self.zt_list = [i for i in candidates if i not in held]
        self.zt_num += len(self.zt_list)
运行时间:
总时间:72秒
读取csv | cerebro.adddata | 执行完成 |
---|---|---|
5.8 | 4 | 62 |
可以看到耗时主要集中在cerebro添加数据完成到执行完成这一阶段,因此[3]中所提及的优化数据读取的方式并不适用。另外,[2]中提出Observers和Analyzers的耗时能达到执行时间的一半,但去掉以后重新运行得到总时间:71秒,没有明显提升,可能是因为本例中添加的Observers和Analyzers都比较简单。
V2策略及运行时间:
v2代码改进思路:
为了提高运行效率,考虑尽量减少next中的判断,将其放到cerebro之外预先计算,同时将信号直接附加到5分钟数据上,不再传入日线数据。代码如下:
class PandasDataExtendInd(bt.feeds.PandasData):
    """PandasData feed carrying four extra signal lines.

    Extra lines: ``ind``, ``high_limit``, ``buy_ind`` and ``sell_ind``.
    """

    # Additional lines on top of the ones PandasData already defines.
    lines = (
        'ind',
        'high_limit',
        'buy_ind',
        'sell_ind',
    )

    # One parameter per extra line, each defaulting to -1.
    # NOTE(review): in backtrader's PandasData, -1 is documented as
    # "auto-detect the column by name" - confirm against the version in use.
    params = tuple((line_name, -1) for line_name in lines)
class MyStrategy(bt.Strategy):
    """V2: trade on 5-minute feeds that already carry precomputed signals.

    Every feed in ``self.datas`` must expose the extra lines ``ind``,
    ``high_limit``, ``buy_ind`` and ``sell_ind``.

    State (all three lists hold integer positions into ``self.datas``):
        zt_list:   candidates selected by the most recent 15:00 timer.
        new_hold:  stocks whose buy completed since the most recent timer.
        last_hold: stocks bought before the most recent timer; only these
                   are considered for selling.

    Positions are stored instead of the feed objects because backtrader
    line objects overload ``==`` to compare current values, so ``in``,
    ``list.remove`` and ``list.index`` on feeds can match the wrong feed.
    """

    params = dict(
        when=bt.timer.SESSION_START,
        end=bt.timer.SESSION_END,
        timer=True,
        cheat=False,
        offset=timedelta(),
        repeat=timedelta(),
        weekdays=[],
        period=3,
    )

    def log(self, txt, dt=None):
        """Print ``txt`` prefixed with ``dt`` (default: current bar of data 0)."""
        dt = dt or self.datas[0].datetime.datetime(0)
        print('%s, %s' % (dt.isoformat(), txt))

    def __init__(self):
        """Register the 15:00 timer and reset the bookkeeping lists."""
        self.order = None
        self.add_timer(
            when=time(15, 0),
            offset=self.p.offset,
            repeat=self.p.repeat,
            weekdays=self.p.weekdays,
        )
        self.zt_list = []
        self.last_hold = []
        self.new_hold = []
        self.zt_num = 0

    def _data_index(self, data):
        """Return the position of ``data`` in ``self.datas``, by identity.

        Raises:
            ValueError: if ``data`` is not one of the strategy's feeds.
        """
        for i, d in enumerate(self.datas):
            if d is data:
                return i
        raise ValueError('data feed is not in self.datas')

    def notify_order(self, order):
        """Update the holding lists when the broker reports an order change."""
        if order.status in (order.Submitted, order.Accepted):
            # Buy/Sell order submitted/accepted to/by broker - nothing to do.
            return

        idx = self._data_index(order.data)

        if order.status == order.Completed:
            if order.isbuy():
                self.log(
                    'BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
                    (order.executed.price,
                     order.executed.value,
                     order.executed.comm))
                self.new_hold.append(idx)
                # Guarded: the candidate list may already have been rebuilt.
                if idx in self.zt_list:
                    self.zt_list.remove(idx)
            else:  # Sell
                self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
                         (order.executed.price,
                          order.executed.value,
                          order.executed.comm))
                if idx in self.last_hold:
                    self.last_hold.remove(idx)
        elif order.status in (order.Canceled, order.Expired,
                              order.Margin, order.Rejected):
            # Attention: the broker can reject an order if cash is insufficient.
            # Nothing to undo here: a stock is only recorded in ``new_hold``
            # once its buy has completed, so a failed order never reached the
            # list.  (Removing it unconditionally raised ValueError.)
            self.log('Order Canceled/Expired/Margin/Rejected')

        # Write down: no pending order
        self.order = None

    def next(self):
        """Place limit buys on ``buy_ind`` and close holdings on ``sell_ind``."""
        now = self.datetime.time(0)

        # 1. Buy candidates.  The window actually enforced is 09:40-14:30.
        # NOTE(review): the original comment said 10:05-11:05 - confirm which
        # window is intended.
        free_slots = 10 - len(self.last_hold) - len(self.new_hold)
        if self.zt_list and free_slots > 0 and time(9, 40) <= now <= time(14, 30):
            for i in self.zt_list:
                if i in self.last_hold:
                    continue
                if free_slots <= 0:
                    break
                d = self.datas[i]
                if d.buy_ind[0]:
                    free_slots -= 1
                    # Limit price is 99% of the feed's ``high_limit`` value.
                    limit_price = d.high_limit[0] * 0.99
                    # Budget 10% of the portfolio per stock, in lots of 100.
                    targetvalue = 0.1 * self.broker.getvalue()
                    size = targetvalue / limit_price // 100 * 100
                    self.buy(
                        data=d,
                        size=size,
                        price=limit_price,
                        exectype=bt.Order.Limit,
                        valid=self.datetime.datetime(0) + timedelta(minutes=5),
                    )

        # 2. Sell at the 14:35 bar when the precomputed sell signal is set.
        if self.last_hold and now == time(14, 35):
            for i in self.last_hold:
                m = self.datas[i]
                if m.sell_ind[0]:
                    print('sell 平仓', m._name, self.getposition(m).size)
                    self.close(data=m)

    def notify_timer(self, timer, when, *args, **kwargs):
        """On the 15:00 timer: merge today's buys and rebuild the candidates."""
        # Stocks bought since the previous timer become regular holdings.
        self.last_hold += self.new_hold
        self.new_hold = []

        # Pre-select feeds whose ``ind`` signal is set on the current bar,
        # skipping stocks that are already held.
        held = set(self.last_hold)
        self.zt_list = [
            i for i, d in enumerate(self.datas)
            if d.ind[0] and i not in held
        ]
        self.zt_num += len(self.zt_list)
运行时间:
总时间:103秒
读取csv | cerebro.adddata | 执行完成 |
---|---|---|
5.8 | 4.5 | 93 |
反向优化效果显著:也就是说,减少next中的比较操作并少传入日线数据所带来的收益,远远小于传入更复杂的5分钟数据所带来的开销。详细打印运行时间后可以看到,第一次进入next时已经过去约80秒,其中接近70秒的时间是cerebro在进行各种初始化。
V3最终优化
优化思路:
详细分析代码后可以得到其中最耗时的部分为:
# cerebro.py -> runstrategies()
# Excerpt: every data feed is preloaded in turn, one after another.
for data in self.datas:
    data.preload()

# feed.py -> preload()
def preload(self):
    # Keep calling load() until it returns a falsy value.
    while self.load():
        pass
    self._last()
    self.home()
preload本身不好优化,但是runstrategies中对各个数据逐一调用preload的循环可以改为并行执行,采用cerebro本身使用的multiprocessing.Pool(进程池,即多进程而非多线程)完成。
运行时间:
总时间:49秒
读取csv | cerebro.adddata | 执行完成 |
---|---|---|
5.9 | 4.3 | 38.4 |
数据读取、载入耗时不变,执行速度大幅提升。
结论
利用多进程并行(multiprocessing.Pool)可以大幅提升策略回测速度,同时修改难度较低。
电脑参数:
i7-10510U 2.30GHz, 4核8线程
15G内存
win10
参考:
[1] https://zhuanlan.zhihu.com/p/345815425