获取数据:使用 qlib.data 中的 D 接口(from qlib.data import D)
从D中筛选出数据和指标
>> from qlib.data import D
>> from qlib.data.filter import NameDFilter, ExpressionDFilter
>> nameDFilter = NameDFilter(name_rule_re='SH[0-9]{4}55')
>> expressionDFilter = ExpressionDFilter(rule_expression='$close>Ref($close,1)')
>> instruments = D.instruments(market='csi300', filter_pipe=[nameDFilter, expressionDFilter])
>> fields = ['$close', '$volume', 'Ref($close, 1)', 'Mean($close, 3)', '$high-$low']
>> D.features(instruments, fields, start_time='2010-01-01', end_time='2017-12-31', freq='day').head().to_string()
' $close $volume Ref($close, 1) Mean($close, 3) $high-$low
... instrument datetime
... SH600655 2010-01-04 2699.567383 158193.328125 2619.070312 2626.097738 124.580566
... 2010-01-08 2612.359619 77501.406250 2584.567627 2623.220133 83.373047
... 2010-01-11 2712.982422 160852.390625 2612.359619 2636.636556 146.621582
... 2010-01-12 2788.688232 164587.937500 2712.982422 2704.676758 128.413818
... 2010-01-13 2790.604004 145460.453125 2788.688232 2764.091553 128.413818'
>> from qlib.data import D
>> data = D.features(["sh600519"], ["(($high / $close) + ($open / $close)) * (($high / $close) + ($open / $close)) / (($high / $close) + ($open / $close))"], start_time="20200101")
>> from qlib.data.ops import *
>> f1 = Feature("high") / Feature("close")
>> f2 = Feature("open") / Feature("close")
>> f3 = f1 + f2
>> f4 = f3 * f3 / f3
>> data = D.features(["sh600519"], [f4], start_time="20200101")
>> data.head()
参考文档:Data Retrieval — QLib 0.8.6.99 documentation
================================================================
workflow_by_code用例
1. 初始化 qlib(指定数据路径和市场区域),之后才能加载数据
# Initialise qlib before any data access, passing the data location
# (`provider_uri`) and the region constant `REG_CN`.
# NOTE(review): `provider_uri` and `REG_CN` are not defined in the lines shown
# here - presumably assigned/imported earlier; confirm.
qlib.init(provider_uri=provider_uri, region=REG_CN)
2. 设置模型数据范围和学习时间范围
# Instrument universe shared by the handler configuration below.
market = "csi300"

# Keyword arguments for the data handler: the overall data window, the
# window used for fitting, and the instrument universe.
data_handler_config = dict(
    start_time="2021-01-01",
    end_time="2022-12-31",
    fit_start_time="2021-01-01",
    fit_end_time="2021-12-31",
    instruments=market,
)
3. 设置模型及参数
# Task definition: the model (step 3) and the dataset it is trained on (step 4).
task = {
    # Step 3: model class and its hyper-parameters.
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
        },
    },
    # Step 4: dataset class and the date ranges it is split into.
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "Alpha158",
                "module_path": "qlib.contrib.data.handler",
                # Time range and instrument universe configured in step 2.
                "kwargs": data_handler_config,
            },
            "segments": {
                # Train and validation ranges must not overlap; otherwise the
                # validation score is computed on data the model was fitted on.
                # The training range therefore ends the day before validation starts.
                "train": ("2021-01-01", "2021-06-30"),
                "valid": ("2021-07-01", "2021-12-31"),
                "test": ("2022-01-01", "2022-12-01"),
            },
        },
    },
}
5. 模型初始化
# Build the model and the dataset objects from their sections of `task`
# (model first, then dataset).
model, dataset = (
    init_instance_by_config(task["model"]),
    init_instance_by_config(task["dataset"]),
)
6. 模型学习
# Start an experiment run named "train_model" and train the model inside it.
# Everything below must be indented under the `with` so it runs within the run.
with R.start(experiment_name="train_model"):
    # Record the flattened task configuration as the run's parameters.
    R.log_params(**flatten_dict(task))
    model.fit(dataset)
    # Store the fitted model under the name "trained_model".
    R.save_objects(trained_model=model)
    # Keep the recorder id so this run can be looked up again afterwards.
    rid = R.get_recorder().id
7. 预测、回测、分析
# Backtest and analysis, executed inside a second experiment run.
# Everything below must be indented under the `with` so it runs within the run.
with R.start(experiment_name="backtest_analysis"):
    # Reload the model saved by the "train_model" run, located via `rid`.
    recorder = R.get_recorder(recorder_id=rid, experiment_name="train_model")
    model = recorder.load_object("trained_model")

    # Prediction: switch to the recorder of the current run and generate the
    # signal record for the reloaded model on `dataset`.
    recorder = R.get_recorder()
    ba_rid = recorder.id  # id of this run, kept for later lookup
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()

    # Backtest & analysis, driven by `port_analysis_config`, which has to be
    # defined before this block runs.
    par = PortAnaRecord(recorder, port_analysis_config, "day")
    par.generate()
7.1. 其中 PortAnaRecord 负责回测和分析,用到了一组新参数 port_analysis_config(需要在执行步骤 7 的代码之前定义)
# Trade execution agent.
executor_config = {
    "class": "SimulatorExecutor",
    "module_path": "qlib.backtest.executor",
    "kwargs": {
        "time_per_step": "day",  # backtest frequency
        "generate_portfolio_metrics": True,
    },
}

# Trading strategy applied to the model's predictions.
strategy_config = {
    "class": "TopkDropoutStrategy",
    "module_path": "qlib.contrib.strategy.signal_strategy",
    "kwargs": {
        "model": model,
        "dataset": dataset,
        "topk": 50,
        "n_drop": 5,
    },
}

# Backtest window, starting capital, benchmark and exchange settings.
backtest_config = {
    "start_time": "2022-01-01",
    "end_time": "2022-12-01",
    "account": 100000000,
    "benchmark": benchmark,  # benchmark return; the default is CSI 300 (SH000300)
    "exchange_kwargs": {
        "freq": "day",
        "limit_threshold": 0.095,  # price-limit (limit-up / limit-down) setting
        "deal_price": "close",
        "open_cost": 0.0005,  # transaction cost settings
        "close_cost": 0.0015,
        "min_cost": 5,
    },
}

# Complete configuration consumed by the backtest/analysis record.
port_analysis_config = {
    "executor": executor_config,
    "strategy": strategy_config,
    "backtest": backtest_config,
}
可以这么理解:先用数据训练 LGB 模型,再用训练好的模型对数据进行预测,最后基于预测结果使用 TopkDropout 策略执行回测。
8. 图表分析
该示例的图表分析直接加载了回测阶段保存在 recorder 中的 pkl 文件,后续我会自行再测试一下。
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D
# Fetch the recorder of the backtest run (id `ba_rid`) from the
# "backtest_analysis" experiment.
recorder = R.get_recorder(recorder_id=ba_rid, experiment_name="backtest_analysis")
# Load the object stored as "pred.pkl" in that recorder.
pred_df = recorder.load_object("pred.pkl")
# Load three pickled artifacts stored under portfolio_analysis/ in the same recorder.
# (Author's open question: are these meant to be compared with the backtest analysis?)
report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1day.pkl")
positions = recorder.load_object("portfolio_analysis/positions_normal_1day.pkl")
analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1day.pkl")
# Plot the report; of the objects loaded above, only report_normal_df is used
# in the lines shown here.
analysis_position.report_graph(report_normal_df)
图表指标说明:
cum bench: 基准指数的累计收益
cum return wo cost: 不计交易成本的策略累计收益
cum return w cost: 计入交易成本的策略累计收益
return w cost mdd: 计入交易成本的收益最大回撤
cum ex return wo cost: 不计交易成本的策略累计超额收益
turnover: 策略换手率