需要先获得 pred_score：它通过 pred_score = model.predict(dataset) 得到，然后作为 "signal" 放进下面的 STRATEGY_CONFIG：
# Keyword arguments for the strategy; the script unpacks them as
# TopkDropoutStrategy(**STRATEGY_CONFIG).
STRATEGY_CONFIG = {
"topk": 50,
"n_drop": 5,
# pred_score: pd.Series of prediction scores, obtained via model.predict(dataset)
"signal": pred_score,
}
import warnings

import numpy as np
import pandas as pd
import qlib
from qlib.backtest import backtest, executor
from qlib.contrib.evaluate import risk_analysis
from qlib.contrib.report import analysis_position
from qlib.contrib.strategy import TopkDropoutStrategy
from qlib.utils import flatten_dict
from qlib.utils.time import Freq
# ---------------------------------------------------------------------------
# End-to-end backtest of a TopkDropoutStrategy driven by `pred_score`,
# followed by a risk analysis of the excess return over the benchmark.
# `pred_score` must already exist (see the note at the top of this file).
# ---------------------------------------------------------------------------

# init qlib
# NOTE(review): replace this with the path of your local qlib data directory.
# The value below is the location used by the qlib documentation examples --
# confirm it matches your installation.
QLIB_DATA_DIR = "~/.qlib/qlib_data/cn_data"
qlib.init(provider_uri=QLIB_DATA_DIR)

CSI300_BENCH = "SH000300"
FREQ = "day"

# Keyword arguments forwarded to TopkDropoutStrategy.
STRATEGY_CONFIG = {
    "topk": 50,
    "n_drop": 5,
    # pred_score, pd.Series
    "signal": pred_score,
}

# Keyword arguments forwarded to executor.SimulatorExecutor.
EXECUTOR_CONFIG = {
    "time_per_step": "day",
    "generate_portfolio_metrics": True,
}

# Keyword arguments forwarded to backtest() next to the executor and strategy.
backtest_config = {
    "start_time": "2017-01-01",
    "end_time": "2020-08-01",
    "account": 100000000,
    "benchmark": CSI300_BENCH,
    "exchange_kwargs": {
        "freq": FREQ,
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.0015,
        "min_cost": 5,
    },
}

# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)

# backtest
portfolio_metric_dict, indicator_dict = backtest(
    executor=executor_obj,
    strategy=strategy_obj,
    **backtest_config,
)

# Key under which the portfolio metrics are stored: the two parts returned by
# Freq.parse(FREQ) joined into a single string.
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# backtest info
report_normal_df, positions_normal = portfolio_metric_dict.get(analysis_freq)

# risk analysis
# The strategy return minus the benchmark return is shared by both variants,
# so compute it once; the "with cost" variant additionally subtracts the cost.
excess_return = report_normal_df["return"] - report_normal_df["bench"]

analysis = {}
analysis["excess_return_without_cost"] = risk_analysis(
    r=excess_return,
    freq=analysis_freq,
)
analysis["excess_return_with_cost"] = risk_analysis(
    r=excess_return - report_normal_df["cost"],
    freq=analysis_freq,
)

analysis_df = pd.concat(analysis)  # type: pd.DataFrame
analysis_position.risk_analysis_graph(analysis_df, report_normal_df)
其中，库函数 risk_analysis 的源码为：
def risk_analysis(r, N: int = None, freq: str = "day"):
    """Risk Analysis

    Summarise a return series as mean, standard deviation, annualized return,
    information ratio and maximum drawdown.

    NOTE:
    The calculation of annualized return is different from the definition of annualized return.
    It is implemented by design.
    Qlib tries to cumulate returns by summation instead of production to avoid the cumulated curve being skewed exponentially.
    All the calculation of annualized returns follows this principle in Qlib.

    TODO: add a parameter to enable calculating metrics with production accumulation of return.

    Parameters
    ----------
    r : pandas.Series
        daily return series.
    N: int
        scaler for annualizing information_ratio (day: 252, week: 50, month: 12), at least one of `N` and `freq` should exist.
        When `N` is given it is used as-is and `freq` is ignored (a warning is emitted if both are set).
    freq: str
        analysis frequency used for calculating the scaler, at least one of `N` and `freq` should exist.
        NOTE(review): the scaler derived from `freq` uses 238 for "day", not the 252 quoted above.

    Returns
    -------
    pandas.DataFrame
        one column named "risk", indexed by "mean", "std", "annualized_return",
        "information_ratio" and "max_drawdown".

    Raises
    ------
    ValueError
        if both `N` and `freq` are None.
    """

    def cal_risk_analysis_scaler(freq):
        """Return the annualization scaler for a frequency string such as "day"."""
        _count, _freq = Freq.parse(freq)
        # len(D.calendar(start_time='2010-01-01', end_time='2019-12-31', freq='day')) = 2384
        _freq_scaler = {
            Freq.NORM_FREQ_MINUTE: 240 * 238,
            Freq.NORM_FREQ_DAY: 238,
            Freq.NORM_FREQ_WEEK: 50,
            Freq.NORM_FREQ_MONTH: 12,
        }
        # Dividing by the parsed count makes a bar spanning several base units
        # get a proportionally smaller scaler.
        return _freq_scaler[_freq] / _count

    if N is None and freq is None:
        raise ValueError("at least one of `N` and `freq` should exist")
    if N is not None and freq is not None:
        # `freq` defaults to "day", so this fires whenever the caller passes N
        # without explicitly setting freq=None.
        warnings.warn("risk_analysis freq will be ignored")
    if N is None:
        N = cal_risk_analysis_scaler(freq)

    mean = r.mean()
    std = r.std(ddof=1)
    annualized_return = mean * N
    information_ratio = mean / std * np.sqrt(N)
    # Drawdown is measured on the summed (not compounded) return curve, in line
    # with the summation principle described in the docstring.
    max_drawdown = (r.cumsum() - r.cumsum().cummax()).min()
    data = {
        "mean": mean,
        "std": std,
        "annualized_return": annualized_return,
        "information_ratio": information_ratio,
        "max_drawdown": max_drawdown,
    }
    res = pd.Series(data).to_frame("risk")
    return res
重点在下面这几句：
# Excerpt of the core of risk_analysis: r is the return series and N is the
# annualization scaler.
# Arithmetic mean of the returns in r.
mean = r.mean()
# Sample standard deviation (ddof=1, i.e. normalised by n - 1).
std = r.std(ddof=1)
# Linear annualization: the mean return multiplied by N (no compounding).
annualized_return = mean * N
# Mean divided by standard deviation, scaled by sqrt(N).
information_ratio = mean / std * np.sqrt(N)
# Most negative gap between the cumulative-sum curve and its running maximum.
max_drawdown = (r.cumsum() - r.cumsum().cummax()).min()
# Gather the five metrics under their report labels.
data = {
"mean": mean,
"std": std,
"annualized_return": annualized_return,
"information_ratio": information_ratio,
"max_drawdown": max_drawdown,
}
可以打印出 analysis_df 查看各项指标的结果，例如 print(analysis_df)。