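# Data source: akshare. A decision-tree-based quantitative backtest. Its one notable trait: the results look good -- unrealistically good. Treat it as a demonstration only.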
import warnings
warnings.filterwarnings('ignore')
import math
import yfinance
import pandas as pd
import numpy as np
import quantstats as qs
qs.extend_pandas()
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import tree
import akshare as ak
# Download the daily history of index "sh000300" through akshare.
df = ak.stock_zh_index_daily_em(symbol="sh000300")
# Parse the date strings and expose the closing price under the "Close"
# column name that the rest of the script reads.
df = df.assign(date=pd.to_datetime(df["date"], format="%Y-%m-%d")).rename(
    columns={"close": "Close"}
)
# Date-indexed frame; both names below are bound to the same object.
# NOTE: despite its name, SP500_DATA holds the "sh000300" series loaded above.
last = SP500_DATA = df.set_index("date")
# Compute the daily return and the rolling 21-day volatility.
data = SP500_DATA
# Simple close-to-close return: (today - previous day) / previous day.
# Computed with shift() rather than rolling(2).apply(lambda x: x[-1] ...):
# that lambda indexed a date-indexed Series with integer keys, which only
# works through pandas' deprecated positional fallback, and it invoked a
# Python callback for every row.
previous_close = data["Close"].shift(1)
data["day_return"] = (data["Close"] - previous_close) / previous_close
# Standard deviation of the last 21 daily returns, scaled by 252 ** 0.25.
# NOTE(review): the nested square root is kept unchanged from the original;
# annualising daily volatility normally uses sqrt(252) -- confirm which
# scaling is intended before relying on the absolute level of this column.
data["month_volatility"] = (
    data["day_return"].rolling(window=21).std() * math.sqrt(math.sqrt(252))
)
fig = data["month_volatility"].plot(title="Rolling Volatility", figsize=(20, 5))
# Build the monthly-frequency data set and plot its closing prices.
# For every calendar month pick the latest date present in the daily index.
# (The most recent month may be incomplete, in which case its row is simply
# the latest available day.)
month_end_dates = data.index.to_series().groupby(data.index.to_period("M")).max()
# Monthly-frequency data: one row per month, taken at the dates found above.
# The selection is computed once and copied explicitly, so the columns added
# to month_data later never alias the daily frame.
month_data = data.loc[month_end_dates.to_numpy()].copy()
# Plot the month-end closing price history.
fig = month_data["Close"].plot(title="Close", figsize=(20, 5))
# Compute and visualise the slow (long look-back) and fast (short look-back)
# momentum signals.


def _trailing_return(close, periods):
    """Return the simple return of *close* over the previous *periods* rows.

    Equivalent to (last - first) / first over a window of ``periods + 1``
    rows, without relying on positional integer lookups inside
    ``rolling().apply``.
    """
    base = close.shift(periods)
    return (close - base) / base


def _to_signal(returns):
    """Map returns to +1 where strictly positive and -1 everywhere else.

    Rows whose return is NaN (the warm-up period) therefore get -1.
    """
    return (returns > 0).astype(int) * 2 - 1


def _plot_signal(signal):
    """Draw *signal* as a line with the area between it and zero filled."""
    plt.figure(figsize=(20, 5))
    plt.plot(signal.index, signal.values)
    plt.fill_between(signal.index, 0, signal.values)


# Slow signal: sign of the return across a 13-row window (12 monthly steps).
slow_return = _trailing_return(month_data["Close"], 12)
slow_momentum_signal = _to_signal(slow_return)
month_data["slow_momentum_signal"] = slow_momentum_signal
_slow_momentum_signal = slow_momentum_signal.loc['2000-01-01':'2023-02-17']
_plot_signal(_slow_momentum_signal)

# Fast signal: sign of the return across a 2-row window (1 monthly step).
fast_return = _trailing_return(month_data["Close"], 1)
fast_momentum_signal = _to_signal(fast_return)
month_data["fast_momentum_signal"] = fast_momentum_signal
_fast_momentum_signal = fast_momentum_signal.loc['2000-01-01':'2023-02-17']
_plot_signal(_fast_momentum_signal)
# Visualise the cumulative return of the fast-momentum strategy, the
# slow-momentum strategy and the market.
#
# The position ("flag") held during a month is the signal from the previous
# row. shift(1) replaces the former `month_data[col].iloc[1:] = ...` chained
# assignment, which writes to a temporary object under pandas copy-on-write
# and left the columns with object dtype; the first row is now NaN.
month_data["fast_momentum_flag"] = month_data["fast_momentum_signal"].shift(1)
month_data["slow_momentum_flag"] = month_data["slow_momentum_signal"].shift(1)
# Month-over-month simple return of the closing price.
previous_month_close = month_data["Close"].shift(1)
month_data["month_return"] = (
    month_data["Close"] - previous_month_close
) / previous_month_close
# Strategy return = position held during the month * market return of the month.
month_data["slow_momentum_month_return"] = month_data.slow_momentum_flag * month_data.month_return
month_data["fast_momentum_month_return"] = month_data.fast_momentum_flag * month_data.month_return
# NOTE: despite the variable name, the window is 2010-02-01 .. 2023-02-17.
# Copy the slice so the cumulative columns are added to an independent frame.
month_data_2000_2020 = month_data.loc['2010-02-01':'2023-02-17'].copy()
# Cumulative value curves starting from a base of 100.
month_data_2000_2020["market_return"] = np.cumprod(month_data_2000_2020.month_return.values + 1) * 100
month_data_2000_2020["slow_momentum_month_cumprod_return"] = np.cumprod(month_data_2000_2020["slow_momentum_month_return"].values + 1) * 100
month_data_2000_2020["fast_momentum_month_cumprod_return"] = np.cumprod(month_data_2000_2020["fast_momentum_month_return"].values + 1) * 100
_ = plt.figure(figsize=(20, 5))
plt.xlabel('Time')
plt.ylabel('Return')
plotted_curves = ['market_return', 'slow_momentum_month_cumprod_return', 'fast_momentum_month_cumprod_return']
for curve_name in plotted_curves:
    _ = plt.plot(month_data_2000_2020[curve_name].index, month_data_2000_2020[curve_name].values)
_ = plt.legend(plotted_curves)
# Summary statistics for the two momentum strategies and the market.
# Each quantstats function is looked up by name with getattr instead of being
# spliced into a string and run through exec().
# NOTE(review): the series passed in are monthly cumulative value curves (base
# 100); confirm that the quantstats defaults (returns vs. prices, periods per
# year) are appropriate for that input.
metric_names = ["avg_return", "volatility", "sharpe", "max_drawdown", "win_rate"]
curve_columns = ["slow_momentum_month_cumprod_return", "fast_momentum_month_cumprod_return", "market_return"]
result = pd.DataFrame(
    {
        column: [
            getattr(qs.stats, metric)(month_data_2000_2020[column])
            for metric in metric_names
        ]
        for column in curve_columns
    },
    index=metric_names,
)
#result
# Part 4: train a decision-tree model that adaptively switches between the
# slow and the fast momentum strategy.
# Label each month: 1 when the fast-momentum return is at least as high as the
# slow-momentum return, otherwise 0. Comparisons involving NaN are False, so
# rows without a return are labelled 0.
# The label is built in one vectorised step; the former
# `month_data[col][mask] = 1` was a chained assignment.
fast_not_worse = month_data["fast_momentum_month_return"] >= month_data["slow_momentum_month_return"]
month_data["momentum_label"] = fast_not_worse.astype(int)
# Lag month_volatility by one row so that each row carries the previous
# month's volatility as the model input. shift(1) replaces the chained
# `.iloc[1:] = ...` assignment; the first row becomes NaN because it has no
# predecessor.
# NOTE: this statement is not idempotent -- re-running it lags the column again.
month_data["month_volatility"] = month_data["month_volatility"].shift(1)
month_data
# Build the training data and fit a decision-tree classifier. The training
# target is the strategy to follow in a month, given the lagged market
# volatility, when the slow and fast momentum positions disagree.
#
# Both sides of the comparison are the lagged positions ("flags"). The
# original compared the lagged fast flag with the *unlagged* slow signal,
# mixing two different points in time. Rows where either flag is missing
# (warm-up) are not treated as a disagreement.
fast_flag = month_data["fast_momentum_flag"].astype(float)
slow_flag = month_data["slow_momentum_flag"].astype(float)
flags_disagree = fast_flag.notna() & slow_flag.notna() & (fast_flag != slow_flag)
tree_data = month_data[flags_disagree]
tree_data = tree_data.fillna(0)
# Train on everything up to the end of 2010; the single feature is the lagged
# volatility, reshaped to the (n_samples, 1) layout scikit-learn expects.
train_rows = tree_data.loc['1971-01-01':'2010-12-31']
X_train = train_rows["month_volatility"].values.reshape(-1, 1)
y_train = train_rows["momentum_label"]
# Evaluate on every month of 2011-2020, not only the months with a disagreement.
test_rows = month_data.loc['2011-01-01':'2020-12-31']
X_test = test_rows["month_volatility"].values.reshape(-1, 1)
y_test = test_rows["momentum_label"]
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)
# Plot the predicted labels against the true labels over the test period.
_ = plt.figure(figsize=(20, 5))
plt.xlabel('Time')
plt.ylabel('y')
# Both curves share the test-period dates as their x axis.
for label_values in (y_predict, y_test):
    _ = plt.plot(y_test.index, label_values)
_ = plt.legend(['y_predict', 'y_test'])
# Visualise the cumulative return of the fast-momentum strategy, the
# slow-momentum strategy and the tree-switched strategy.
#
# Map the predicted class (0 = slow did better, 1 = fast did at least as well)
# to -1 / +1 over the test window; months outside it stay NaN. The write goes
# through .loc on the frame because `month_data[col].loc[...] = ...` is a
# chained assignment.
month_data["momentum_predict_label"] = np.nan
month_data.loc['2011-01-01':'2020-12-31', "momentum_predict_label"] = y_predict * 2 - 1
# Positions actually held during each month (signals lagged by one row).
fast_flag = month_data["fast_momentum_flag"].astype(float)
slow_flag = month_data["slow_momentum_flag"].astype(float)
# Follow the slow strategy only when the two positions disagree AND the model
# predicts the slow one. The original tested the *same-month* signals here;
# the fast signal of a month is the sign of that month's own return, so the
# switch decision used information not available when the position is
# opened (look-ahead bias). The lagged flags are used instead.
follow_slow = (fast_flag * slow_flag == -1) & (month_data["momentum_predict_label"] == -1)
# Default to the fast position; where follow_slow holds the positions are
# opposite, so the slow position equals the negated fast position.
month_data["tree_momentum_flag"] = fast_flag.where(~follow_slow, -fast_flag)
month_data["tree_momentum_month_return"] = month_data.tree_momentum_flag * month_data.month_return
# NOTE: despite the variable name, the window is 2011-02-01 .. 2020-12-31.
# Copy the slice so the cumulative columns are added to an independent frame.
month_data_2006_2020 = month_data.loc['2011-02-01':'2020-12-31'].copy()
# Cumulative value curves starting from a base of 100.
month_data_2006_2020["tree_momentum_month_cumprob_return"] = np.cumprod(month_data_2006_2020["tree_momentum_month_return"].values + 1) * 100
month_data_2006_2020["slow_momentum_month_cumprod_return"] = np.cumprod(month_data_2006_2020["slow_momentum_month_return"].values + 1) * 100
month_data_2006_2020["fast_momentum_month_cumprod_return"] = np.cumprod(month_data_2006_2020["fast_momentum_month_return"].values + 1) * 100
_ = plt.figure(figsize=(20, 5))
plt.xlabel('Time')
plt.ylabel('Return')
tree_plot_curves = ['tree_momentum_month_cumprob_return', 'slow_momentum_month_cumprod_return', 'fast_momentum_month_cumprod_return']
for curve_name in tree_plot_curves:
    _ = plt.plot(month_data_2006_2020[curve_name].index, month_data_2006_2020[curve_name].values)
_ = plt.legend(tree_plot_curves)
# Summary statistics for the slow, fast and tree-switched strategies.
# Each quantstats function is looked up by name with getattr instead of being
# spliced into a string and run through exec().
# NOTE(review): the series passed in are monthly cumulative value curves (base
# 100); confirm that the quantstats defaults (returns vs. prices, periods per
# year) are appropriate for that input.
tree_metric_names = ["avg_return", "volatility", "sharpe", "max_drawdown", "win_rate"]
tree_curve_columns = ["slow_momentum_month_cumprod_return", "fast_momentum_month_cumprod_return", "tree_momentum_month_cumprob_return"]
result = pd.DataFrame(
    {
        column: [
            getattr(qs.stats, metric)(month_data_2006_2020[column])
            for metric in tree_metric_names
        ]
        for column in tree_curve_columns
    },
    index=tree_metric_names,
)
#result