基于Python的策略开发与回测:统计套利策略
1. 策略原理深度解析
核心逻辑:
通过统计学方法挖掘一篮子资产间的价格关系,构建标准化价差(Z-score),当价差偏离历史分布时进行反向交易。与配对交易的区别:
- 多资产组合:可能涉及3+资产的线性组合(如行业ETF组合)
- 动态筛选:定期重新选择协整性最强的资产组合
- 非线性关系:引入机器学习优化对冲比例(如通过PCA降维)
数学基础:
- 价差构建:\( \text{Spread} = \sum_{i=1}^{n} \beta_i P_i \)(需满足平稳性)
- Z-score标准化:\( Z = \dfrac{\text{Spread} - \mu_{\text{rolling}}}{\sigma_{\text{rolling}}} \)
- 交易触发:
  - \( Z > 2.0 \):做空价差
  - \( Z < -2.0 \):做多价差
  - \( |Z| < 0.5 \):平仓
2. 完整策略实现代码
步骤1:生成多资产模拟数据集
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # 添加matplotlib导入
import statsmodels.api as sm # 添加statsmodels导入
from statsmodels.tsa.stattools import adfuller
from itertools import combinations
# Simulate three daily price series; only two of them are built to move together.
np.random.seed(42)
days = 1000
dates = pd.date_range("2020-01-01", periods=days)

# Baseline asset: compounded daily returns (mean 0.0003, std 0.015), scaled by 100.
base_returns = np.random.normal(0.0003, 0.015, days)
base = 100 * np.cumprod(1 + base_returns)

# Cointegrated asset: 1.2 times the baseline plus Gaussian noise (std 3).
tracking_noise = np.random.normal(0, 3, days)
coint_asset = 1.2 * base + tracking_noise

# Noise asset: an independent compounded series (mean 0.0005, std 0.025).
noise_returns = np.random.normal(0.0005, 0.025, days)
noise_asset = 100 * np.cumprod(1 + noise_returns)

# One column per asset, indexed by calendar day.
price_columns = {
    "Asset_A": base,
    "Asset_B": coint_asset,
    "Asset_C": noise_asset,
}
data = pd.DataFrame(price_columns, index=dates)
步骤2:动态协整组合筛选
def find_best_coint_pair(df):
    """Pick the column pair whose regression residual looks most stationary.

    For every unordered pair of columns ``(x, y)``, ``x`` is regressed on
    ``y`` with an intercept, an Augmented Dickey-Fuller test is run on the
    residuals, and the pair with the smallest ADF p-value is kept.

    Args:
        df: DataFrame holding one price series per column.

    Returns:
        A tuple ``(best_pair, best_beta, best_p)``: the two column labels,
        the OLS slope of the first column on the second (used as the hedge
        ratio), and the ADF p-value of the residuals. When no pair produces
        a p-value below 1.0 (for example, fewer than two columns), the
        result is ``(None, 0, 1.0)``.

    NOTE(review): a plain ADF p-value on estimated residuals is not the
    Engle-Granger cointegration p-value; ``statsmodels.tsa.stattools.coint``
    may be the more appropriate test — confirm before relying on the level
    of the reported p-value.
    """
    best_p = 1.0
    best_pair = None
    best_beta = 0
    for pair in combinations(df.columns, 2):
        # Keep only rows where both legs are observed; missing values are
        # not removed by the OLS fit or the ADF test below.
        legs = df[list(pair)].dropna()
        x = legs[pair[0]]
        y = legs[pair[1]]
        model = sm.OLS(x, sm.add_constant(y)).fit()
        p_value = adfuller(model.resid)[1]
        # A NaN p-value compares False here, so such pairs are skipped.
        if p_value < best_p:
            best_p = p_value
            best_pair = pair
            # Position 1 is the slope on y (position 0 is the constant);
            # positional access avoids the integer-key FutureWarning.
            best_beta = model.params.iloc[1]
    return best_pair, best_beta, best_p
# Run the pair search on the simulated prices and report the selected pair.
selection = find_best_coint_pair(data)
best_pair, beta, p = selection
print(f"最优协整组合: {best_pair}, β={beta:.2f}, p值={p:.4f}")
打印结果:
最优协整组合: ('Asset_A', 'Asset_B'), β=0.82, p值=0.0000
步骤3:价差计算与信号生成
# Spread of the selected pair: first leg minus beta times the second leg.
data["Spread"] = data[best_pair[0]] - beta * data[best_pair[1]]
# Drop rows with missing values before computing rolling statistics.
data = data.dropna()
# Rolling-window length and entry/exit thresholds (in Z-score units).
lookback = 60
threshold_entry = 2.0
threshold_exit = 0.5
# Rolling Z-score of the spread; the first lookback-1 rows are NaN.
data["Z_mean"] = data["Spread"].rolling(lookback).mean()
data["Z_std"] = data["Spread"].rolling(lookback).std()
data["Zscore"] = (data["Spread"] - data["Z_mean"]) / data["Z_std"]
# Trading signal with position holding.
# Only three events set the position explicitly: a short entry, a long entry,
# and an exit. Every other bar (exit <= |Z| <= entry, or Z undefined) is left
# as NaN and inherits the previous bar's position, so a trade opened beyond
# the entry threshold stays open until |Z| falls below the exit threshold.
signal = pd.Series(np.nan, index=data.index)
signal[data["Zscore"] > threshold_entry] = -1.0  # short the spread
signal[data["Zscore"] < -threshold_entry] = 1.0  # long the spread
signal[data["Zscore"].abs() < threshold_exit] = 0.0  # close the position
# Bars before the first explicit signal are flat.
data["Position"] = signal.ffill().fillna(0).astype(int)
3. 可视化代码与解析
可视化1:多资产价格与价差关系
# Figure 1: price panel on top, spread / Z-score panel underneath.
fig = plt.figure(figsize=(14, 10))

# Top panel: the three price series, one line each.
ax1 = fig.add_subplot(2, 1, 1)
price_lines = (
    ("Asset_A", "Asset A", "#1f77b4"),
    ("Asset_B", "Asset B", "#ff7f0e"),
    ("Asset_C", "Asset C", "#2ca02c"),
)
for column, legend_label, line_color in price_lines:
    ax1.plot(data[column], label=legend_label, color=line_color)
ax1.set_title("Multi-Asset Price Series", pad=15)
ax1.legend(loc="upper left")
ax1.grid(alpha=0.3)

# Bottom panel: spread and Z-score with the entry thresholds and a zero line.
ax2 = fig.add_subplot(2, 1, 2)
ax2.plot(data["Spread"], label="Spread", color="#9467bd", linewidth=1)
ax2.plot(data["Zscore"], label="Z-score", color="#d62728", alpha=0.7, linewidth=1)
# Only the upper threshold carries a legend label to avoid a duplicate entry.
ax2.axhline(threshold_entry, color="gray", linestyle="--", label="Entry Threshold")
ax2.axhline(-threshold_entry, color="gray", linestyle="--")
ax2.axhline(0, color="black", linewidth=0.8)
ax2.set_title("Spread & Z-score Dynamics", pad=15)
ax2.legend(loc="upper right")
ax2.grid(alpha=0.3)

plt.tight_layout()
plt.show()
图表说明:
- 上图展示三资产价格走势(A、B协整,C为噪声)
- 下图紫色曲线为价差,红色曲线为Z-score,灰色虚线为交易阈值
可视化2:持仓信号与资金曲线
# Daily P&L: yesterday's position times today's change in the spread.
# The result is in spread price units, not a percentage return.
spread_change = data["Spread"].diff()
held_position = data["Position"].shift(1)
data["Returns"] = held_position * spread_change
data["Strategy"] = data["Returns"].cumsum()

# Figure 2: position state on top, cumulative P&L underneath.
fig = plt.figure(figsize=(12, 6))

# Top panel: step plot of the position (-1 short, 0 flat, 1 long).
position_ax = fig.add_subplot(2, 1, 1)
position_ax.step(
    data.index, data["Position"], where="post", color="#17becf", linewidth=1.5
)
position_ax.set_title("Position Status", pad=15)
position_ax.set_yticks([-1, 0, 1])
position_ax.set_yticklabels(["Short", "Flat", "Long"])
position_ax.grid(alpha=0.3)

# Bottom panel: running sum of the daily P&L.
equity_ax = fig.add_subplot(2, 1, 2)
equity_ax.plot(data["Strategy"], color="#e377c2", linewidth=1.5)
equity_ax.set_title("Strategy Cumulative Returns", pad=15)
equity_ax.grid(alpha=0.3)
equity_ax.set_ylabel("Cumulative Return")

plt.tight_layout()
plt.show()
图表说明:
- 上图为持仓状态变化(-1=做空,0=平仓,1=做多)
- 下图为策略累计收益曲线(由"上一日持仓 × 当日价差变动"逐日累加得到,单位为价差的价格单位,而非百分比收益率)
4. 高级优化技巧
- 自适应阈值:
# Adapt the entry threshold to the spread's recent volatility.
# "ATR" here is the 20-bar rolling mean of the absolute one-bar spread change.
data['ATR'] = data['Spread'].diff().abs().rolling(20).mean()
# Scale the base threshold (2.0) by the current ATR relative to its mean over
# the whole series; the first rows are NaN until the rolling window fills.
dynamic_threshold = 2.0 * data['ATR'] / data['ATR'].mean()
- 组合再平衡:
# Re-test cointegration at each month end, using only rows before position i.
# NOTE(review): `i` is presumably the integer position of the current bar in
# an enclosing backtest loop that is not shown here — confirm.
if data.index[i].is_month_end:
    # Search only the raw price columns so that derived columns added to
    # `data` (Spread, Z_mean, Zscore, ...) are never treated as assets.
    asset_columns = ["Asset_A", "Asset_B", "Asset_C"]
    best_pair, beta, p = find_best_coint_pair(data[asset_columns].iloc[:i])
- 非线性对冲:
# Use support-vector regression to model a non-linear hedge relationship.
# NOTE(review): `x` and `y` are presumably the two legs' price Series, as in
# find_best_coint_pair — confirm against the surrounding code.
from sklearn.svm import SVR

# scikit-learn expects a 2-D feature matrix, hence the single-column reshape.
features = y.values.reshape(-1, 1)
model = SVR(kernel='rbf').fit(features, x)
# The prediction is the fitted value of x for each y, not a scalar ratio.
beta_nonlinear = model.predict(features)