#特别有意思的是,券商的软件让散户免费用. 可是为什么机构版的就是收费的?我只不过多问了一个为什么,于是我发现了这个很奇怪的叙述....#
起因: 近一段时间都在折腾预训练算法,直到和一些老朋友聊天的过程中我才发现,一些老玩家从不看股票软件的MACD线图的,最多只看蜡烛图.为什么呢?
这篇小文就纠结一下这到底是为什么.
首先: 就股票市场本身而言,它的数据就是时间序列数据, 本质上是一个账本, 记录历史成交的价格. 比如日线数据中,通常有这么几列, 如下:
date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,pctChg,peTTM,psTTM,pcfNcfTTM,pbMRQ,isST,market
2024-08-21,000004,10.4100,10.5900,10.2100,10.2900,10.4100,4057100,42116068.0000,3,3.212600,1,-1.152700,-1.152700,-8.496810,13.067495,697.031667,7.370315,0,sz
2024-08-22,000004,10.3100,11.2300,10.2100,10.4800,10.2900,7014595,74489729.9500,3,5.554400,1,1.846500,1.846500,-8.653699,13.308780,709.902028,7.506404,0,sz
2024-08-23,000004,10.4500,11.5300,10.3800,11.5300,10.4800,13957200,159213386.0000,3,11.051900,1,10.019100,10.019100,-10.308171,13.249892,-83.693919,8.644457,0,sz
2024-08-26,000004,11.5700,12.0800,11.1200,11.6800,11.5300,23554595,271678300.4000,3,18.651500,1,1.301000,1.301000,-10.442275,13.422266,-84.782738,8.756917,0,sz
2024-08-27,000004,11.5100,12.2000,11.4500,11.8000,11.6800,18605300,220527690.0000,3,14.732400,1,1.027400,1.027400,-10.549559,13.560166,-85.653793,8.846886,0,sz
我们通常关注的只有 date,code,open,high,low,close,preclose,volume 这几列.那么, MACD是怎么计算的呢? 通常只需要 date, close 就足够了. 这篇小文不解释MACD的原理,网上有铺天盖地的解释原理的文章.
从本质上讲MACD用来描述某只股票的特定历史趋势,它本质上就是滞后的, 券商提供给散户的股票软件通常是免费的.比如某个软件的MACD是这样画的
在这里, MACD使用的是固定的参数(即常见的 12, 26, 9),这显然不是最佳的参数,也不适用于所有的股票.
于是我就在想,是否可以自己画出来呢? 以某一段历史时期的数据作为训练集(以最佳收益为参考),另一段时期作为测试集,最接近现在的一段作为验证集,让MACD的参数随行情动态调整,并画出这样的动态MACD线图呢?
废话少说,代码如下, 这个是最后的版本,当然也只是一个用例.
import akshare as ak
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import talib
from sklearn.linear_model import LinearRegression
# Configure matplotlib so Chinese labels and the minus sign render.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Fetch daily history for the CSI 300 ETF (510300).
raw = ak.fund_etf_hist_em(
    symbol="510300",
    period="daily",
    start_date="20220101",
    end_date="20250317",
    adjust="qfq",
)
raw['日期'] = pd.to_datetime(raw['日期'])
# Index by date, sort chronologically and keep only the OHLCV columns.
df = raw.set_index('日期').sort_index()[['开盘', '最高', '最低', '收盘', '成交量']]

# Chronological split: 2022 for training, 2023 for testing and
# 2024-01-01 .. 2025-03-17 for validation.  Each split is an independent copy.
train_data = df.loc['2022-01-01':'2022-12-31'].copy()
test_data = df.loc['2023-01-01':'2023-12-31'].copy()
valid_data = df.loc['2024-01-01':'2025-03-17'].copy()

# ATR(14) is computed separately on each split, so every split starts with
# its own run of NaN warm-up rows.
for data in (train_data, test_data, valid_data):
    data['ATR'] = talib.ATR(
        data['最高'],
        data['最低'],
        data['收盘'],
        timeperiod=14,
    )
# Feature engineering
def create_features(df):
    """Return a copy of *df* extended with the model's indicator columns.

    The following columns are added:

    - ``MA5`` / ``MA20``: 5- and 20-row rolling means of ``收盘``.
    - ``RSI``: ``talib.RSI`` of ``收盘`` with ``timeperiod=14``.
    - ``Vol_MA``: 5-row rolling mean of ``成交量``.
    - ``Price_Change``: ``pct_change()`` of ``收盘``.

    Rows containing a NaN in any column (including columns already present
    in *df*) are dropped from the result.

    The input frame is left untouched.  Callers in this file pass ``iloc``
    slices of a larger frame; assigning columns directly onto such a slice
    triggers pandas' chained-assignment warnings and would leak the feature
    columns into frames that are later reused for other purposes.
    """
    features = df.copy()
    close = features['收盘']
    features['MA5'] = close.rolling(window=5).mean()
    features['MA20'] = close.rolling(window=20).mean()
    features['RSI'] = talib.RSI(close, timeperiod=14)
    features['Vol_MA'] = features['成交量'].rolling(window=5).mean()
    features['Price_Change'] = close.pct_change()
    return features.dropna()
# Build the training matrix from the training split.
train_data_features = create_features(train_data)
X = train_data_features[['ATR', 'MA5', 'MA20', 'RSI', 'Vol_MA', 'Price_Change']]

# Targets are simple multiples of ATR, clipped to the allowed period range
# and truncated to integers:
#   SHORT  = ATR * 10, limited to 5..40
#   LONG   = ATR * 50, limited to 10..200
#   signal = ATR * 5,  limited to 2..40
# NOTE(review): every target is a clipped function of ATR, which is also one
# of the input features, so the regressions only re-learn that mapping.
train_atr = train_data_features['ATR']
y_short = np.clip(train_atr * 10, 5, 40).astype(int)
y_long = np.clip(train_atr * 50, 10, 200).astype(int)
y_signal = np.clip(train_atr * 5, 2, 40).astype(int)

# One linear regression per MACD parameter (fit() returns the estimator).
model_short = LinearRegression().fit(X, y_short)
model_long = LinearRegression().fit(X, y_long)
model_signal = LinearRegression().fit(X, y_signal)
# Predict the MACD parameters
def predict_parameters(df):
    """Predict ``(short, long, signal)`` MACD periods for the last row of *df*.

    Features are built with ``create_features`` and the most recent complete
    row is fed to the module-level estimators ``model_short``,
    ``model_long`` and ``model_signal``.  Each prediction is clipped to its
    allowed range (5..40, 10..200 and 2..40 respectively) and truncated to
    an ``int``.

    Raises ``IndexError`` when no row survives the NaN filtering in
    ``create_features``.
    """
    feature_cols = ['ATR', 'MA5', 'MA20', 'RSI', 'Vol_MA', 'Price_Change']
    # Work on a copy: callers pass iloc slices, and the feature builder adds
    # columns to whatever frame it receives.
    features = create_features(df.copy())[feature_cols]
    # Keep the row as a one-row DataFrame so the column names match the ones
    # the estimators were fitted with (a bare ndarray makes scikit-learn warn
    # about missing feature names).
    latest = features.iloc[[-1]]

    # predict() returns a length-1 array; take element 0 explicitly because
    # int() on an array with ndim > 0 is deprecated since NumPy 1.25.
    short_pred = int(np.clip(model_short.predict(latest)[0], 5, 40))
    long_pred = int(np.clip(model_long.predict(latest)[0], 10, 200))
    signal_pred = int(np.clip(model_signal.predict(latest)[0], 2, 40))
    return short_pred, long_pred, signal_pred
# Dynamic MACD (periods come from the regression models)
def calculate_dynamic_macd(df):
    """Add ``Dynamic_DIFF``, ``Dynamic_DEA`` and ``Dynamic_MACD`` to *df*.

    For every row from index 200 onwards the periods are re-predicted with
    ``predict_parameters`` on the history up to and including that row.
    DIFF is the difference between the fast and slow EMA of ``收盘``, each
    evaluated on a window of ``period + 1`` closes ending at the row.  DEA is
    the EMA of the trailing ``signal + 1`` DIFF values and is only produced
    from row ``200 + signal`` onwards.  The histogram is ``2 * (DIFF - DEA)``.
    Rows without a value hold NaN.

    *df* is modified in place and also returned.
    """
    warmup = 200  # the largest LONG period is 200
    close = df['收盘']
    diff_values = []
    dea_values = []
    hist_values = []

    for row in range(len(df)):
        if row < warmup:
            # Not enough history for the longest possible slow EMA.
            diff_values.append(np.nan)
            dea_values.append(np.nan)
            hist_values.append(np.nan)
            continue

        fast_n, slow_n, signal_n = predict_parameters(df.iloc[:row + 1])
        fast_window = close.iloc[row - fast_n:row + 1].values
        slow_window = close.iloc[row - slow_n:row + 1].values
        fast_ema = talib.EMA(fast_window, timeperiod=fast_n)[-1]
        slow_ema = talib.EMA(slow_window, timeperiod=slow_n)[-1]
        diff_now = fast_ema - slow_ema
        diff_values.append(diff_now)

        if row < warmup + signal_n:
            # The DIFF window would still reach into the NaN warm-up rows.
            dea_values.append(np.nan)
            hist_values.append(np.nan)
            continue

        recent_diff = np.array(diff_values[row - signal_n:row + 1])
        dea_now = talib.EMA(recent_diff, timeperiod=signal_n)[-1]
        dea_values.append(dea_now)
        hist_values.append(2 * (diff_now - dea_now))

    df['Dynamic_DIFF'] = diff_values
    df['Dynamic_DEA'] = dea_values
    df['Dynamic_MACD'] = hist_values
    return df
# Fixed MACD (12, 26, 9)
def calculate_fixed_macd(df):
    """Add ``Fixed_DIFF``, ``Fixed_DEA`` and ``Fixed_MACD`` to *df*.

    The first two outputs of ``talib.MACD`` on ``收盘`` (fast 12, slow 26,
    signal 9) are stored as DIFF and DEA.  The third output is discarded and
    the histogram is stored as ``2 * (DIFF - DEA)`` instead.

    *df* is modified in place and also returned.
    """
    dif_line, dea_line, _ = talib.MACD(
        df['收盘'],
        fastperiod=12,
        slowperiod=26,
        signalperiod=9,
    )
    df['Fixed_DIFF'] = dif_line
    df['Fixed_DEA'] = dea_line
    df['Fixed_MACD'] = 2 * (dif_line - dea_line)
    return df
# Trading signals and strategy returns (with stop-loss)
def calculate_returns(df):
    """Derive a long/flat position from ``Dynamic_MACD`` and return the total return.

    Rules, applied per row:

    - enter (position 1) when the histogram crosses from <= 0 to > 0;
    - exit (position 0) when it crosses from >= 0 to < 0;
    - exit (position 0) when ``收盘`` falls by more than 5% in one row
      (stop-loss); this overrides an entry on the same row.

    Between events the previous position is carried forward, and the
    position is 0 before the first event.  The position of the previous row
    is applied to the current row's price change, so a signal takes effect
    on the following row.

    Writes the integer ``Signal`` and the float ``Returns`` columns into
    *df* and returns the compounded return over all non-NaN ``Returns``.
    """
    hist = df['Dynamic_MACD']
    prev_hist = hist.shift(1)
    daily_change = df['收盘'].pct_change()

    buy = (hist > 0) & (prev_hist <= 0)
    sell = (hist < 0) & (prev_hist >= 0)
    stop_loss = daily_change < -0.05

    # Rows without an event must be NaN so that ffill() can carry the last
    # decision forward.  Starting from a column of zeros (as before) left
    # nothing to fill, so the position was held for a single row only.
    events = np.where(buy, 1.0, np.nan)
    events = np.where(sell | stop_loss, 0.0, events)
    df['Signal'] = events
    df['Signal'] = df['Signal'].ffill().fillna(0).astype(int)

    df['Returns'] = daily_change * df['Signal'].shift(1)
    total_return = (1 + df['Returns'].dropna()).prod() - 1
    return total_return
# Total return, annualized return and maximum drawdown
def calculate_metrics(df):
    """Summarise the ``Returns`` column of *df*.

    Returns a tuple ``(total_return, annualized_return, max_drawdown)``:

    - ``total_return``: compounded return over all non-NaN rows;
    - ``annualized_return``: ``total_return`` scaled to 252 rows per year;
    - ``max_drawdown``: largest peak-to-trough decline of the equity curve,
      expressed as a fraction of the peak.  The starting capital of 1.0
      counts as the first peak, so a loss from the very first row is
      reported as a drawdown.

    When there are no non-NaN returns, ``(0.0, 0.0, 0.0)`` is returned
    instead of raising ``ZeroDivisionError``.
    """
    returns = df['Returns'].dropna()
    if returns.empty:
        return 0.0, 0.0, 0.0

    total_return = (1 + returns).prod() - 1
    annualized_return = (1 + total_return) ** (252 / len(returns)) - 1

    equity = (1 + returns).cumprod()
    # The running peak never drops below the initial capital of 1.0.
    peak = equity.cummax().clip(lower=1.0)
    # Relative decline from the peak; an absolute difference would scale
    # with the level the equity curve happens to have reached.
    max_drawdown = ((peak - equity) / peak).max()
    return total_return, annualized_return, max_drawdown
# Evaluate each split.  Order matters within a split: the indicators must
# exist before the signals are derived, and calculate_returns() creates the
# 'Returns' column that calculate_metrics() reads.
train_data = calculate_fixed_macd(calculate_dynamic_macd(train_data))
train_return = calculate_returns(train_data)
train_total, train_ar, train_md = calculate_metrics(train_data)

test_data = calculate_fixed_macd(calculate_dynamic_macd(test_data))
test_return = calculate_returns(test_data)
test_total, test_ar, test_md = calculate_metrics(test_data)

valid_data = calculate_fixed_macd(calculate_dynamic_macd(valid_data))
valid_return = calculate_returns(valid_data)
valid_total, valid_ar, valid_md = calculate_metrics(valid_data)

# Buy-and-hold benchmark: last close relative to first close of each split.
train_close = train_data['收盘']
test_close = test_data['收盘']
valid_close = valid_data['收盘']
train_bh = train_close.iloc[-1] / train_close.iloc[0] - 1
test_bh = test_close.iloc[-1] / test_close.iloc[0] - 1
valid_bh = valid_close.iloc[-1] / valid_close.iloc[0] - 1

# Report
print(f"训练集收益: {train_total:.4f}, 年化收益: {train_ar:.4f}, 最大回撤: {train_md:.4f}, 持有收益: {train_bh:.4f}")
print(f"测试集收益: {test_total:.4f}, 年化收益: {test_ar:.4f}, 最大回撤: {test_md:.4f}, 持有收益: {test_bh:.4f}")
print(f"验证集收益: {valid_total:.4f}, 年化收益: {valid_ar:.4f}, 最大回撤: {valid_md:.4f}, 持有收益: {valid_bh:.4f}")
print(f"验证集数据量: {len(valid_data)} 天")
# Restrict the plots to rows where both indicators are available:
# the fixed MACD needs 35 rows, the dynamic MACD up to 240.
start_idx = max(35, 240)
valid_data_subset = valid_data.iloc[start_idx:].copy()
dates = valid_data_subset.index

# Validation split in three stacked panels.
fig, (ax_price, ax_fixed, ax_dynamic) = plt.subplots(3, 1, figsize=(14, 10))

# Panel 1: closing price
ax_price.plot(dates, valid_data_subset['收盘'], label='收盘价', color='black')
ax_price.set_title('沪深300 ETF (2024-2025) - 收盘价')
ax_price.legend()

# Panel 2: fixed MACD
ax_fixed.plot(dates, valid_data_subset['Fixed_DIFF'], label='固定DIFF (12,26)', color='blue')
ax_fixed.plot(dates, valid_data_subset['Fixed_DEA'], label='固定DEA', color='orange')
ax_fixed.bar(dates, valid_data_subset['Fixed_MACD'], label='固定MACD', color='gray', alpha=0.5)
ax_fixed.axhline(y=0, color='black', linestyle='--', label='零线')
ax_fixed.set_title('固定MACD (12, 26, 9)')
ax_fixed.legend()

# Panel 3: dynamic MACD
ax_dynamic.plot(dates, valid_data_subset['Dynamic_DIFF'], label='动态DIFF', color='green')
ax_dynamic.plot(dates, valid_data_subset['Dynamic_DEA'], label='动态DEA', color='red')
ax_dynamic.bar(dates, valid_data_subset['Dynamic_MACD'], label='动态MACD', color='purple', alpha=0.5)
ax_dynamic.axhline(y=0, color='black', linestyle='--', label='零线')
ax_dynamic.set_title('动态MACD (线性回归)')
ax_dynamic.legend()

plt.tight_layout()
plt.show()
我发现了什么? 和券商的对比又有什么区别呢? 看下面的三张图
第一张图是我最初的动态MACD的算法,代码这里就不贴出来了.
第二张图是券商的软件里MACD的展示.
第三张图是上面的代码运行后返回的结果.
试想,散户如果通过券商提供的免费软件的MACD来进行决策, 为啥散户被噶就很好解释了.
但不是所有的散户都会写代码,都愿意去纠结各种参数. 所以嘛.....
于是乎,我又趁热打铁,计算了布林线,ENE, RSI,均已实现回归算法动态计算. 当然了,参数的选择是以收益为权重依据的. 毕竟炒股是为了干啥, 你说呢?