分析黄金ETF:用美股黄金ETF(GLD)的开盘价、收盘价预测中国黄金ETF的开盘价、收盘价

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import lightgbm as lgb

# Configure matplotlib so the Chinese text and minus signs used in the plots render.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font chosen so Chinese labels display properly
    'axes.unicode_minus': False,    # so the minus sign displays properly
})

# === 1. Load the Excel data ===
# The workbook must provide a "date" column; the rest of the script also reads
# the gld_open, gld_close, cn_open and cn_close columns.
df = pd.read_excel(r'gold_etf_data.xlsx', parse_dates=["date"])
# Coerce the dates BEFORE sorting. If any cell could not be parsed on read, the
# column may still hold raw objects, and sorting those either fails or yields a
# non-chronological order. Unparseable values become NaT and sort last.
df['date'] = pd.to_datetime(df['date'], errors='coerce')
df = df.sort_values("date").reset_index(drop=True)

# === 2. Build lagged features ===
# US (GLD) open and close taken from the previous row. The China gold ETF trades
# on Beijing time and the US market on New York time (roughly half a day apart),
# so when the China market opens the previous US session is already fully known.
prev_gld_open = df['gld_open'].shift(1)
prev_gld_close = df['gld_close'].shift(1)
df['gld_open_shift'] = prev_gld_open
df['gld_close_shift'] = prev_gld_close

# US-side movement indicators: previous session's close-minus-open move, in
# absolute terms and relative to that session's open.
df['gld_change'] = prev_gld_close - prev_gld_open
df['gld_pct_change'] = df['gld_change'] / prev_gld_open

# The China ETF's own open/close from one and two rows back, to capture the
# series' autocorrelation (its own history influencing itself).
for lag in (1, 2):
    for side in ('open', 'close'):
        df[f'cn_{side}_lag{lag}'] = df[f'cn_{side}'].shift(lag)

# Unparseable dates become NaT, which the dropna below then removes.
df['date'] = pd.to_datetime(df['date'], errors='coerce')

# Day of week as an integer feature (pandas convention: Monday=0).
df['dayofweek'] = df['date'].dt.dayofweek

# === 3. Drop rows containing missing values ===
# The shifts above leave NaN in the leading rows; any row with a NaN in any
# column is discarded.
df = df.dropna()

# === 4. Assemble model inputs and targets ===
# US-side features shared by both models.
gld_features = ['gld_open_shift', 'gld_close_shift', 'gld_pct_change']

# The open-price model sees only lagged China prices.
features_open = gld_features + [
    'cn_close_lag1', 'cn_open_lag1', 'cn_open_lag2', 'cn_close_lag2', 'dayofweek',
]
# The close-price model additionally uses the same row's China open price.
features_close = gld_features + [
    'cn_open', 'cn_open_lag1', 'cn_close_lag1', 'dayofweek',
]

X_open, y_open = df[features_open], df['cn_open']
X_close, y_close = df[features_close], df['cn_close']

# === 5. Train/test split ===
# shuffle=False keeps the rows in their existing order, so the split is a
# contiguous 80% / 20% cut rather than a random sample.
split_options = {'test_size': 0.2, 'shuffle': False}

(X_train_open, X_test_open,
 y_train_open, y_test_open) = train_test_split(X_open, y_open, **split_options)

(X_train_close, X_test_close,
 y_train_close, y_test_close) = train_test_split(X_close, y_close, **split_options)

# === 6. Fit the models ===
# One linear regression per target, each trained on its own training split.
model_open = LinearRegression()
model_open.fit(X_train_open, y_train_open)

model_close = LinearRegression()
model_close.fit(X_train_close, y_train_close)

# Disabled alternative kept from the original script: lgb.LGBMRegressor()
# fitted on the same training data in place of LinearRegression().


# === 7. Predict ===
# Predictions are produced for every row of df (training and test portions
# alike) and stored next to the actual prices for plotting.
df = df.assign(
    pred_cn_open=model_open.predict(X_open),
    pred_cn_close=model_close.predict(X_close),
)

# === 8. Evaluate error on the test split ===
# RMSE is computed as the square root of the MSE instead of passing
# squared=False: that keyword was deprecated in scikit-learn 1.4 and removed in
# 1.6, where it raises TypeError. The square-root form works on every version.
rmse_open = mean_squared_error(y_test_open, model_open.predict(X_test_open)) ** 0.5
rmse_close = mean_squared_error(y_test_close, model_close.predict(X_test_close)) ** 0.5

print(f"开盘价预测 RMSE:{rmse_open:.4f}")
print(f"收盘价预测 RMSE:{rmse_close:.4f}")

# === 9. Visualization ===
def _plot_prediction(ax, dates, actual, predicted, kind, rmse_text, rmse_color):
    """Draw actual vs. predicted prices on *ax* and annotate it with the RMSE.

    ax          -- matplotlib Axes to draw on.
    dates       -- x values (the date column).
    actual      -- observed prices.
    predicted   -- model predictions aligned with *actual*.
    kind        -- price name inserted into the legend labels and title.
    rmse_text   -- preformatted RMSE annotation shown in the top-left corner.
    rmse_color  -- text colour of the RMSE annotation.
    """
    ax.plot(dates, actual, label=f'实际{kind}', marker='o', alpha=0.7)
    ax.plot(dates, predicted, label=f'预测{kind}', marker='x', linestyle='--', alpha=0.7)
    ax.set_title(f'中国黄金ETF {kind}预测')
    ax.set_xlabel('日期')
    ax.set_ylabel('价格')
    ax.legend()
    ax.grid(True)
    ax.tick_params(axis='x', rotation=45)
    # Axes-relative coordinates keep the annotation in the corner regardless of data range.
    ax.text(0.02, 0.95, rmse_text, transform=ax.transAxes,
            fontsize=12, color=rmse_color, bbox=dict(facecolor='white', alpha=0.8))


def _format_formula(title, model, feature_names):
    """Return the fitted linear model written out as a one-line formula string.

    Negative coefficients are rendered as " - 0.12×name" rather than
    " + -0.12×name" so the printed formula reads naturally.
    """
    parts = [f"{title}: y = {model.intercept_:.2f}"]
    for coef, name in zip(model.coef_, feature_names):
        sign = '-' if coef < 0 else '+'
        parts.append(f" {sign} {abs(coef):.2f}×{name}")
    return "".join(parts)


# Two stacked panels sharing the date axis: open price on top, close price below.
fig, axes = plt.subplots(2, 1, figsize=(16, 12), sharex=True)

_plot_prediction(axes[0], df['date'], df['cn_open'], df['pred_cn_open'], '开盘价',
                 f'开盘价预测 RMSE:{rmse_open:.4f}', 'red')
# The formula is only printed to stdout; it is not drawn on the figure.
formula_open = _format_formula("中国黄金开盘价预测公式", model_open, X_train_open.columns)
print(formula_open)

_plot_prediction(axes[1], df['date'], df['cn_close'], df['pred_cn_close'], '收盘价',
                 f'收盘价预测 RMSE: {rmse_close:.4f}', 'blue')
formula_close = _format_formula("中国黄金收盘价预测公式", model_close, X_train_close.columns)
print(formula_close)

# === Auto layout & show ===
plt.tight_layout()
plt.show()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值