本文介绍了一种基于全连接神经网络(Fully Connected Network, FCN)的K线数据预测策略(示例使用BTCUSDT加密货币行情数据)。具体步骤包括数据预处理、模型构建与训练、以及模型的验证和预测。
数据预处理
数据预处理是时间序列预测中的关键步骤。我们从历史K线数据中提取特征并生成标签。以下是主要的预处理步骤:
数据加载与格式转换
首先加载历史K线数据,并将其转换为浮点型数组。
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
# Dataset location; the bar resolution (minutes) is parsed from the file name.
data_path = "dataset/BTCUSDT-2018_09_01-2024_06_01-15.npy"  # replace with the actual file path
data_name = os.path.basename(data_path).split('.')[0]
data_resolution = int(data_name.split("-")[-1])  # trailing token of the name is the bar size in minutes
# Load and keep the first four columns as float32.
# NOTE(review): downstream code treats column 1 as the high and column 3 as the
# close, i.e. columns look like [open, high, low, close] — the original comment
# mentioned a time column; verify against the .npy layout.
data = np.float32(np.load(data_path)[:, :4])
特征与标签生成
使用滑动窗口方法生成特征和标签。特征窗口大小为96(在15分钟K线下即过去一天的数据),标签窗口大小为16(即未来4小时的数据)。同时,我们使用快速傅里叶变换(FFT)提取频域特征。
def create_windows_with_labels(data, window_size, step_size, label_size, use_freq=True, increase_rate=0.005):
    """Build sliding-window features and binary labels from K-line data.

    Parameters
    ----------
    data : np.ndarray
        Array of shape (N, >=4). Column 3 is used as the close price and
        column 1 as the high price (assumed [open, high, low, close] —
        verify against the loader).
    window_size : int
        Number of past rows per feature window.
    step_size : int
        Stride between consecutive windows.
    label_size : int
        Number of future rows examined to build the label.
    use_freq : bool
        If True, replace the centered close prices with the magnitude of
        their FFT (frequency-domain features).
    increase_rate : float
        Relative gain threshold: label is 1 when the future high reaches
        (1 + increase_rate) * the window's last close.

    Returns
    -------
    (features, labels) : tuple of np.ndarray
        features: (num_windows, window_size) float32;
        labels:   (num_windows, 1) float32 in {0, 1}.
        Both have 0 rows when data is too short for a single window.
    """
    # Guard against data shorter than window_size + label_size: the raw
    # expression would go negative and np.zeros would raise ValueError.
    num_windows = max(0, (len(data) - window_size - label_size) // step_size + 1)
    features = np.zeros((num_windows, window_size), dtype='float32')
    labels = np.zeros((num_windows, 1), dtype='float32')
    for i in range(num_windows):
        left_start_idx = i * step_size
        left_end_idx = left_start_idx + window_size
        right_start_idx = left_end_idx
        right_end_idx = right_start_idx + label_size
        # Feature: window's close prices, centered on their mean (no scaling).
        close_prices = data[left_start_idx:left_end_idx, 3]
        latest_close_price = close_prices[-1]
        normalized_prices = close_prices - np.mean(close_prices)
        if use_freq:
            # Magnitude spectrum of the centered series as frequency features.
            normalized_prices = np.abs(np.fft.fft(normalized_prices))
        features[i] = normalized_prices
        # Label: did the high of the next label_size rows reach the threshold?
        right_windows = data[right_start_idx:right_end_idx]
        if right_windows[:, 1].max() >= (1 + increase_rate) * latest_close_price:
            labels[i][0] = 1
    return features, labels
# Sliding-window hyper-parameters, counted in bars (15-minute bars here).
window_size = 96   # one day of history
step_size = 4      # advance one hour per window
label_size = 16    # look four hours ahead for the label

# Build the dataset and report the fraction of positive labels.
features, labels = create_windows_with_labels(data, window_size, step_size, label_size, use_freq=True, increase_rate=0.01)
print("label_ratio:", sum(labels[:, 0]) / len(labels))

# Shuffle features and labels with a single shared permutation.
perm = np.random.permutation(len(features))
features = features[perm]
labels = labels[perm]

# Convert to PyTorch tensors: features [N, T], labels [N, 1].
features_tensor = torch.tensor(features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.float32)
print(features.shape, labels.shape)
数据集划分
将数据划分为训练集和验证集,训练集占比为90%。
# Wrap the tensors in a dataset and carve out a 90% / 10% train/val split.
dataset = TensorDataset(features_tensor, labels_tensor)
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
# Shuffled large batches for training; ordered smaller batches for validation.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
模型构建
我们构建了一个全连接神经网络模型,包含三层全连接层:前两层各自后接ReLU激活函数和Batch Normalization层,输出层后接Batch Normalization层和Sigmoid激活函数。
class FullyConnectedNetwork(nn.Module):
    """MLP binary classifier.

    Two hidden Linear(1024) layers, each followed by ReLU and BatchNorm1d,
    then a Linear output to a single unit, BatchNorm1d over that unit, and
    a final Sigmoid. Input: (batch, input_dim) floats; output: (batch, 1)
    values in (0, 1).
    """

    def __init__(self, input_dim):
        super().__init__()
        # NOTE(review): BatchNorm1d(1) between the output Linear and the
        # Sigmoid is unusual — it renormalizes the logit batch-wise.
        # Kept exactly as designed to preserve behavior.
        layers = [
            nn.Linear(input_dim, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Linear(1024, 1),
            nn.BatchNorm1d(1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the full stack to a batch of feature vectors."""
        return self.model(x)
模型训练
使用二元交叉熵损失函数(BCELoss)和Adam优化器对模型进行训练。训练过程中记录每个epoch的训练损失和验证损失,同时计算验证集的准确率。
# Model, loss, and optimizer. BCELoss matches the Sigmoid output head.
input_dim = features_tensor.shape[1]
model = FullyConnectedNetwork(input_dim=input_dim)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for a fixed number of epochs, validating after each one.
num_epochs = 100
for epoch in range(num_epochs):
    # ---- training phase ----
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        # Accumulate sample-weighted loss so the epoch average is exact
        # even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / train_size
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_loss:.4f}")

    # ---- validation phase ----
    model.eval()
    val_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = model(inputs)
            val_loss += criterion(outputs, targets).item() * inputs.size(0)
            # Threshold the sigmoid output at 0.5 for a hard prediction.
            predicted = (outputs > 0.5).float()
            correct_predictions += (predicted == targets).sum().item()
            total_predictions += targets.size(0)
    val_loss /= val_size
    accuracy = correct_predictions / total_predictions
    print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.4f}")
模型预测
对训练集和验证集进行预测,并输出一些预测结果以供分析。
# Inference helper.
def predict(model, data_loader):
    """Run the model over every batch of a DataLoader and stack the outputs.

    Puts the model in eval mode and disables gradients. Rows are returned
    in the loader's iteration order as a single numpy array.
    """
    model.eval()
    collected = []
    with torch.no_grad():
        for batch_inputs, _ in data_loader:
            collected.extend(model(batch_inputs).numpy())
    return np.array(collected)
# Score both splits and show the first few outputs as a sanity check.
# NOTE(review): train_loader shuffles, so train_predictions order varies per call.
train_predictions = predict(model, train_loader)
val_predictions = predict(model, val_loader)
for split_name, preds in (("Train", train_predictions), ("Validation", val_predictions)):
    print(f"{split_name} Predictions:", preds[:10])
总结
本文介绍了一种基于全连接神经网络的K线数据预测策略,并使用历史K线数据进行了训练与验证。通过特征工程、模型构建与训练,我们可以实现对未来价格走势的预测。未来的工作可以包括优化模型结构、调整超参数以及引入更多的特征以提高预测精度。
希望本文对您有所帮助,如果有任何问题或建议,欢迎在评论区留言讨论!