参考论文《Forecasting directional movement
of Forex data using LSTM with technical
and macroeconomic indicators》
数据样式:其中label为预测标签,根据论文中的阈值法确定
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
from torch.nn import functional as F
import torch.optim as optim
# Load the macro-indicator data and split it by row position:
# the first 80% of rows are used for training, the remaining 20% for prediction.
data_ma = pd.read_csv("data_macro.csv")
_n_train = int(0.8 * data_ma.shape[0])

# Every column except the first and the last is a feature; the last column is the label.
# NOTE(review): the first column is presumably a date/index column - confirm against the CSV.
_features = data_ma.iloc[:, 1:-1]
_labels = data_ma.iloc[:, -1:].values  # kept 2-D: one label column

Xma_train, Xma_test = _features.iloc[:_n_train], _features.iloc[_n_train:]
yma_train, yma_test = _labels[:_n_train], _labels[_n_train:]
def scaler(X_train,X_test):
    """Min-max normalise the train and test features.

    The scaler is fitted on ``X_train`` only and then applied to both sets,
    so the test data never influences the scaling range.

    Args:
        X_train: Training feature matrix used to fit the scaler.
        X_test: Test feature matrix, transformed with the fitted scaler.

    Returns:
        Tuple ``(scaled_train, scaled_test)``.
    """
    fitted = MinMaxScaler().fit(X_train)
    return fitted.transform(X_train), fitted.transform(X_test)
# Normalise the features; both names are rebound to the scaler's outputs,
# which get_seqdata later slices with array-style indexing.
Xma_train,Xma_test = scaler(Xma_train,Xma_test)
def get_seqdata(X,y,seq_len=13):
    """Build the LSTM inputs as overlapping windows of ``seq_len`` rows.

    Window ``i`` covers rows ``i .. i + seq_len - 1`` of ``X`` and is paired
    with the label of its last row, ``y[i + seq_len - 1]``.

    Args:
        X: 2-D array of features, one row per time step.
        y: Labels indexed by the same row positions as ``X``.
        seq_len: Number of consecutive rows in each window.

    Returns:
        Tuple ``(windows, labels)`` of NumPy arrays holding
        ``X.shape[0] - seq_len + 1`` windows and their labels. Both arrays
        are empty when ``X`` has fewer than ``seq_len`` rows.
    """
    window_starts = range(X.shape[0] - seq_len + 1)
    windows = [X[start:start + seq_len, :] for start in window_starts]
    labels = [y[start + seq_len - 1] for start in window_starts]
    return np.array(windows), np.array(labels)
def tensor_transform():
    """Turn the module-level train/test arrays into torch tensors.

    The scaled features and labels are first cut into windows with
    ``get_seqdata`` (default window length), giving feature arrays laid out
    as (batch, seq_len, features), and are then converted to tensors.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``: the feature tensors are
        cast with ``.float()`` and the label tensors with ``.long()``.
    """
    train_windows, train_labels = get_seqdata(Xma_train, yma_train)
    test_windows, test_labels = get_seqdata(Xma_test, yma_test)

    # Features become float tensors, labels become integer (long) tensors.
    feature_tensors = [
        torch.from_numpy(windows).float()
        for windows in (train_windows, test_windows)
    ]
    label_tensors = [
        torch.from_numpy(labels).long()
        for labels in (train_labels, test_labels)
    ]
    return (*feature_tensors, *label_tensors)
class LSTM(nn.Module):
    """LSTM classifier that labels a sequence from its last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden state of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of classes scored by the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, return_logits=False):
        """Classify every sequence in ``x``.

        Args:
            x: Float tensor laid out as (batch, seq_len, input_size).
            return_logits: When True, return the raw per-class scores, which
                are differentiable and therefore usable for training. When
                False (default), return the predicted class indices.

        Returns:
            A (batch, output_size) tensor of logits if ``return_logits`` is
            True, otherwise a (batch,) integer tensor of class indices.
        """
        # When no initial state is passed, nn.LSTM starts from all-zero
        # hidden/cell states, so none need to be allocated here.
        out, _ = self.lstm(x)
        # Only the output of the last time step feeds the classifier head.
        logits = self.fc(out[:, -1, :])
        if return_logits:
            return logits
        # Softmax is monotonic, so argmax over the logits picks the same class.
        return torch.argmax(logits, dim=1)


# LSTM hyper-parameters
input_size = 8
hidden_size = 16
num_layers = 2
output_size = 3
lr = 0.0001
num_epochs = 100


def model_train(X_train,y_train):
    """Train an ``LSTM`` classifier with full-batch Adam updates.

    The loss is computed on the model's logits. Computing it on argmax-ed
    predictions would cut the autograd graph and leave every parameter
    without a gradient, so the optimizer would never update the model.

    Args:
        X_train: Float tensor laid out as (batch, seq_len, input_size).
        y_train: Integer class labels, one per sequence, with values in
            ``[0, output_size)``. Any shape holding ``batch`` elements is
            accepted, e.g. (batch,) or (batch, 1).

    Returns:
        The trained ``LSTM`` model.
    """
    model = LSTM(input_size, hidden_size, num_layers, output_size)
    # CrossEntropyLoss takes raw logits (N, C) and integer class targets (N,).
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    targets = torch.as_tensor(y_train, dtype=torch.long).reshape(-1)

    model.train()
    for t in range(num_epochs):
        logits = model(X_train, return_logits=True)
        loss = criterion(logits, targets)
        print("Epoch ", t, "loss: ", loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return model
def profit_accuracy(df: pd.DataFrame):
    """Compute the custom accuracy over the rows where a trade is predicted.

    Labels: 0 = no action, 1 = decrease, 2 = increase. Only rows whose
    prediction is 1 or 2 (and whose true label is 0, 1 or 2) are counted;
    a row is correct when the predicted direction equals the true one.

    Args:
        df: DataFrame with integer label columns ``y_true`` and ``y_pred``.

    Returns:
        The fraction of counted rows predicted correctly, ``0.0`` when no
        row is counted (so there is nothing to divide by), or ``None`` when
        a required column is missing.
    """
    if "y_true" not in df.columns or "y_pred" not in df.columns:
        print("输入的数据有问题")
        return None

    y_true = df["y_true"]
    y_pred = df["y_pred"]
    print("可交易数量: ", (y_pred != 0).sum(), "\t总数:", y_pred.shape[0])

    def count(true_label, pred_label):
        # Number of rows with this exact (true, predicted) label pair.
        return int(((y_true == true_label) & (y_pred == pred_label)).sum())

    true_dec = count(1, 1)
    true_inc = count(2, 2)
    false_dec_noact = count(0, 1)
    false_inc_noact = count(0, 2)
    false_inc_dec = count(1, 2)
    false_dec_inc = count(2, 1)

    trades = (true_dec + true_inc + false_dec_inc + false_inc_dec
              + false_dec_noact + false_inc_noact)
    # A model that never predicts a trade would otherwise divide by zero.
    accuracy = (true_dec + true_inc) / trades if trades else 0.0
    print('accuracy:',accuracy)
    return accuracy
if __name__ == '__main__':
    # Train the model
    X_train, X_test, y_train, y_test = tensor_transform()
    model = model_train(X_train, y_train)

    # Predict on the test windows. Inference needs no autograd graph, and the
    # predictions are converted to NumPy explicitly so the DataFrame column
    # holds plain integers rather than depending on how pandas coerces a tensor.
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test)

    result = pd.DataFrame({
        'y_true': y_test.numpy().reshape(-1),
        'y_pred': y_pred.detach().cpu().numpy().reshape(-1),
    })
    accuracy = profit_accuracy(result)