重要声明:该文章只是技术分享,如有投资盈亏,概不负责!切记投资需谨慎!
1. LSTM 模型简介
长短期记忆(Long Short-Term Memory,LSTM)是一种时间循环神经网络(RNN),论文首次发表于1997年。由于独特的设计结构,LSTM适合于处理和预测时间序列中间隔和延迟非常长的重要事件。LSTM的表现通常比普通的时间循环神经网络及隐马尔科夫模型(HMM)更好,比如用在不分段连续手写识别上。2009年,用LSTM构建的人工神经网络模型赢得过ICDAR手写识别比赛冠军。
LSTM是一种含有LSTM区块(blocks)的类神经网络。在文献或其他资料中,LSTM区块可能被描述成智能网络单元,因为它可以记忆不定时间长度的数值;区块中的gate能够决定input是否重要到能被记住,以及能不能被输出(output)。
上图底下是四个S函数单元,最左边的函数依情况可能成为区块的input,右边三个会经过gate决定input是否能传入区块。左边第二个为input gate,如果这里的产出近似于零,将把这里的值挡住,不会进到下一层。左边第三个是forget gate,当这里产生的值近似于零,将把区块里记住的值忘掉。第四个也就是最右边的input为output gate,它可以决定区块记忆中的input是否能输出。
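方程(标准LSTM区块的前向计算;$\sigma$ 为S函数,$\odot$ 表示逐元素相乘):
$$
\begin{aligned}
f_t &= \sigma(W_f x_t + U_f h_{t-1} + b_f) \\
i_t &= \sigma(W_i x_t + U_i h_{t-1} + b_i) \\
o_t &= \sigma(W_o x_t + U_o h_{t-1} + b_o) \\
\tilde{c}_t &= \tanh(W_c x_t + U_c h_{t-1} + b_c) \\
c_t &= f_t \odot c_{t-1} + i_t \odot \tilde{c}_t \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
$$
其中 $f_t$、$i_t$、$o_t$ 分别对应forget gate、input gate和output gate,$c_t$ 为区块记忆,$h_t$ 为区块输出。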
2. 代码
2.1 获取数据
我们使用雅虎财经的数据(容易获得),需要安装雅虎财经库:
pip install yahoo_fin
获得数据代码:
def shuffle_in_unison(a, b):
    """Shuffle ``a`` and ``b`` in place using the same random permutation.

    The global NumPy RNG state is captured once and restored before each
    shuffle, so both arrays are reordered by the same random draws. For
    inputs of equal length this keeps row ``i`` of ``a`` paired with row
    ``i`` of ``b``.

    Args:
        a: First array, shuffled in place along its first axis.
        b: Second array, shuffled in place along its first axis.

    Returns:
        None. Both inputs are modified in place.
    """
    rng_snapshot = np.random.get_state()
    for arr in (a, b):
        # Rewinding to the snapshot makes every shuffle replay the same draws.
        np.random.set_state(rng_snapshot)
        np.random.shuffle(arr)
def load_data(ticker, n_steps=50, scale=True, shuffle=True, lookup_step=1, split_by_date=True,
              test_size=0.2, feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
    """Load price data and turn it into windowed train/test sets.

    Args:
        ticker: Either a ticker symbol (``str``), fetched through
            ``si.get_data``, or an already loaded ``pd.DataFrame``. A
            DataFrame argument is never modified; all work is done on a copy.
        n_steps: Number of consecutive rows in each input window.
        scale: If true, every feature column is rescaled with its own
            ``MinMaxScaler``; the fitted scalers are returned.
        shuffle: Whether to shuffle the samples (within each split when
            ``split_by_date`` is true, or via ``train_test_split`` otherwise).
        lookup_step: How many rows ahead the target ``adjclose`` lies.
        split_by_date: If true, the first ``1 - test_size`` fraction of the
            windows is the training set and the remainder the test set;
            otherwise ``train_test_split`` decides.
        test_size: Fraction of the windows reserved for testing.
        feature_columns: Columns used as model inputs. The default list is
            never mutated.

    Returns:
        dict with the keys ``df`` (copy of the unprocessed frame),
        ``column_scaler`` (only when ``scale`` is true), ``last_sequence``,
        ``X_train``, ``y_train``, ``X_test``, ``y_test`` and ``test_df``
        (rows of ``df`` whose dates close each test window).

    Raises:
        TypeError: If ``ticker`` is neither a ``str`` nor a ``pd.DataFrame``.
        AssertionError: If a requested feature column is missing.
    """
    if isinstance(ticker, str):
        df = si.get_data(ticker)
    elif isinstance(ticker, pd.DataFrame):
        # Copy first: the steps below add columns, rescale values and drop
        # rows in place, which must not leak into the caller's DataFrame.
        df = ticker.copy()
    else:
        raise TypeError("ticker can be either a str or a `pd.DataFrame` instances")

    # Private list copy; also lets callers pass any sequence of column names.
    feature_columns = list(feature_columns)
    n_features = len(feature_columns)

    result = {'df': df.copy()}

    for col in feature_columns:
        if col not in df.columns:
            # Raised explicitly so the check survives ``python -O``; the
            # exception type matches the previous ``assert`` statement.
            raise AssertionError(f"'{col}' does not exist in the dataframe.")

    if "date" not in df.columns:
        df["date"] = df.index

    if scale:
        column_scaler = {}
        for column in feature_columns:
            scaler = preprocessing.MinMaxScaler()
            df[column] = scaler.fit_transform(np.expand_dims(df[column].values, axis=1))
            column_scaler[column] = scaler
        result["column_scaler"] = column_scaler

    # Target: the adjusted close `lookup_step` rows in the future.
    df['future'] = df['adjclose'].shift(-lookup_step)

    # The trailing `lookup_step` rows have no target (NaN after the shift);
    # keep their features before dropping them so they can seed predictions.
    last_sequence = np.array(df[feature_columns].tail(lookup_step))
    df.dropna(inplace=True)

    # Slide a fixed-size window over the rows; the date travels with every
    # row so the test windows can be mapped back to rows of the original frame.
    sequence_data = []
    window = deque(maxlen=n_steps)
    for entry, target in zip(df[feature_columns + ["date"]].values, df['future'].values):
        window.append(entry)
        if len(window) == n_steps:
            sequence_data.append([np.array(window), target])

    # Final window (date column stripped) followed by the target-less rows.
    last_sequence = [row[:n_features] for row in window] + list(last_sequence)
    result['last_sequence'] = np.array(last_sequence).astype(np.float32)

    X = np.array([seq for seq, _ in sequence_data])
    y = np.array([target for _, target in sequence_data])

    if split_by_date:
        train_samples = int((1 - test_size) * len(X))
        result["X_train"] = X[:train_samples]
        result["y_train"] = y[:train_samples]
        result["X_test"] = X[train_samples:]
        result["y_test"] = y[train_samples:]
        if shuffle:
            shuffle_in_unison(result["X_train"], result["y_train"])
            shuffle_in_unison(result["X_test"], result["y_test"])
    else:
        (result["X_train"], result["X_test"],
         result["y_train"], result["y_test"]) = train_test_split(
            X, y, test_size=test_size, shuffle=shuffle)

    # The last column of the last row of each test window is that row's date.
    dates = result["X_test"][:, -1, -1]
    test_df = result["df"].loc[dates]
    result["test_df"] = test_df[~test_df.index.duplicated(keep='first')]

    # Drop the date column and convert the features to float32.
    result["X_train"] = result["X_train"][:, :, :n_features].astype(np.float32)
    result["X_test"] = result["X_test"][:, :, :n_features].astype(np.float32)
    return result
2.2 创建模型
def create_model(sequence_length, n_features, units=256, cell=LSTM, n_layers=2, dropout=0.3,
                 loss="mean_absolute_error", optimizer="rmsprop", bidirectional=False):
    """Build and compile a stacked recurrent regression model.

    The network is ``n_layers`` recurrent layers (each followed by dropout)
    and a single linear ``Dense(1)`` output unit.

    Args:
        sequence_length: Number of timesteps in each input window.
        n_features: Number of features per timestep.
        units: Number of units in every recurrent layer.
        cell: Recurrent layer class to instantiate (``LSTM`` by default).
        n_layers: Number of stacked recurrent layers.
        dropout: Dropout rate applied after every recurrent layer.
        loss: Loss passed to ``model.compile``.
        optimizer: Optimizer passed to ``model.compile``.
        bidirectional: If true, wrap every recurrent layer in ``Bidirectional``.

    Returns:
        The compiled ``Sequential`` model, tracking ``mean_absolute_error``.
    """
    model = Sequential()
    last_index = n_layers - 1
    for i in range(n_layers):
        # Only the final recurrent layer collapses the sequence to a single
        # vector. Deriving this from the position (instead of special-casing
        # i == 0) also makes n_layers=1 correct: previously a lone layer kept
        # return_sequences=True and the Dense head produced one value per
        # timestep instead of one value per sample.
        return_sequences = i < last_index

        # The input shape is declared on the first layer only.
        extra_kwargs = {}
        if i == 0:
            extra_kwargs["batch_input_shape"] = (None, sequence_length, n_features)

        if bidirectional:
            layer = Bidirectional(cell(units, return_sequences=return_sequences), **extra_kwargs)
        else:
            layer = cell(units, return_sequences=return_sequences, **extra_kwargs)
        model.add(layer)
        model.add(Dropout(dropout))

    model.add(Dense(1, activation="linear"))
    model.compile(loss=loss, metrics=["mean_absolute_error"], optimizer=optimizer)
    return model
2.3 模型训练和画图分析
画图分析的代码:
def plot_graph(test_df, N_STEPS, LOOKUP_STEP):
    """Plot the actual and the predicted adjusted close from ``test_df``.

    Args:
        test_df: Frame holding the columns ``true_adjclose_{LOOKUP_STEP}``
            and ``adjclose_{LOOKUP_STEP}``.
        N_STEPS: Unused; kept so existing call sites keep working.
        LOOKUP_STEP: Horizon used to build the two column names.
    """
    # (column, line colour) pairs, drawn in legend order.
    series_specs = (
        (f'true_adjclose_{LOOKUP_STEP}', 'b'),
        (f'adjclose_{LOOKUP_STEP}', 'r'),
    )
    for column, colour in series_specs:
        plt.plot(test_df[column], c=colour)
    plt.xlabel("Days")
    plt.ylabel("Price")
    plt.legend(["Actual Price", "Predicted Price"])
    plt.show()
def get_final_df(model, data, SCALE, LOOKUP_STEP):
    """Build a frame of test-set predictions, true values and trade profits.

    Args:
        model: Object exposing ``predict(X)``.
        data: Dict providing ``X_test``, ``y_test``, ``test_df`` and, when
            ``SCALE`` is true, ``column_scaler["adjclose"]``.
        SCALE: Whether targets and predictions must be mapped back through
            the ``adjclose`` scaler.
        LOOKUP_STEP: Horizon; only used to name the output columns.

    Returns:
        A new DataFrame (``data["test_df"]`` is left untouched), sorted by
        index, with the added columns ``adjclose_{LOOKUP_STEP}`` (predicted),
        ``true_adjclose_{LOOKUP_STEP}``, ``buy_profit`` and ``sell_profit``.
    """
    y_test = data["y_test"]
    y_pred = model.predict(data["X_test"])
    if SCALE:
        adjclose_scaler = data["column_scaler"]["adjclose"]
        # The scaler works on (n_samples, 1) columns; pass y_test as a
        # column instead of relying on broadcasting over a (1, n) row.
        y_test = adjclose_scaler.inverse_transform(np.expand_dims(y_test, axis=1))
        y_pred = adjclose_scaler.inverse_transform(y_pred)
    # Flatten to 1-D so both assign cleanly as columns.
    y_test = np.ravel(y_test)
    y_pred = np.ravel(y_pred)

    pred_col = f"adjclose_{LOOKUP_STEP}"
    true_col = f"true_adjclose_{LOOKUP_STEP}"

    # Work on a copy so the shared `data` dict is not mutated and pandas
    # does not warn about writing into a slice of another frame.
    final_df = data["test_df"].copy()
    final_df[pred_col] = y_pred
    final_df[true_col] = y_test
    final_df.sort_index(inplace=True)

    current = final_df["adjclose"].to_numpy()
    predicted = final_df[pred_col].to_numpy()
    actual = final_df[true_col].to_numpy()

    # Buy when a rise is predicted: profit is the realised future price minus
    # today's price. Sell when a fall is predicted: the mirror image.
    # Rows where no trade is made contribute 0.
    final_df["buy_profit"] = np.where(predicted > current, actual - current, 0)
    final_df["sell_profit"] = np.where(predicted < current, current - actual, 0)
    return final_df
def predict(model, data, SCALE, N_STEPS):
    """Predict the next price from the most recent window in ``data``.

    Args:
        model: Object exposing ``predict(batch)``.
        data: Dict providing ``last_sequence`` and, when ``SCALE`` is true,
            ``column_scaler["adjclose"]``.
        SCALE: Whether the raw prediction must be passed through the
            ``adjclose`` scaler's ``inverse_transform``.
        N_STEPS: Number of trailing rows of ``last_sequence`` fed to the model.

    Returns:
        The element ``[0][0]`` of the (optionally inverse-transformed)
        model output.
    """
    # Keep the trailing N_STEPS rows and add a leading batch axis of size 1.
    recent_window = data["last_sequence"][-N_STEPS:]
    batch = np.expand_dims(recent_window, axis=0)
    raw_output = model.predict(batch)

    if not SCALE:
        return raw_output[0][0]

    adjclose_scaler = data["column_scaler"]["adjclose"]
    return adjclose_scaler.inverse_transform(raw_output)[0][0]
训练模型参数设置代码:
# ---- Dataset parameters ----
# Window size (number of rows per input sequence).
N_STEPS = 50
# How many rows ahead the predicted price lies.
LOOKUP_STEP = 1
# Whether to rescale the feature columns.
SCALE = True
scale_str = f"sc-{int(SCALE)}"
# Whether to shuffle the samples.
SHUFFLE = True
shuffle_str = f"sh-{int(SHUFFLE)}"
# Whether to split train/test chronologically instead of randomly.
SPLIT_BY_DATE = False
split_by_date_str = f"sbd-{int(SPLIT_BY_DATE)}"
# Fraction of the samples held out for testing.
TEST_SIZE = 0.2
FEATURE_COLUMNS = ["adjclose", "volume", "open", "high", "low"]
# Current date, used to tag the output files. The previous hard-coded
# literal "2021-11-15" contained no format directives, so strftime simply
# echoed it back.
date_now = time.strftime("%Y-%m-%d")

# ---- Model parameters ----
N_LAYERS = 2
CELL = LSTM
UNITS = 256
DROPOUT = 0.4
BIDIRECTIONAL = False

# ---- Training parameters ----
LOSS = "huber_loss"
OPTIMIZER = "adam"
BATCH_SIZE = 64
EPOCHS = 200

ticker = "IBM"
ticker_data_filename = os.path.join("data", f"{ticker}_{date_now}.csv")
# The model name encodes the main hyper-parameters so runs do not overwrite
# each other's checkpoints, logs and result files.
model_name = (
    f"{date_now}_{ticker}-{shuffle_str}-{scale_str}-{split_by_date_str}-"
    f"{LOSS}-{OPTIMIZER}-{CELL.__name__}-seq-{N_STEPS}-step-{LOOKUP_STEP}-layers-{N_LAYERS}-units-{UNITS}"
)
if BIDIRECTIONAL:
    model_name += "-b"

# Every output folder must exist before anything is written to it;
# otherwise the first run fails when saving the CSV or the checkpoint.
csv_results_folder = "csv-results"
for folder in ("data", "results", "logs", csv_results_folder):
    os.makedirs(folder, exist_ok=True)

# ---- Load data and train ----
data = load_data(ticker, N_STEPS, scale=SCALE, split_by_date=SPLIT_BY_DATE, shuffle=SHUFFLE,
                 lookup_step=LOOKUP_STEP, test_size=TEST_SIZE, feature_columns=FEATURE_COLUMNS)
data["df"].to_csv(ticker_data_filename)

model = create_model(N_STEPS, len(FEATURE_COLUMNS), loss=LOSS, units=UNITS, cell=CELL,
                     n_layers=N_LAYERS, dropout=DROPOUT, optimizer=OPTIMIZER,
                     bidirectional=BIDIRECTIONAL)

model_path = os.path.join("results", model_name) + ".h5"
checkpointer = ModelCheckpoint(model_path, save_weights_only=True, save_best_only=True, verbose=1)
tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name))
history = model.fit(data["X_train"], data["y_train"],
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=(data["X_test"], data["y_test"]),
                    callbacks=[checkpointer, tensorboard],
                    verbose=1)

# ---- Evaluate the best checkpoint ----
model.load_weights(model_path)
loss, mae = model.evaluate(data["X_test"], data["y_test"], verbose=0)
if SCALE:
    # The MAE is a *difference* between scaled prices, so only the scaler's
    # multiplicative factor applies when converting it back to price units.
    # inverse_transform would additionally add the minimum price as an
    # offset and overstate the error.
    mean_absolute_error = mae / data["column_scaler"]["adjclose"].scale_[0]
else:
    mean_absolute_error = mae

final_df = get_final_df(model, data, SCALE, LOOKUP_STEP)
future_price = predict(model, data, SCALE, N_STEPS)

# Share of test rows on which the chosen trade direction made money.
n_profitable = len(final_df[final_df['sell_profit'] > 0]) + len(final_df[final_df['buy_profit'] > 0])
accuracy_score = n_profitable / len(final_df)
total_buy_profit = final_df["buy_profit"].sum()
total_sell_profit = final_df["sell_profit"].sum()
total_profit = total_buy_profit + total_sell_profit
profit_per_trade = total_profit / len(final_df)

print(f"Future price after {LOOKUP_STEP} days is {future_price:.2f}$")
print(f"{LOSS} loss:", loss)
print("Mean Absolute Error:", mean_absolute_error)
print("Accuracy score:", accuracy_score)
print("Total buy profit:", total_buy_profit)
print("Total sell profit:", total_sell_profit)
print("Total profit:", total_profit)
print("Profit per trade:", profit_per_trade)

plot_graph(final_df, N_STEPS, LOOKUP_STEP)
print(final_df.tail(10))

# ---- Persist the per-row results ----
csv_filename = os.path.join(csv_results_folder, model_name + ".csv")
final_df.to_csv(csv_filename)
3. 结果测试
我们使用阿里巴巴的数据进行测试(上文代码中的 ticker 为 "IBM",将其改为 "BABA" 即可):
这里有完整的代码,可以下载使用:
https://download.csdn.net/download/qq_28531269/44879548