python LSTM预测股票实战

最新推荐文章于 2024-08-13 15:48:31 发布

nanjc

最新推荐文章于 2024-08-13 15:48:31 发布

阅读量478

点赞数

文章标签： python lstm 深度学习

本文链接：https://blog.csdn.net/qq_49617724/article/details/129885294

版权

忽然看到一个关于机器学习预测股票的文章，尝试复现了一下代码并补充了一点注释，代码可以直接运行。

原文链接：没忍住，还是用机器学习预测了一下股票 - 知乎 (zhihu.com)

事先准备：用 pip 安装 numpy、pandas、scikit-learn（导入名为 sklearn）、torch（即 PyTorch）、tensorflow、matplotlib、tushare 库

python代码如下：

import tushare as ts
import numpy as np
import pandas as pd
import sklearn
import torch
import tensorflow.keras as keras
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
import tensorflow as tf
from sklearn.utils import shuffle
import matplotlib.pyplot as plt

ts_code = '000005.SZ'    # Stock code; look up the code of the stock you want.

# Date range of the requested history. The Tushare Pro `daily` interface
# documents its date parameters in YYYYMMDD form, so that form is used here.
start_date = '20060101'
end_date = '20230101'

# Put your personal API token between the quotes. Register on the Tushare
# site (https://tushare.pro/user/token) and copy the token shown on your
# profile page. The account needs enough points to query this data.
pro = ts.pro_api(' ')

# Download the daily bars for the chosen stock and date range.
df = pro.daily(
    ts_code=ts_code,
    start_date=start_date,
    end_date=end_date)

# Fail here with a clear message instead of much later inside the scaler
# when the request returned nothing (e.g. missing token or permissions).
if df is None or df.empty:
    raise RuntimeError(
        f'No daily data returned for {ts_code} between {start_date} and '
        f'{end_date}; check the Tushare token, account points and dates.')

# Turn the 2-D series into the 3-D samples an LSTM expects.
def transform_dataset(train_set, test_set, y_train, y_test, n_input, n_output):
    """Build sliding-window samples from the stacked train and test data.

    The train and test rows are stacked into one series. Window ``i`` uses
    rows ``i .. i + n_input - 1`` as input and the next ``n_output`` values
    of the first target column as output. A window belongs to the training
    set when its first target row lies inside ``train_set``; every other
    window belongs to the test set.

    Args:
        train_set: 2-D array ``(n_train, n_features)`` of training features.
        test_set: 2-D array ``(n_test, n_features)`` of test features.
        y_train: 2-D array ``(n_train, k)``; only column 0 is used.
        y_test: 2-D array ``(n_test, k)``; only column 0 is used.
        n_input: number of past rows in each input window.
        n_output: number of future target values per sample.

    Returns:
        ``(train_X, train_y, test_X, test_y)`` where the X arrays have shape
        ``(samples, n_input, n_features)`` and the y arrays have shape
        ``(samples, n_output)``. All four are empty along axis 0 when the
        series is too short to form a single window.
    """
    all_data = np.vstack((train_set, test_set))
    y_set = np.vstack((y_train, y_test))[:, 0]

    # Same number of windows as before: total - n_input - n_output (the very
    # last possible window is not produced). Clamped so that a series that
    # is too short yields zero samples instead of an uninitialised row.
    n_samples = max(all_data.shape[0] - n_input - n_output, 0)

    # Allocate once and fill in place; appending inside the loop would copy
    # the whole array on every iteration.
    X = np.empty((n_samples, n_input, all_data.shape[1]))
    y = np.empty((n_samples, n_output))
    for i in range(n_samples):
        X[i] = all_data[i:i + n_input, :]
        y[i] = y_set[i + n_input:i + n_input + n_output]

    # Windows before `split` have their first target inside the training
    # rows. Clamp at 0 so a training set shorter than n_input gives an empty
    # training split rather than wrapping around via a negative index.
    split = max(train_set.shape[0] - n_input, 0)
    train_X = X[:split, :, :]
    train_y = y[:split, :]
    test_X = X[split:, :, :]
    test_y = y[split:, :]
    return train_X, train_y, test_X, test_y


# Feature columns fed to the network; 'close' must stay first because the
# target and the later evaluation read column 0 as the closing price.
use_cols = ['close', 'open', 'high', 'pre_close', 'vol', 'amount']

# DataFrame.truncate slices on the index and needs it sorted, so the frame
# is ordered by date and indexed by trade_date before splitting. Sorting
# ascending also makes every window run forward in time.
df['trade_date'] = pd.to_datetime(df['trade_date'])
df = df.sort_values('trade_date')
df.index = pd.DatetimeIndex(df['trade_date'].values)

# Split by date with truncate.
# NOTE(review): the bounds parse as 2017-01-01 and 2017-02-01, so rows dated
# in between (the rest of January 2017) end up in neither set — confirm this
# gap is intended.
atrain = df.truncate(after='2017-01')
btest = df.truncate(before='2017-02')

train_raw = atrain[use_cols]
test_raw = btest[use_cols]

y_train_raw = train_raw[['close']]
y_test_raw = test_raw[['close']]

# Scale features and target to [0, 1]. Both scalers are fitted on the
# training rows only and then applied to the test rows.
sc = MinMaxScaler(feature_range=(0, 1))
y_sc = MinMaxScaler(feature_range=(0, 1))
training_scaled = sc.fit_transform(train_raw.values)
test_scaled = sc.transform(test_raw.values)
y_train_scaled = y_sc.fit_transform(y_train_raw.values)
y_test_scaled = y_sc.transform(y_test_raw.values)

n_input = 63    # Number of past rows used for each prediction.
n_output = 1    # Number of future days to predict.
train_X, train_y, test_X, test_y = transform_dataset(
    training_scaled, test_scaled, y_train_scaled, y_test_scaled,
    n_input, n_output)



# A simple network: an LSTM input layer with 10 units followed by a plain
# Dense output layer, trained with the mean_absolute_error loss.
n_timesteps, n_features, n_outputs = train_X.shape[1], train_X.shape[2], train_y.shape[1]

# Build the container from the same `keras` namespace (tensorflow.keras) as
# the layers. Mixing it with `Sequential` from the standalone `keras`
# package can fail on installations where the two are distinct packages.
model = keras.Sequential()
model.add(keras.layers.LSTM(
    10,
    input_shape=(n_timesteps, n_features),
    kernel_initializer='glorot_uniform',
    # A factor of 0.0 applies no penalty; raise it to regularise.
    kernel_regularizer=keras.regularizers.l2(0.0)))
model.add(keras.layers.Dense(
    n_outputs,
    kernel_initializer='glorot_uniform',
    kernel_regularizer=keras.regularizers.l2(0.0)))

model.compile(optimizer='adam', loss='mean_absolute_error')
# summary() prints the table itself; wrapping it in print() would add a
# stray "None" line.
model.summary()


def dropout(x, level):
    """Apply inverted dropout to ``x`` and return the result as a new array.

    Each element is kept with probability ``1 - level`` and zeroed otherwise;
    the kept elements are divided by ``1 - level``. The input is left
    untouched. Can be used if the model overfits.

    Args:
        x: array-like of numbers. Integer input is converted to float so the
            rescaling does not fail on an in-place cast.
        level: drop probability, in the half-open interval ``[0, 1)``.

    Returns:
        A new ``numpy.ndarray`` with the same shape as ``x``.

    Raises:
        ValueError: if ``level`` is outside ``[0, 1)``.
    """
    if level < 0. or level >= 1:  # level is a probability and must be in [0, 1)
        raise ValueError('Dropout level must be in interval [0, 1[.')
    retain_prob = 1. - level

    # Work on a copy so the caller's data is not modified.
    dropped = np.array(x)
    if not np.issubdtype(dropped.dtype, np.inexact):
        dropped = dropped.astype(float)

    # One Bernoulli trial per element (binomial with n=1): 1 keeps the
    # element, 0 drops it.
    random_tensor = np.random.binomial(n=1, p=retain_prob, size=dropped.shape)
    dropped *= random_tensor
    dropped /= retain_prob
    return dropped


# Fit the model on the training set. The time series has a strong trend, so
# the samples are shuffled before training.
train_X, train_y = shuffle(train_X, train_y, random_state=42)

# Fit the RNN model; the last 30% of the shuffled samples are held out for
# validation.
history = model.fit(
    train_X,
    train_y,
    epochs=300,
    batch_size=512,
    validation_split=0.3)

# Plot training and validation loss per epoch. plt.figure() opens a
# pyplot-managed figure that the plot calls below draw into (plt.Figure()
# would only build an unattached Figure object).
plt.figure()
plt.plot(history.history['loss'],
         'b',
         label='Training loss')
plt.plot(history.history['val_loss'],
         'r',
         label='Validation loss')
plt.legend(loc='upper right')
plt.xlabel('Epochs')
plt.show()



# Finalizing predictions
from sklearn.metrics import mean_absolute_error

scaled_preds = model.predict(test_X)

# y_sc was fitted on a single column, so hand it one column and restore the
# (samples, n_output) shape afterwards.
test_preds = y_sc.inverse_transform(
    scaled_preds.reshape(-1, 1)).reshape(scaled_preds.shape)

test_preds_df = pd.DataFrame(
    test_preds, columns=[f'pred_{i+1}_step' for i in range(test_preds.shape[1])])

# Take the ground truth from test_y, the targets the test windows were built
# for, so each prediction is compared with the day it actually predicts.
# Slicing the tail of test_raw instead would pair it with the following day.
test_preds_df['true_value'] = y_sc.inverse_transform(
    test_y.reshape(-1, 1)).reshape(test_y.shape)[:, 0]

# Naive baseline: repeat the last close seen in each input window. Feature 0
# of test_X is 'close', scaled with the same training min/max that y_sc
# learned, so y_sc can undo the scaling.
test_preds_df['naive_pred'] = y_sc.inverse_transform(
    test_X[:, -1, :1])[:, 0]

test_preds_df[['pred_1_step', 'true_value']].plot()
plt.show()

err = mean_absolute_error(
    test_preds_df['true_value'].values, test_preds_df['pred_1_step'].values)
print(f'abs error for testset is {err}')
# Author's observation: the predicted curve is little more than a
# time-shifted copy of the true series, so it has essentially no practical
# value. Compare against 'naive_pred' to check. Over.

没啥价值，不过可以试着玩玩。over