# Python LSTM stock-price prediction with TensorFlow

# --- Standard library ---
import os
import pickle
import time

# --- Third-party ---
import numpy as np
import pandas as pd
from tqdm import tqdm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import utils
from tensorflow.keras.models import Model, Sequential
# Fix: LSTM previously came from the standalone `keras` package; mixing
# standalone-Keras layers with tf.keras models can fail or silently
# misbehave, so every layer is imported from tensorflow.keras here.
from tensorflow.keras.layers import (
    LSTM, Dropout, Dense, Activation, Input, Permute, Reshape,
)
from tensorflow.keras.callbacks import CSVLogger, ReduceLROnPlateau

from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import random

# Fix all RNG seeds so data splits and training runs are reproducible.
seed = 1024
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)
# Hash randomization can only be controlled through the environment variable.
os.environ['PYTHONHASHSEED'] = str(seed)
# Daily OHLC data for stock sh.600000; file is expected in the working dir.
df=pd.read_csv('sh.600000.csv')
df.head()  # notebook-style preview; has no effect when run as a plain script
def createSequence(data, windows, pre_timesteps=1):
    """Build sliding-window samples and labels from OHLC data.

    Args:
        data: DataFrame containing at least the "open", "low", "close",
            "high" columns (any other columns are ignored).
        windows: number of consecutive timesteps per input sample.
        pre_timesteps: width of the label window; only its first row is
            kept, so the label is always the single timestep immediately
            following the input window.

    Returns:
        (x, y): float64 arrays of shape (samples, windows, 4) and
        (samples, 4).
    """
    values = data[["open", "low", "close", "high"]].values
    x, y = [], []
    # Fix: "+ 1" — the original bound dropped the last complete
    # window/label pair (off-by-one).
    for i in range(len(values) - windows - pre_timesteps + 1):
        x.append(values[i:i + windows, :])
        y.append(values[i + windows:i + windows + pre_timesteps, :][0])

    return np.array(x, dtype="float64"), np.array(y, dtype="float64")
        
def create_pkl(window=10, pre_timesteps=1, train_split=0.9):
    """Scale the raw CSV, build sequences, split train/val, cache as pickles.

    Args:
        window: sliding-window length passed to createSequence.
        pre_timesteps: label horizon passed to createSequence.
        train_split: fraction of samples assigned to the training set
            (split is chronological, not shuffled).

    Returns:
        (train_seq, train_label, val_seq, val_label) numpy arrays; the same
        arrays are written to data/<name>_<window>_<pre>_<split>.pkl.
    """
    train = pd.read_csv("sh.600000.csv", index_col=0)

    # Scale the four OHLC columns to [0, 1] in place.
    transfer = MinMaxScaler(feature_range=(0, 1))
    train[["open", "low", "close", "high"]] = transfer.fit_transform(
        train[["open", "low", "close", "high"]])

    trainval_seq, trainval_label = createSequence(train, window, pre_timesteps)
    train_num = int(len(trainval_seq) * train_split)

    train_seq = trainval_seq[:train_num]
    train_label = trainval_label[:train_num]
    val_seq = trainval_seq[train_num:]
    val_label = trainval_label[train_num:]

    # Fix: create the cache directory instead of crashing on first run.
    os.makedirs('data', exist_ok=True)
    suffix = f'{window}_{pre_timesteps}_{train_split}'
    arrays = {'train_seq': train_seq, 'train_label': train_label,
              'val_seq': val_seq, 'val_label': val_label}
    for arr_name, arr in arrays.items():
        with open(f'data/{arr_name}_{suffix}.pkl', 'wb') as f:
            pickle.dump(arr, f)

    return train_seq, train_label, val_seq, val_label
def get_data(train_split=0.9, batchsize=32, window=10, pre_timesteps=1, overwrite=True):
    """Load the cached train/val pickles, rebuilding them first if needed.

    Returns:
        (train_seq, train_label, val_seq, val_label), each truncated so its
        length is a multiple of `batchsize` (original note: required,
        otherwise training errors out).
    """
    suffix = f'{window}_{pre_timesteps}_{train_split}'

    # Reuse cached pickles only when allowed and present; otherwise rebuild.
    if not overwrite and os.path.exists(f'data/train_seq_{suffix}.pkl'):
        print(f'发现 window={window},pre_timesteps={pre_timesteps},train_split={train_split}的数据')
    else:
        print(f'正在创建window={window},pre_timesteps={pre_timesteps},train_split={train_split}的数据')
        _ = create_pkl(window=window, pre_timesteps=pre_timesteps, train_split=train_split)

    # Load the four cached arrays in a fixed order.
    loaded = []
    for part in ('train_seq', 'train_label', 'val_seq', 'val_label'):
        with open(f'data/{part}_{suffix}.pkl', 'rb') as f:
            loaded.append(pickle.load(f))
    train_seq, train_label, val_seq, val_label = loaded

    # Trim each split down to the largest multiple of batchsize.
    num_train = len(train_seq) - len(train_seq) % batchsize
    num_val = len(val_seq) - len(val_seq) % batchsize

    return (train_seq[:num_train], train_label[:num_train],
            val_seq[:num_val], val_label[:num_val])
# Smoke-test the data pipeline (overwrite=True forces the pickles to be rebuilt).
d = get_data(train_split=0.9, batchsize=32, window=10, pre_timesteps=1, overwrite=True)
# Notebook-style cell output showing the four array shapes; no effect as a script.
[i.shape for i in d]
def lstm(window, featuresize, pre_t):
    """Build a 3-layer stacked LSTM regressor.

    Args:
        window: input sequence length (timesteps).
        featuresize: features per timestep; also the output width.
        pre_t: prediction horizon; kept for interface compatibility but
            unused — the network always emits a single timestep.

    Returns:
        An uncompiled Sequential model mapping
        (batch, window, featuresize) -> (batch, featuresize).
    """
    model = Sequential()
    model.add(LSTM(40, input_shape=(window, featuresize),
                   return_sequences=True))
    model.add(LSTM(40, return_sequences=True))
    # Final LSTM collapses the sequence to a single vector per sample.
    model.add(LSTM(40, return_sequences=False))
    model.add(Dense(featuresize))
    # ReLU is acceptable here because targets are MinMax-scaled to [0, 1].
    model.add(Activation("relu"))

    return model
# Training hyper-parameters.
epochs        = 200   # number of training epochs
batchsize     = 32    # mini-batch size
learning_rate = 0.01  # initial Adam learning rate
featuresize   = 4     # number of features (open/low/close/high)
train_split   = 0.9   # fraction of samples used for training

window        = 50  # sliding-window length (timesteps per sample)
pre_timesteps = 1   # prediction horizon; this LSTM setup supports only 1

# Load (or build) the cached arrays; overwrite=False reuses existing pickles.
train_seq, train_label, val_seq, val_label = \
        get_data(train_split, batchsize, window, pre_timesteps, False)
for i in [train_seq, train_label, val_seq, val_label]:
    print(i.shape)

# Build the model: (batch, window, featuresize) -> (batch, featuresize).
model = lstm(window, featuresize, pre_timesteps)
# Adam optimizer with the configured initial learning rate.
adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
# Fix: 'acc' is a classification metric and is meaningless for this MSE
# regression task; track mean absolute error instead.
model.compile(loss='mse', optimizer=adam, metrics=['mae'])

# Per-epoch stats (loss / mae / val_loss / val_mae / lr) are written to CSV.
log = CSVLogger(f"./lstm_log.csv", separator=",", append=False)

# Halve the learning rate when validation loss plateaus.
# Fix: monitor 'val_loss' — the previous 'val_acc' target does not exist
# for a regression model, so the schedule never triggered meaningfully.
reduce = ReduceLROnPlateau(monitor='val_loss',
                           factor=0.5,
                           patience=10,
                           verbose=1,
                           mode='auto',
                           min_delta=0.001,
                           cooldown=0,
                           min_lr=0.001)

# Train the model and time the run.
use_time = time.time()
model.fit(train_seq,
          train_label,
          validation_data=(val_seq, val_label),  # tuple, as Keras documents
          epochs=epochs,
          batch_size=batchsize,
          verbose=1,
          use_multiprocessing=True,
          callbacks=[log, reduce])
# model.save('best.h5')

use_time = time.time() - use_time

# Fix: the evaluate+print pair was duplicated verbatim (copy-paste);
# evaluate the validation set once.
loss, acc = model.evaluate(val_seq, val_label, verbose=1)
print('Loss : {}, Accuracy: {}'.format(loss, acc))
print(f'lstm use time :{use_time:.5f}')
# Predict on the validation set and score the last feature column.
pre_y = model.predict(val_seq)
# Column -1 is "high", given the ["open", "low", "close", "high"] ordering
# used when the sequences were built.
y = val_label[:,-1]
pre = pre_y[:,-1]
mae = mean_absolute_error(y, pre)
mape = mean_absolute_percentage_error(y, pre)
mse = mean_squared_error(y, pre)
rmse = mse**0.5  # NOTE(review): computed but never printed below
r2 = r2_score(y, pre)  # NOTE(review): computed but never printed below
print(f'MAE :{mae}')
print(f'MSE :{mse}')
print(f'MAPE:{mape}')
import matplotlib.pyplot as plt

# Compare label vs. prediction for the last feature column ("high") over
# validation samples 200..249.
name = ["open", "low", "close", "high"]
plt.figure(dpi=100)
plt.plot(range(200, 250), val_label[200:250, -1], label='label')
# Fix: the prediction slice was [100:250] (150 points against 50 x-values),
# which raises a shape-mismatch error in matplotlib; align it with labels.
plt.plot(range(200, 250), pre_y[200:250, -1], label='pred')
plt.title(name[-1])
plt.legend()
plt.show()
import matplotlib.pyplot as plt

# Plot label vs. prediction for every feature over validation samples 200-249.
name = ["open", "low", "close", "high"]
len_slice = slice(200, 250)
num_features = val_label.shape[1]
for idx in range(num_features):
    plt.figure(dpi=100)
    plt.plot(val_label[len_slice, idx], label='label')
    plt.plot(pre_y[len_slice, idx], label='pred')
    plt.title(name[idx])
    plt.legend()
    plt.show()

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

放飞自我的Coder

你的鼓励很棒棒哦~

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值