import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import models,layers,losses,metrics,callbacks
import seaborn as sns
import time
import os
# Silence TensorFlow's C++ INFO/WARNING logs.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Let each visible GPU allocate memory on demand instead of grabbing it all.
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# Fixed seed for reproducible weight init / shuffling.
tf.random.set_seed(2345)

# Show up to 20 columns when printing a DataFrame.
pd.set_option('display.max_columns', 20)
# Show up to 20 rows when printing a DataFrame.
pd.set_option('display.max_rows', 20)
# Load 5-minute BTC/USDT candles, indexed by timestamp.
# NOTE(review): the original also called df_data.reset_index(drop=True)
# without assigning the result — a no-op, removed here (the Datetime
# index is kept, matching the original's effective behavior).
df_data = pd.read_csv('./5m-BTCUSDT行情数据 - 分类.csv').set_index('Datetime')

# Per-candle return relative to the open price.
df_data['return'] = (df_data['Close'] - df_data['Open']) / df_data['Open']

# Direction label: 0 = flat, 1 = up (return >= 0.03%), 2 = down (return <= -0.03%).
# np.select replaces the original chained `Series.where(..., inplace=True)`
# calls, which are deprecated and unreliable on a column selection.
df_data['direct'] = np.select(
    [df_data['return'] >= 0.0003, df_data['return'] <= -0.0003],
    [1, 2],
    default=0,
)

# Split the label column off from the features.
df_label = df_data.pop('direct')
# Chronological split: first 80% train, next 10% validation, last 10% test.
n = len(df_data)
split_train = int(n * 0.8)
split_val = int(n * 0.9)
train_df = df_data[:split_train]
train_label = df_label[:split_train]
val_df = df_data[split_train:split_val]
val_label = df_label[split_train:split_val]
test_df = df_data[split_val:]
test_label = df_label[split_val:]
# Standardize every feature with TRAINING-set statistics only, so no
# information from the validation/test periods leaks into the model.
# (.mean()/.std() give the same values the original read out of
# describe().loc['mean'/'std']; the unused second describe() was removed.)
train_mean = train_df.mean()
train_std = train_df.std()
train_df = (train_df - train_mean) / train_std
val_df = (val_df - train_mean) / train_std
test_df = (test_df - train_mean) / train_std

# Violin plot of the full (normalized) dataset, one violin per feature,
# as a quick sanity check of the feature distributions.
df_std = (df_data - train_mean) / train_std
df_std = df_std.melt(var_name='Column', value_name='Normalized')
plt.figure(figsize=(12, 6))
ax = sns.violinplot(x='Column', y='Normalized', data=df_std)
_ = ax.set_xticklabels(df_data.keys(), rotation=90)
# 转为dataset
def batch_dataset(dataset):
    """Collapse one window sub-dataset into a single batched dataset.

    Used as the ``flat_map`` function after ``Dataset.window``; with
    ``drop_remainder=True`` every emitted element has exactly
    ``window_size`` rows.

    NOTE(review): reads the module-level ``window_size`` (defined later in
    this file), not a parameter — it must be set before the pipeline runs.
    """
    dataset_batched = dataset.batch(window_size,drop_remainder=True)
    return dataset_batched
def trans_to_dataset(df_x, df_y, window_size):
    """Turn a feature frame and label series into a batched tf.data pipeline.

    Each element pairs a sliding window of ``window_size`` consecutive
    feature rows with the one-hot (depth 3) direction label of the row
    immediately AFTER the window: window starting at row i is labeled by
    row ``i + window_size``.

    The last feature row is dropped (``[:-1]``) because it has no
    following label. Elements are batched 128 at a time and cached.
    """
    ds_data = (
        tf.data.Dataset.from_tensor_slices(
            tf.constant(df_x.values[:-1], dtype=tf.float32))
        .window(window_size, shift=1)
        # Bind the window_size PARAMETER here; the original delegated to a
        # module-level helper that silently read the global window_size.
        .flat_map(lambda w: w.batch(window_size, drop_remainder=True))
    )
    ds_label = tf.data.Dataset.from_tensor_slices(
        tf.one_hot(tf.constant(df_y.values[window_size:]), depth=3))
    dataset = tf.data.Dataset.zip((ds_data, ds_label)).batch(128).cache()
    return dataset
# Length of the input window (rows of 5-minute candles per sample).
window_size = 20
train_db = trans_to_dataset(train_df, train_label,window_size)
# NOTE(review): trans_to_dataset already applied .batch(128), so this
# shuffles whole 128-sample batches, not individual windows; the buffer
# of 240620 presumably covers the entire training set — verify that this
# ordering and memory footprint are intended.
train_db = train_db.shuffle(240620) # .repeat(2)
val_db = trans_to_dataset(val_df, val_label,window_size)
# Stacked-LSTM classifier: per-timestep Dense projections, two LSTM
# layers, and a 3-way logit head (0 flat / 1 up / 2 down). The loss is
# configured with from_logits=True, so no softmax here.
model = tf.keras.models.Sequential([
    # Dense on a 3-D input acts per timestep (on the last axis).
    tf.keras.layers.Dense(24),
    tf.keras.layers.Dense(48),
    tf.keras.layers.LSTM(120, return_sequences=True),
    # return_sequences=False: only the final hidden state feeds the head.
    tf.keras.layers.LSTM(120, return_sequences=False),
    tf.keras.layers.Dense(24),
    tf.keras.layers.Dense(3)
])
# Build from the configured window length and actual feature count
# instead of the hard-coded (None, 20, 6).
model.build(input_shape=(None, window_size, train_df.shape[1]))
model.summary()
# Timestamped run name shared by the log dir, checkpoints, and saved model.
file_name = time.strftime("%y%m%d%H%M%S", time.localtime())
logdir = "./Tensorborad_logs/" + file_name
# Checkpoint callback: save weights only, every 5 epochs.
# NOTE(review): `period=` is deprecated in tf.keras ModelCheckpoint in
# favor of `save_freq` — confirm it still works on the installed TF version.
checkpoint_path = "./data/tf_savedmodel_check_points_{}".format(file_name) +"/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path, verbose=1, save_weights_only=True,
    # Save weights once every 5 epochs.
    period=5)
tb_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
# Halve the learning rate when loss has not improved for 40 epochs.
# NOTE(review): monitors the TRAINING loss ("loss"), not "val_loss" —
# confirm this is intentional.
lr_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor="loss",factor = 0.5, patience = 40)
# Stop training when loss has not improved for 80 epochs (also training loss).
stop_callback = tf.keras.callbacks.EarlyStopping(monitor = "loss", patience= 80)
callbacks_list = [tb_callback,lr_callback,stop_callback,cp_callback]
# `learning_rate=` replaces the deprecated `lr=` alias (removed in
# recent tf.keras releases).
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(loss=tf.losses.CategoricalCrossentropy(from_logits=True),
              optimizer=optimizer,
              metrics=['accuracy'])
# Validation is only evaluated every 10 epochs (validation_freq=10).
history = model.fit(train_db, epochs=100, callbacks=callbacks_list,
                    validation_data=val_db, validation_freq=10)
model.evaluate(val_db)
# Export the trained model in TensorFlow SavedModel format.
model.save('./data/tf_savedmodel_{}'.format(file_name), save_format="tf")
print('export saved model.')
# Source: CSDN blog article "深度学习Tensorflow之预测Stock价格的实例"
# (Deep learning with TensorFlow: a stock-price prediction example),
# published 2021-10-04. (Footer text converted to a comment so the file
# remains valid Python.)