使用TensorFlow2.0实现一个简单的股票预测
1、环境
Python3.7.4
PyCharm 2019.1.3 (Professional Edition)
Windows 10
TensorFlow 2.0 + CUDA 10.0
2、数据集及预处理
- 数据集为一维时间序列,每行代表一天的数据。将数据按每十一天划分为一组(各组互不重叠):前十天的数据作为模型输入,第十一天的数据只用于生成标签——若第十一天的数值大于第十天,则标签为1,否则为0。因此这是一个二分类任务:根据前十天的走势判断第十一天是上涨还是下跌。
- 预处理时取csv文件中前119702行数据(刚好能被11整除),即119702/11=10882组数据;其中前80%(8705组)作为训练集,其余20%(2177组)作为测试集。于是各数据的形状为:训练集样本[8705,10,1],训练集标签[8705,1],测试集样本[2177,10,1],测试集标签[2177,1]。注意:代码会读取csv文件中的全部行,并不会自行截断,因此上述形状成立的前提是csv文件恰好包含这119702行。具体过程见代码。
3、源代码
import os

# Set the TensorFlow native-logging threshold BEFORE TensorFlow is imported.
# Assigning it after the import (as the script originally did) is too late to
# affect messages emitted while the TensorFlow runtime is being loaded.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import copy
import csv

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# The script is written against the TensorFlow 2.x API; stop early on any
# other major version.
assert tf.__version__.startswith('2.')
def preprocessing(csv_path='股票数据.csv', encoding='ANSI', group_size=11,
                  train_groups=8705):
    """Load a daily value series and build next-day up/down samples.

    The first column of every CSV row is parsed as one day's value. Consecutive
    rows are cut into non-overlapping groups of ``group_size`` rows. The first
    ``group_size - 1`` values of a group form one sample, each value wrapped in
    its own one-element list (one feature per time step). The last row of the
    group only produces the label: ``[1]`` if it is strictly greater than the
    value of the day before it, otherwise ``[0]``. A trailing group with fewer
    than ``group_size`` rows is discarded.

    Args:
        csv_path: Path of the CSV file to read.
        encoding: Text encoding used to open the file. The default ``'ANSI'``
            is kept from the original script.
            NOTE(review): Python only offers this codec alias on Windows;
            confirm the real file encoding before running elsewhere.
        group_size: Rows per group (inputs plus one label row). Must be >= 2.
        train_groups: Number of leading groups that go to the training set;
            every later group goes to the test set. The default 8705 matches
            the original hard-coded row threshold of 95755 (= 8705 * 11).

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)`` of ``tf.constant``
        tensors. Samples have shape ``[n, group_size - 1, 1]`` and labels
        ``[n, 1]``. With the defaults and a 119702-row file these are
        ``[8705, 10, 1]``, ``[8705, 1]``, ``[2177, 10, 1]`` and ``[2177, 1]``.

    Raises:
        ValueError: If ``group_size`` is smaller than 2.
    """
    if group_size < 2:
        raise ValueError('group_size must be at least 2')

    steps = group_size - 1  # number of input days per sample
    train_x, train_y = [], []
    test_x, test_y = [], []
    pending = []            # input values of the group currently being filled
    finished_groups = 0

    # The context manager closes the file even if a row fails to parse;
    # newline='' is what the csv module asks for when it is given a file object.
    with open(csv_path, encoding=encoding, newline='') as handle:
        for row in csv.reader(handle):
            value = float(row[0])

            if len(pending) < steps:
                pending.append(value)
                continue

            # This row closes the group: compare it with the last input day.
            label = [1 if value > pending[-1] else 0]
            sample = [[day] for day in pending]

            if finished_groups < train_groups:
                train_x.append(sample)
                train_y.append(label)
            else:
                test_x.append(sample)
                test_y.append(label)

            finished_groups += 1
            pending = []  # fresh list, so stored samples are never aliased

    # Build each tensor once and reuse it for both the printout and the return.
    tensors = tuple(
        tf.constant(part) for part in (train_x, train_y, test_x, test_y)
    )
    for tensor in tensors:
        print(tensor)
    return tensors
class MyRNN(keras.Model):
    """Two stacked LSTM layers followed by a one-unit sigmoid output.

    The first LSTM returns its whole output sequence so the second LSTM can
    consume it; the second returns only its final output, which a ``Dense(1)``
    layer maps to a single value that is then passed through a sigmoid.
    """

    def __init__(self, units):
        """Create the layers.

        Args:
            units: Number of units used by each of the two LSTM layers.
        """
        super().__init__()
        sequence_lstm = layers.LSTM(units, dropout=0.5, return_sequences=True)
        final_lstm = layers.LSTM(units, dropout=0.5)
        self.rnn = keras.Sequential([sequence_lstm, final_lstm])
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the network on a batch and return sigmoid outputs.

        Args:
            inputs: Batch of input sequences fed to the LSTM stack
                (presumably shaped [batch, steps, features] - verify against
                the caller).
            training: Accepted for the Keras call signature but not forwarded
                explicitly to the sub-layers.
                NOTE(review): dropout therefore relies on Keras propagating
                the training flag to nested layers - confirm for the
                TensorFlow version in use.

        Returns:
            The sigmoid of the dense layer's output, one value per sequence.
        """
        dense_out = self.outlayer(self.rnn(inputs))
        return tf.sigmoid(dense_out)
def main():
    """Train MyRNN on the preprocessed data, then evaluate it on the test split."""
    x_train, y_train, x_test, y_test = preprocessing()

    batch_size = 128

    # drop_remainder=True throws away a final batch that is smaller than
    # batch_size, for the training and the test pipeline alike.
    train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_ds = train_ds.shuffle(1000)
    train_ds = train_ds.batch(batch_size, drop_remainder=True)

    test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    test_ds = test_ds.batch(batch_size, drop_remainder=True)

    print(train_ds)
    print(test_ds)

    lstm_units = 64
    num_epochs = 40

    model = MyRNN(lstm_units)
    adam = keras.optimizers.Adam(0.001)
    bce = tf.losses.BinaryCrossentropy()
    model.compile(optimizer=adam, loss=bce, metrics=['accuracy'])

    # The test split doubles as validation data during fitting.
    model.fit(db_train := train_ds, epochs=num_epochs, validation_data=test_ds) if False else model.fit(train_ds, epochs=num_epochs, validation_data=test_ds)
    model.evaluate(test_ds)
# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()