import os
import pandas as pd
import numpy as np
import datetime
import sys
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.utils import np_utils
import matplotlib.pyplot as plt
import traceback
import gc
# Accumulators that the per-file processing loop appends to: one feature row
# per sample in train_data, the matching label in train_target.
train_data = []
train_target = []
# Placeholders for the train/test partitions; rebound once the data is split.
x_train = []
y_train = []
x_test = []
y_test = []
# Empty frame with the per-stock column layout (not referenced again in the
# visible part of this script).
all_stocks = pd.DataFrame(columns=['Start', 'Max', 'Min', 'End', 'Volume', 'Amount', 'Ratio', 'Label'])
# Start timestamp of the run.
d1 = datetime.datetime.now()


def parse_interval(argv, default=2):
    """Return the look-ahead interval requested on the command line.

    Args:
        argv: Full argument vector, program name first (e.g. ``sys.argv``).
        default: Value used when no usable interval was supplied.

    Returns:
        ``int(argv[1])`` when exactly one extra argument is present and it is
        an integer >= 1; otherwise ``default``. A wrong argument count was
        already answered with the default, so an unusable value is treated
        the same way instead of crashing before any logging is set up.
    """
    if len(argv) != 2:
        return default
    try:
        value = int(argv[1])
    except ValueError:
        print("ignoring non-integer interval argument:", argv[1], file=sys.stderr)
        return default
    if value < 1:
        # The label of row i reads rows i + 1 and i + interval, so anything
        # below 1 cannot produce a meaningful look-ahead.
        print("ignoring interval below 1:", value, file=sys.stderr)
        return default
    return value


interval = parse_interval(sys.argv)
print("interval = ", interval)
# NOTE(review): the pasted source carried no indentation; the nesting below was
# reconstructed from the statement order and should be confirmed against the
# author's original file.

# Training hyper-parameters (plain literals, so defining them before the log
# file is opened cannot fail).
NB_EPOCH = 50
BATCH_SIZE = 512
VERBOSE = 1
N_HIDDEN = 6
VALIDATION_SPLIT = 0.2
DROPOUT = 0.3
# The six standardised columns fed to the network, in input order.
_FEATURE_COLUMNS = ['Start', 'Max', 'Min', 'End', 'Volume', 'Amount']


def _log(handle, *parts):
    """Write ``parts`` followed by a newline to ``handle`` and flush at once.

    Flushing after every message keeps the progress breadcrumbs on disk even
    if the process dies in the middle of a long run.
    """
    handle.write("".join(str(part) for part in parts) + "\n")
    handle.flush()


with open(r'd:\stock\test.txt', 'w') as fh:
    try:
        # ---- 1. Build the sample set from every exported stock file ----
        for f, _, fileNames in os.walk(r"d:\zd_zsone\T0002\export"):
            for fileName in fileNames:
                _log(fh, "for start")
                if "SH#0" in fileName:
                    continue
                path = os.path.join(f, fileName)
                print(path)
                stock = pd.read_csv(path, sep='\t', skiprows=2, skipfooter=1, encoding='ansi',
                                    names=['Date', 'Start', 'Max', 'Min', 'End', 'Volume', 'Amount'],
                                    engine='python')
                _log(fh, fileName)
                # `value in series` tests the index labels, not the values, so
                # the original membership test could never fire. Compare the
                # values, then drop every row dated before 1996/12/26 with a
                # mask rather than by assumed position.
                if (stock['Date'] == '1996/12/25').any():
                    stock = stock[stock['Date'] >= '1996/12/26'].reset_index(drop=True)
                _log(fh, "1")
                del stock['Date']
                if len(stock) > interval:
                    _log(fh, "2")
                    # Label of row i: percentage change from the Start of row
                    # i + 1 to the End of row i + interval. The last
                    # `interval` rows have no look-ahead and are cut off.
                    n_labelled = len(stock) - interval
                    stock['Label'] = (stock['End'].shift(-interval) / stock['Start'].shift(-1) - 1) * 100
                    stock = stock.iloc[:n_labelled]
                    # Keep labels within [-100, 100]; this also discards
                    # inf/NaN labels (e.g. a zero Start price), which would
                    # otherwise break the int8 cast below.
                    stock = stock[stock['Label'].between(-100, 100)].reset_index(drop=True)
                    _log(fh, "3")
                    # Truncate towards zero to whole-percent classes; plain
                    # Python ints avoid int8 overflow in max - min later on.
                    labels = stock['Label'].astype('int8').tolist()
                    # Per-file standardisation with the population standard
                    # deviation (ddof=0), which is what np.std computes.
                    for column in _FEATURE_COLUMNS:
                        series = stock[column]
                        stock[column] = (series - series.mean()) / series.std(ddof=0)
                    _log(fh, "4")
                    # Explicit float32 matrix instead of the object-dtype rows
                    # that `stock.values` yields for a mixed-dtype frame.
                    features = stock[_FEATURE_COLUMNS].values.astype('float32')
                    _log(fh, "tmpstocks nums: ", len(features))
                    train_data.extend(features)
                    train_target.extend(labels)
                del stock
                gc.collect()

        # ---- 2. Summarise the data and encode the labels ----
        if not train_data:
            raise RuntimeError("no training samples were loaded from the export directory")
        _log(fh, "train_data[0]: ", train_data[0])
        _log(fh, "train_data nums: ", len(train_data))
        _log(fh, "train_data size: ", sys.getsizeof(train_data))
        _log(fh, "train_target size 1: ", sys.getsizeof(train_target))
        min_train_target = min(train_target)
        _log(fh, "min_train_target: ", min_train_target)
        max_train_target = max(train_target)
        _log(fh, "max_train_target: ", max_train_target)
        NB_CLASSES = max_train_target - min_train_target + 1
        _log(fh, "NB_CLASSES: ", NB_CLASSES)
        _log(fh, "train_target[0] 1: ", train_target[0])
        # to_categorical expects class indices starting at 0, but the labels
        # are signed percentages. Shift by the minimum so that class index c
        # stands for label c + min_train_target.
        train_target = np_utils.to_categorical(np.asarray(train_target) - min_train_target, NB_CLASSES)
        _log(fh, "train_target[0] 2: ", train_target[0])
        _log(fh, "train_target size 2: ", sys.getsizeof(train_target))

        # ---- 3. Split, build and train the network ----
        x_train, x_test, y_train, y_test = train_test_split(
            np.asarray(train_data, dtype='float32'), train_target, test_size=0.2, shuffle=True)
        OPTIMIZER = SGD(lr=0.001)
        model = Sequential()
        model.add(Dense(N_HIDDEN, input_shape=(len(_FEATURE_COLUMNS),)))
        model.add(Activation('relu'))
        model.add(Dropout(DROPOUT))
        model.add(Dense(N_HIDDEN))
        model.add(Activation('relu'))
        model.add(Dropout(DROPOUT))
        model.add(Dense(NB_CLASSES))
        model.add(Activation('softmax'))
        model.summary()
        model.compile(loss='categorical_crossentropy', optimizer=OPTIMIZER, metrics=['accuracy'])
        history = model.fit(x_train, y_train,
                            batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                            verbose=VERBOSE, validation_split=VALIDATION_SPLIT)
        score = model.evaluate(x_test, y_test, verbose=VERBOSE)
        # Record the result before plotting so it survives a plotting failure.
        _log(fh, "test score: ", score)

        # ---- 4. Plot the training curves ----
        # Older Keras reports the metric as 'acc', Keras 2.3+ as 'accuracy'.
        acc_key = 'acc' if 'acc' in history.history else 'accuracy'
        plt.plot(history.history[acc_key])
        plt.plot(history.history['val_' + acc_key])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()
        d2 = datetime.datetime.now()
        _log(fh, "elapsed: ", d2 - d1)
    except BaseException:
        # Deliberately broad: any failure, including an interrupt, is written
        # to the log because the machine is powered off right afterwards.
        _log(fh, "The erros is ", traceback.format_exc())
    finally:
        # NOTE(review): this always issues a forced Windows shutdown with a
        # 59 s delay, on success and on failure alike. Confirm it is wanted
        # before running the script on a shared or interactive machine.
        os.system('shutdown -s -f -t 59')
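# 使用Keras对股票进行分类 (article title: "Classifying stocks with Keras")
# 最新推荐文章于 2022-11-08 12:13:53 发布 (page footer: "latest recommended article published 2022-11-08 12:13:53")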