# 使用Keras对股票进行分类 (Classifying stocks with Keras)

import os
import pandas as pd
import numpy as np
import datetime
import sys
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.utils import np_utils
import matplotlib.pyplot as plt
import traceback
import gc

# Module-level accumulators filled by the data-loading loop further down:
# one feature row and one integer label per retained trading-day row,
# pooled across every stock file.
train_data = []
train_target = []
# Train/test partitions; these names are rebound once train_test_split runs.
x_train = []
y_train = []
x_test = []
y_test = []
# Empty frame with the per-stock column layout. NOTE(review): not referenced
# by the rest of the visible script -- kept in case other code relies on it.
all_stocks = pd.DataFrame(columns=['Start', 'Max', 'Min', 'End', 'Volume', 'Amount', 'Ratio', 'Label'])
# Wall-clock start of the run.
d1 = datetime.datetime.now()

# Look-ahead horizon in rows: the label of row i is derived from the closing
# price at row i + interval.  It may be given as the single command-line
# argument; anything that is not a positive integer falls back to the default,
# because a zero/negative horizon makes the look-ahead lookup invalid.
DEFAULT_INTERVAL = 2
interval = DEFAULT_INTERVAL
if len(sys.argv) == 2:
    try:
        interval = int(sys.argv[1])
    except ValueError:
        print("invalid interval %r, using default %d" % (sys.argv[1], DEFAULT_INTERVAL),
              file=sys.stderr)
        interval = DEFAULT_INTERVAL
    if interval < 1:
        print("interval must be >= 1, using default %d" % DEFAULT_INTERVAL,
              file=sys.stderr)
        interval = DEFAULT_INTERVAL
print("interval = ", interval)

# Columns used as model inputs, in the order they are fed to the network.
FEATURE_COLUMNS = ['Start', 'Max', 'Min', 'End', 'Volume', 'Amount']


def _log(fh, *parts):
    """Write *parts* (stringified and concatenated) as one line to the progress log.

    The log is flushed after every line so that it survives a crash or the
    shutdown issued at the end of the run.
    """
    fh.write("".join(str(part) for part in parts) + "\n")
    fh.flush()


# Pipeline: read every exported stock file, derive an integer "percent change"
# label per row, z-score the six price/volume columns per stock, pool all rows,
# train a small dense softmax classifier and plot its training curves.
# Progress and any failure are written to the log file; the machine is shut
# down afterwards regardless of the outcome.
with open(r'd:\stock\test.txt', 'w') as fh:
    try:
        for f, _, fileNames in os.walk(r"d:\zd_zsone\T0002\export"):
            for fileName in fileNames:
                _log(fh, "for start")
                # NOTE(review): files whose name contains "SH#0" are skipped --
                # presumably non-stock (index) exports; confirm.
                if "SH#0" in fileName:
                    continue
                path = os.path.join(f, fileName)
                print(path)
                stock = pd.read_csv(path, sep='\t', skiprows=2, skipfooter=1, encoding='ansi',
                                    names=['Date', 'Start', 'Max', 'Min', 'End', 'Volume', 'Amount'],
                                    engine='python')
                _log(fh, fileName)
                # `value in series` tests the index labels, not the values, so
                # the membership check has to be spelled out on the values.
                # NOTE(review): 1996/12/26 is presumably a market-rule cut-over
                # date; rows before it are discarded only when the file
                # contains a 1996/12/25 row, as in the original condition.
                if (stock['Date'] == "1996/12/25").any():
                    stock = stock[stock['Date'] >= '1996/12/26'].reset_index(drop=True)
                _log(fh, "1")
                if len(stock) <= interval:
                    continue
                _log(fh, "2")
                # Return from the next row's open to the close `interval` rows
                # ahead, expressed in percent.  The trailing `interval` rows
                # have no look-ahead value and come out as NaN.
                ratio = stock['End'].shift(-interval) / stock['Start'].shift(-1) - 1
                label = ratio * 100
                # Keep only rows whose label lies in [-100, 100]; this also
                # removes the NaN tail and any NaN/inf produced by bad prices.
                keep = label.between(-100, 100)
                _log(fh, "3")
                # Truncate toward zero to a whole-percent class label.
                labels = label[keep].astype('int8')
                features = stock.loc[keep, FEATURE_COLUMNS].astype('float64')
                # Per-stock z-score using the population standard deviation.
                # A constant column has std 0; divide by 1 instead so it
                # becomes all zeros rather than NaN.
                std = features.std(ddof=0).replace(0, 1)
                features = ((features - features.mean()) / std).to_numpy(dtype='float32')
                _log(fh, "4")
                _log(fh, "tmpstocks nums: ", len(features))
                train_data.extend(features)
                # Plain Python ints avoid int8 overflow in later arithmetic.
                train_target.extend(labels.tolist())
                del stock
                gc.collect()

        if not train_data:
            raise RuntimeError("no training samples were loaded from the export directory")

        _log(fh, "train_data[0]: ", train_data[0])
        _log(fh, "train_data nums: ", len(train_data))
        _log(fh, "train_data size: ", sys.getsizeof(train_data))
        _log(fh, "train_target size 1: ", sys.getsizeof(train_target))

        label_array = np.asarray(train_target, dtype='int64')
        min_train_target = int(label_array.min())
        _log(fh, "min_train_target: ", min_train_target)

        max_train_target = int(label_array.max())
        _log(fh, "max_train_target: ", max_train_target)

        NB_CLASSES = max_train_target - min_train_target + 1
        _log(fh, "NB_CLASSES: ", NB_CLASSES)
        _log(fh, "train_target[0] 1: ", train_target[0])
        # Shift labels so they start at 0: class k stands for the label
        # k + min_train_target.  Feeding negative labels directly only works
        # through negative-index wraparound and fails when all labels are > 0.
        train_target = np_utils.to_categorical(label_array - min_train_target, NB_CLASSES)
        _log(fh, "train_target[0] 2: ", train_target[0])
        _log(fh, "train_target size 2: ", sys.getsizeof(train_target))

        x_train, x_test, y_train, y_test = train_test_split(
            np.asarray(train_data, dtype='float32'), train_target, test_size=0.2, shuffle=True)

        NB_EPOCH = 50
        BATCH_SIZE = 512
        VERBOSE = 1
        OPTIMIZER = SGD(lr=0.001)
        N_HIDDEN = 6
        VALIDATION_SPLIT = 0.2
        DROPOUT = 0.3

        # Two hidden ReLU layers with dropout, softmax over the label classes.
        model = Sequential()
        model.add(Dense(N_HIDDEN, input_shape=(len(FEATURE_COLUMNS),)))
        model.add(Activation('relu'))
        model.add(Dropout(DROPOUT))
        model.add(Dense(N_HIDDEN))
        model.add(Activation('relu'))
        model.add(Dropout(DROPOUT))
        model.add(Dense(NB_CLASSES))
        model.add(Activation('softmax'))
        model.summary()
        model.compile(loss='categorical_crossentropy', optimizer=OPTIMIZER, metrics=['accuracy'])
        history = model.fit(np.asarray(x_train), y_train,
                            batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                            verbose=VERBOSE, validation_split=VALIDATION_SPLIT)
        score = model.evaluate(np.asarray(x_test), y_test, verbose=VERBOSE)
        # Record the held-out result before the (blocking) plot windows open.
        _log(fh, "test score: ", score)

        # Older Keras records the metric as 'acc', newer releases as 'accuracy'.
        acc_key = 'acc' if 'acc' in history.history else 'accuracy'
        plt.plot(history.history[acc_key])
        plt.plot(history.history['val_' + acc_key])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

        d2 = datetime.datetime.now()
        _log(fh, "elapsed: ", d2 - d1)
    except BaseException as e:
        # Deliberately broad: whatever stops the run (including Ctrl-C) is
        # written to the log before the machine goes down.
        _log(fh, "The erros is ", traceback.format_exc())
    finally:
        # Schedules a forced Windows shutdown in 59 seconds, success or failure.
        os.system('shutdown -s -f -t 59')


  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值