GridSearchCV/RandomizedSearchCV

The code in this post was developed and tested on top of the earlier post on multi-feature time-series LSTM stock-price prediction based on tushare data.
The overall architecture is the same as before.

Using GridSearchCV with keras.wrappers.scikit_learn
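keras.wrappers.scikit_learn.KerasRegressor wraps a Keras model-building function as a scikit-learn estimator, so any argument of build_fn (here input_shape, output_shape, optimizer) and any fit() argument (epochs, batch_size) can be listed in param_grid and searched like an ordinary hyperparameter. (This wrapper ships with Keras 2.x; newer Keras releases dropped it in favor of the separate scikeras package.)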

import pandas as pd
import numpy as np


def create_dataset(X, Y, window_size=30, predict_size=5):
    # slide a window over the series: each sample is `window_size` consecutive rows of X,
    # and its target is the next `predict_size` values of column 0 of Y
    data_X, data_Y = [], []
    for i in range(len(X) - window_size - predict_size + 1):
        data_X.append(X[i:(i + window_size)])
        data_Y.append(Y[i + window_size:i + window_size + predict_size, 0])
    return np.array(data_X), np.array(data_Y)

from sklearn.preprocessing import MinMaxScaler
def minMaxScaler(df, feature_range=(0, 1)):
    # fit a MinMaxScaler on a single column and return it; the caller applies transform
    scaler = MinMaxScaler(feature_range=feature_range)
    data_raw = pd.DataFrame(df).values.astype("float32")
    scaler.fit(data_raw)
    return scaler

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping
from keras import optimizers
from keras import backend

def createModel(input_shape, output_shape, optimizer="adam"):
    # two stacked LSTMs; the final Dense layer emits all `output_shape` future steps at once
    model = Sequential()
    model.add(LSTM(64, input_shape=input_shape, return_sequences=True))
    model.add(Dense(32))
    # model.add(Dropout(0.0002))
    # model.add(LSTM(64, return_sequences=True))
    # model.add(Dropout(0.0002))
    model.add(LSTM(32))
    model.add(Dense(output_shape))
    # accuracy is not meaningful for regression, so only the MSE loss is tracked
    model.compile(loss="mean_squared_error", optimizer=optimizer)
    model.summary()
    return model

if __name__ == '__main__':
    data = pd.read_csv('../data/data.csv')
    data = data.sort_values('trade_date').reset_index(drop=True)

    temp_data = data.copy()
    # ################################ data preprocessing ################################
    # note: each scaler below is fit on the full series (train + test together), which
    # leaks test statistics into training; for real use, fit scalers on the train split only
    scaler = minMaxScaler(temp_data.vol)
    temp_data.vol = \
    scaler.transform(pd.DataFrame(temp_data.vol).values.astype("float32"))

    scaler = minMaxScaler(temp_data.turnover_rate)
    temp_data.turnover_rate = \
    scaler.transform(pd.DataFrame(temp_data.turnover_rate).values.astype("float32"))

    scaler = minMaxScaler(temp_data.net_mf_vol,feature_range=(-1, 1))
    temp_data.net_mf_vol = \
    scaler.transform(pd.DataFrame(temp_data.net_mf_vol).values.astype("float32"))

    scaler = minMaxScaler(temp_data.close)
    temp_data.close = \
    scaler.transform(pd.DataFrame(temp_data.close).values.astype("float32"))
    # #############################################################################
    # ################################ build the datasets ################################
    print(temp_data.isnull().any())
    train_data = temp_data[temp_data['trade_date'] <= 20191216]
    test_data = temp_data[temp_data['trade_date'] >= 20190301]
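    # note: the train filter (<= 20191216) and the test filter (>= 20190301) overlap,
    # so windows from 20190301 through 20191216 appear in both sets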



    train_X = pd.DataFrame(train_data[['close', 'vol', 'turnover_rate', 'net_mf_vol']]).values
    train_Y = pd.DataFrame(train_data['close']).values
    test_X = pd.DataFrame(test_data[['close', 'vol', 'turnover_rate', 'net_mf_vol']]).values
    test_Y = pd.DataFrame(test_data['close']).values


    window_size = 90
    predict_size = 5
    train_X,train_Y = create_dataset(train_X, train_Y, window_size=window_size, predict_size=predict_size)
    test_X,test_Y = create_dataset(test_X, test_Y, window_size=window_size, predict_size=predict_size)
    print(f'train_X.shape: {train_X.shape}   train_Y.shape: {train_Y.shape} \n'
          f'test_X.shape: {test_X.shape}     test_Y.shape: {test_Y.shape} ')

    # #############################################################################
    # ################################ build the model ###################################

    early_stopping = EarlyStopping(monitor='loss', patience=50)
    # optimizer instances with hand-picked learning rates; the grids below pass optimizer
    # names as strings ('adam', 'sgd', ...) with Keras defaults, so these two go unused
    adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, decay=0.0, amsgrad=False)
    sgd = optimizers.SGD(lr=0.001, momentum=0.0, decay=1e-4, nesterov=False)
    input_shape = (train_X.shape[1], train_X.shape[2])
    output_shape = train_Y.shape[1]
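    # with window_size=90 and 4 features, input_shape is (90, 4) and output_shape is 5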

    # Test 1: grid over epochs and batch_size (roughly 60x the time of a single run)
    from sklearn.model_selection import GridSearchCV
    from keras.wrappers import scikit_learn  # https://keras.io/zh/scikit-learn-api/


    # epochs and batch_size come from param_grid, so they are not fixed here
    model = scikit_learn.KerasRegressor(build_fn=createModel)
    param_grid = dict(epochs=[150, 300, 450],
                      batch_size=[16, 32],
                      input_shape=[input_shape],
                      output_shape=[output_shape]
                      )
    # n_jobs=1: Keras models generally cannot be pickled for process-based parallel search
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
    grid_result = grid.fit(train_X, train_Y, verbose=2, callbacks=[early_stopping])
    backend.clear_session()

    print('Best: {} using {}'.format(grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, std, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, std, param))

    # Test 2: grid over optimizers at fixed epochs/batch_size (roughly 20x a single run)
    model = scikit_learn.KerasRegressor(build_fn=createModel)
    param_grid = dict(epochs=[150],
                      batch_size=[32],
                      optimizer=['sgd', 'rmsprop', 'adam', 'adagrad'],
                      input_shape=[input_shape],
                      output_shape=[output_shape]
                      )
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
    grid_result = grid.fit(train_X, train_Y, verbose=2, callbacks=[early_stopping])
    backend.clear_session()

    print('Best: {} using {}'.format(grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, std, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, std, param))

    # Test 3: the full grid over epochs, batch_size and optimizer (24 combinations)
    model = scikit_learn.KerasRegressor(build_fn=createModel)
    param_grid = dict(epochs=[150, 300, 450],
                      batch_size=[16, 32],
                      optimizer=['sgd', 'rmsprop', 'adam', 'adagrad'],
                      input_shape=[input_shape],
                      output_shape=[output_shape]
                      )
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
    grid_result = grid.fit(train_X, train_Y, verbose=2, callbacks=[early_stopping])
    backend.clear_session()

    print('Best: {} using {}'.format(grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, std, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, std, param))

    # #############################################################################
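
The runtime multipliers in the comments above follow directly from the size of the grid: GridSearchCV fits one model per parameter combination per cross-validation fold. sklearn's ParameterGrid shows how many fits a grid will trigger before you launch it (a minimal sketch; the fold count is an assumption, set it to whatever cv your GridSearchCV uses):

from sklearn.model_selection import ParameterGrid

param_grid = dict(epochs=[150, 300, 450],
                  batch_size=[16, 32],
                  optimizer=['sgd', 'rmsprop', 'adam', 'adagrad'])
n_combos = len(list(ParameterGrid(param_grid)))  # 3 * 2 * 4 = 24 combinations
cv_folds = 3  # assumption: match this to the cv argument of GridSearchCV
print(n_combos * cv_folds)  # 72 fits for the Test 3 grid, plus one final refit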

Using RandomizedSearchCV with keras.wrappers.scikit_learn
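Unlike GridSearchCV, RandomizedSearchCV does not enumerate every combination: it draws a fixed number of candidates (n_iter, 10 by default) from param_distributions, so the cost of the search stays capped no matter how large the parameter space is.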

import pandas as pd
import numpy as np


def create_dataset(X, Y, window_size=30, predict_size=5):
    # slide a window over the series: each sample is `window_size` consecutive rows of X,
    # and its target is the next `predict_size` values of column 0 of Y
    data_X, data_Y = [], []
    for i in range(len(X) - window_size - predict_size + 1):
        data_X.append(X[i:(i + window_size)])
        data_Y.append(Y[i + window_size:i + window_size + predict_size, 0])
    return np.array(data_X), np.array(data_Y)


from sklearn.preprocessing import MinMaxScaler


def minMaxScaler(df, feature_range=(0, 1)):
    # fit a MinMaxScaler on a single column and return it; the caller applies transform
    scaler = MinMaxScaler(feature_range=feature_range)
    data_raw = pd.DataFrame(df).values.astype("float32")
    scaler.fit(data_raw)
    return scaler


from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping
from keras import optimizers
from keras import backend


def createModel(input_shape, output_shape, optimizer="adam"):
    # two stacked LSTMs; the final Dense layer emits all `output_shape` future steps at once
    model = Sequential()
    model.add(LSTM(64, input_shape=input_shape, return_sequences=True))
    model.add(Dense(32))
    # model.add(Dropout(0.0002))
    # model.add(LSTM(64, return_sequences=True))
    # model.add(Dropout(0.0002))
    model.add(LSTM(32))
    model.add(Dense(output_shape))
    # accuracy is not meaningful for regression, so only the MSE loss is tracked
    model.compile(loss="mean_squared_error", optimizer=optimizer)
    model.summary()
    return model


if __name__ == '__main__':
    data = pd.read_csv('../data/data.csv')
    data = data.sort_values('trade_date').reset_index(drop=True)

    temp_data = data.copy()
    # ################################ data preprocessing ################################
    scaler = minMaxScaler(temp_data.vol)
    temp_data.vol = \
        scaler.transform(pd.DataFrame(temp_data.vol).values.astype("float32"))

    scaler = minMaxScaler(temp_data.turnover_rate)
    temp_data.turnover_rate = \
        scaler.transform(pd.DataFrame(temp_data.turnover_rate).values.astype("float32"))

    scaler = minMaxScaler(temp_data.net_mf_vol, feature_range=(-1, 1))
    temp_data.net_mf_vol = \
        scaler.transform(pd.DataFrame(temp_data.net_mf_vol).values.astype("float32"))

    scaler = minMaxScaler(temp_data.close)
    temp_data.close = \
        scaler.transform(pd.DataFrame(temp_data.close).values.astype("float32"))
    # #############################################################################
    # ################################ build the datasets ################################
    print(temp_data.isnull().any())
    train_data = temp_data[temp_data['trade_date'] <= 20191216]
    test_data = temp_data[temp_data['trade_date'] >= 20190301]

    train_X = pd.DataFrame(train_data[['close', 'vol', 'turnover_rate', 'net_mf_vol']]).values
    train_Y = pd.DataFrame(train_data['close']).values
    test_X = pd.DataFrame(test_data[['close', 'vol', 'turnover_rate', 'net_mf_vol']]).values
    test_Y = pd.DataFrame(test_data['close']).values

    window_size = 90
    predict_size = 5
    train_X, train_Y = create_dataset(train_X, train_Y, window_size=window_size, predict_size=predict_size)
    test_X, test_Y = create_dataset(test_X, test_Y, window_size=window_size, predict_size=predict_size)
    print(f'train_X.shape: {train_X.shape}   train_Y.shape: {train_Y.shape} \n'
          f'test_X.shape: {test_X.shape}     test_Y.shape: {test_Y.shape} ')

    # #############################################################################
    # ################################ build the model ###################################

    early_stopping = EarlyStopping(monitor='loss', patience=50)
    # optimizer instances with hand-picked learning rates; unused by the string-based search below
    adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, decay=0.0, amsgrad=False)
    sgd = optimizers.SGD(lr=0.001, momentum=0.0, decay=1e-4, nesterov=False)
    input_shape = (train_X.shape[1], train_X.shape[2])
    output_shape = train_Y.shape[1]

    from datetime import datetime
    start_time = datetime.now()


    # Test 1: randomized search over the same space as GridSearchCV Test 3
    from sklearn.model_selection import RandomizedSearchCV
    from keras.wrappers import scikit_learn  # https://keras.io/zh/scikit-learn-api/

    # epochs and batch_size come from param_distributions, so they are not fixed here
    model = scikit_learn.KerasRegressor(build_fn=createModel)
    distributions = dict(epochs=[150, 300, 450],
                         batch_size=[16, 32],
                         optimizer=['sgd', 'rmsprop', 'adam', 'adagrad'],
                         input_shape=[input_shape],
                         output_shape=[output_shape]
                         )
    grid = RandomizedSearchCV(estimator=model, param_distributions=distributions, n_jobs=1)
    grid_result = grid.fit(train_X, train_Y, verbose=2, callbacks=[early_stopping])
    backend.clear_session()

    end_time = datetime.now()
    print((end_time - start_time).seconds)  # 10920 seconds (about 3 hours) on the author's machine

    print('Best: {} using {}'.format(grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, std, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, std, param))
    # #############################################################################
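
With plain lists, RandomizedSearchCV samples each candidate value uniformly; it also accepts scipy.stats distributions, which is where randomized search pays off for continuous or wide integer ranges. A minimal sketch reusing model and RandomizedSearchCV from the script above (the ranges are illustrative assumptions, not tuned values):

from scipy.stats import randint

distributions = dict(epochs=randint(100, 500),  # any integer in [100, 500)
                     batch_size=[16, 32, 64],
                     optimizer=['sgd', 'rmsprop', 'adam', 'adagrad'])
# n_iter caps the number of sampled candidates no matter how large the space is
grid = RandomizedSearchCV(estimator=model, param_distributions=distributions,
                          n_iter=10, n_jobs=1, random_state=42)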

Test results

Manual tuning: (result screenshots omitted)

Automatic tuning: (result screenshots omitted)
