HPO: Hyperparameter Tuning Methods

Dataset source: the Kaggle mobile price dataset

Video tutorial

import optuna
import pandas as pd
import numpy as np
import torch
from hyperopt.pyll import scope
from sklearn import ensemble, metrics, model_selection, preprocessing, pipeline, decomposition
from functools import partial
from skopt import space, gp_minimize
from hyperopt import hp, fmin, tpe, Trials

def optimize(params, param_names, x, y):
    # Objective for skopt's gp_minimize: params arrives as a plain list,
    # so zip it back into a dict of keyword arguments.
    params = dict(zip(param_names, params))
    model = ensemble.RandomForestClassifier(**params)
    kf = model_selection.StratifiedKFold(n_splits=5)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        xtrain, ytrain = x[train_idx], y[train_idx]
        xtest, ytest = x[test_idx], y[test_idx]

        model.fit(xtrain, ytrain)
        preds = model.predict(xtest)
        fold_acc = metrics.accuracy_score(ytest, preds)
        accuracies.append(fold_acc)
    # gp_minimize minimizes, so return the negative mean CV accuracy
    return -1 * np.mean(accuracies)

def optimize_1(params, x, y):
    # Objective for hyperopt's fmin: params is already a dict sampled from
    # the search space, so it can be unpacked directly.
    model = ensemble.RandomForestClassifier(**params)
    kf = model_selection.StratifiedKFold(n_splits=5)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        xtrain, ytrain = x[train_idx], y[train_idx]
        xtest, ytest = x[test_idx], y[test_idx]

        model.fit(xtrain, ytrain)
        preds = model.predict(xtest)
        fold_acc = metrics.accuracy_score(ytest, preds)
        accuracies.append(fold_acc)
    # fmin minimizes, so return the negative mean CV accuracy
    return -1 * np.mean(accuracies)

def optimize_2(trial, x, y):
    # Objective for Optuna: hyperparameters are sampled through the trial object.
    criterion = trial.suggest_categorical("criterion", ["gini", "entropy"])
    n_estimators = trial.suggest_int("n_estimators", 100, 1500)
    max_depth = trial.suggest_int("max_depth", 3, 15)
    # suggest_float replaces the deprecated suggest_uniform
    max_features = trial.suggest_float("max_features", 0.1, 1.0)

    model = ensemble.RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
        criterion=criterion,
    )
    kf = model_selection.StratifiedKFold(n_splits=5)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        xtrain, ytrain = x[train_idx], y[train_idx]
        xtest, ytest = x[test_idx], y[test_idx]

        model.fit(xtrain, ytrain)
        preds = model.predict(xtest)
        fold_acc = metrics.accuracy_score(ytest, preds)
        accuracies.append(fold_acc)
    # The study below minimizes, so return the negative mean CV accuracy
    return -1 * np.mean(accuracies)

if __name__ == "__main__":
    df = pd.read_csv("../Data/archive/train.csv")

    X = df.drop("price_range", axis=1).values
    y = df.price_range.values
    classifier = ensemble.RandomForestClassifier(n_jobs=4)
    '''
    Grid search
    '''
    param_grid_1 = {
        "n_estimators": [100, 200, 300, 400],
        "max_depth": [1, 3],
        "criterion": ["gini", "entropy"],
    }
    '''
    Random search
    '''
    param_grid_2 = {
        "n_estimators": np.arange(100, 1500, 100),
        "max_depth": np.arange(1, 20),
        "criterion": ["gini", "entropy"],
    }
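    # param_grid_1 is exhausted fully by GridSearchCV (4 * 2 * 2 = 16 candidates,
    # each fit 5 times under cv=5), whereas RandomizedSearchCV only samples
    # n_iter combinations from the distributions in param_grid_2.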

    model1 = model_selection.GridSearchCV(
        estimator=classifier,
        param_grid=param_grid_1,
        scoring="accuracy",
        verbose=10,
        n_jobs=4,
        cv=5,
    )
    model2 = model_selection.RandomizedSearchCV(
        estimator=classifier,
        param_distributions=param_grid_2,
        n_iter=10,
        scoring="accuracy",
        verbose=10,
        n_jobs=4,
        cv=5,
    )

    scl = preprocessing.StandardScaler()
    pca = decomposition.PCA()
    rf = ensemble.RandomForestClassifier(n_jobs=8)
    classifier1 = pipeline.Pipeline([("scaling", scl), ("pca", pca), ("rf", rf)])
    param_grid_3={
        "pca__n_components": np.arange(5, 10),
        "rf__n_estimators": np.arange(100, 1500, 100),
        "rf__max_depth": np.arange(1, 20),
        "rf__criterion": ["gini", "entropy"],
    }
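    # Pipeline parameters follow sklearn's "<step>__<param>" naming, so
    # "pca__n_components" targets the PCA step and the "rf__*" keys the forest.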
    model3 = model_selection.RandomizedSearchCV(
        estimator=classifier1,
        param_distributions=param_grid_3,
        n_iter=10,
        scoring="accuracy",
        verbose=10,
        n_jobs=1,
        cv=5,
    )
    # Fit whichever search object is needed, e.g.:
    # model1.fit(X, y)
    # print(model1.best_score_)
    # print(model1.best_estimator_)
    '''
    skopt (gp_minimize)
    '''
    # param_space=[
    #     space.Integer(3, 15, name="max_depth"),
    #     space.Integer(100, 600, name="n_estimators"),
    #     space.Categorical(["gini", "entropy"], name="criterion"),
    #     space.Real(0.01, 1, prior="uniform", name="max_features")
    # ]
    # param_names=["max_depth", "n_estimators", "criterion", "max_features"]
    # optimization_function = partial(
    #     optimize,
    #     param_names=param_names,
    #     x=X,
    #     y=y
    # )
    # result = gp_minimize(
    #     optimization_function,
    #     dimensions = param_space,
    #     n_calls = 15,
    #     n_random_starts=10,
    #     verbose = 10,
    # )
    # print(dict(zip(param_names, result.x)))
    '''
    hyperopt
    '''
    # param_space1={
    #     "max_depth": scope.int(hp.quniform("max_depth", 3, 15, 1)),
    #     "n_estimators": scope.int(hp.quniform("n_estimators", 100, 600, 1)),
    #     "criterion": hp.choice("criterion", ["gini", "entropy"]),
    #     "max_features": hp.uniform("max_features", 0.01, 1),
    # }
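    # Note: hp.quniform samples floats, so scope.int is used above to cast
    # max_depth and n_estimators to integers before they reach the classifier.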
    # optimization_function_1 = partial(
    #     optimize_1,
    #     x=X,
    #     y=y
    # )
    # trials = Trials()
    #
    # result = fmin(
    #     optimization_function_1,
    #     space=param_space1,
    #     algo=tpe.suggest,
    #     max_evals=15,
    #     trials=trials,
    # )
    # print(result)
    '''
    optuna
    '''
    optimization_function = partial(optimize_2, x=X, y=y)
    # load_if_exists lets the script be rerun against the same sqlite storage
    study = optuna.create_study(study_name='test', direction="minimize", storage='sqlite:///db.sqlite3', load_if_exists=True)
    # study = optuna.create_study(direction="minimize")
    study.optimize(optimization_function, n_trials=15)
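
    # A minimal sketch of reading the results back once the trials finish;
    # it assumes the "test" study and sqlite storage created above.
    print(study.best_params)
    print(-study.best_value)  # flip the sign back to mean CV accuracy
    # The sqlite storage also allows reloading or resuming the study later, e.g.:
    # study = optuna.load_study(study_name="test", storage="sqlite:///db.sqlite3")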
