3

我将 sklearn 和 optuna 一起用于 HPO。我想创建一个自定义函数,它将 sklearn 管道作为输入并返回 optuna 特定的网格。返回 sklearn 特定的参数网格(即字典)似乎更直接(duh);这就是我到目前为止所得到的:

def grid_from_estimator(estimator, type = 'sklearn', trial = None):
    """Return a hyperparameter grid for the final step of a sklearn Pipeline.

    Parameters
    ----------
    estimator : sklearn.pipeline.Pipeline
        Pipeline whose step named 'estimator' selects which grid is built.
    type : str, default 'sklearn'
        'sklearn' -> plain dict grid (usable with GridSearchCV etc.);
        'optuna'  -> concrete values sampled from ``trial``.
    trial : optuna.trial.Trial, optional
        Required when ``type == 'optuna'``. Optuna hands the trial to the
        objective function, so forward it from there — it is not a global,
        which is why the original ``trial.suggest_*`` calls raised NameError.

    Returns
    -------
    dict or None
        The grid / sampled parameters, or None for an unknown estimator.

    Raises
    ------
    ValueError
        If ``type == 'optuna'`` and no ``trial`` was supplied.
    """
    estimator_name = estimator.named_steps['estimator'].__class__.__name__
    # Default so an unrecognised estimator/type returns None instead of
    # raising UnboundLocalError at the return statement.
    params = None

    if type == 'sklearn':
        if estimator_name == 'LogisticRegression':
            params = {
                'estimator__penalty': ['l1','elasticnet'],
                'estimator__C': np.logspace(-4, 4, 20)
            }
        elif estimator_name == 'LGBMClassifier':
            params = {
                'estimator__n_estimators': np.arange(100, 1000, 200),
                'estimator__boosting_type': ['gbdt','dart'],
                'estimator__max_depth': np.arange(6, 12),
                'estimator__num_leaves': np.arange(30, 150, 5),
                'estimator__learning_rate': [1e-2/2, 1e-2, 1e-1/2, 1e-1, 0.5, 1],
                'estimator__min_child_samples': np.arange(20, 100, 5),
                'estimator__subsample': np.arange(0.65, 1, 0.05),
                'estimator__colsample_bytree': np.arange(0.4, 0.75, 0.05),
                'estimator__reg_alpha': [0, 1e-1, 1, 2, 5, 7, 10, 50, 100],
                'estimator__reg_lambda': [0, 1e-1, 1, 5, 10, 20, 50, 100],
                'estimator__iterations': np.arange(100, 800, 100),
                'estimator__objective': 'binary'
            }
    elif type == 'optuna':
        if trial is None:
            # `trial` only exists inside an optuna objective; it must be
            # passed in explicitly.
            raise ValueError("type='optuna' requires the `trial` argument")
        if estimator_name == 'LogisticRegression':
            params = {
                'estimator__penalty': trial.suggest_categorical('penalty', ['l1', 'elasticnet']),
                # log=True mirrors np.logspace(-4, 4): sample C itself on a
                # log scale between 1e-4 and 1e4. The original
                # `trial.suggest.suggest_loguniform('c', -4, 4)` was both a
                # typo (double `suggest`) and wrong bounds (loguniform takes
                # the values, not their exponents, and requires low > 0).
                'estimator__C': trial.suggest_float('C', 1e-4, 1e4, log=True)
            }
        elif estimator_name == 'LGBMClassifier':
            params = {
                'estimator__n_estimators': trial.suggest_int('n_estimators', 100, 1000),
                'estimator__boosting_type': trial.suggest_categorical('boosting_type', ['gbdt', 'dart']),
                'estimator__max_depth': trial.suggest_int('max_depth', 6, 12),
                'estimator__num_leaves': trial.suggest_int('num_leaves', 30, 150, step=5),
                'estimator__learning_rate': trial.suggest_float('learning_rate', 1e-4, 1),
                'estimator__min_child_samples': trial.suggest_int('min_child_samples', 20, 100),
                'estimator__subsample': trial.suggest_float('subsample', 0.5, 1),
                'estimator__colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 0.75),
                'estimator__reg_alpha': trial.suggest_float('reg_alpha', 1e-2, 10),
                'estimator__reg_lambda': trial.suggest_float('reg_lambda', 1e-2, 10)
            }

    return params

“trial.suggest_ ...”部分不断“抱怨”并返回错误(`trial` 这个名字未定义);虽然我理解原因,但我看不到任何解决方法。这甚至可能吗?有任何想法吗?感谢您的支持!

4

2 回答 2

1

我认为,这应该有效,

def grid_from_estimator(estimator, trial, type = 'sklearn'):
    """Skeleton of the asker's function with one key change: the optuna
    ``trial`` is an explicit parameter. Optuna passes the trial to the
    objective function, which forwards it here, so ``trial.suggest_*``
    calls resolve instead of raising NameError.
    """
    pass

def your_objective_function(trial):
    """Objective that optuna calls once per trial; forwards `trial` to the
    grid builder so it can call trial.suggest_* on it.
    """
    # NOTE(review): a string is passed here where the asker's version
    # expects a Pipeline with .named_steps — confirm which contract is meant.
    params = grid_from_estimator('LogisticRegression', trial, 'optuna')
    #Rest of the code here.


def tune_model():
    """Create an optuna study and run the objective for 20 trials; optuna
    supplies a fresh trial object to your_objective_function each time.
    """
    study = optuna.create_study()
    study.optimize(your_objective_function, n_trials=20)

tune_model()
于 2021-11-24T07:11:46.667 回答
0

使用 optuna 的 ask-and-tell 接口(ask and tell interface)的示例方法

代码

import optuna
import numpy as np


def optuna_objective(estimator_name, params):
    """Toy objective: score the sampled `params` for the given estimator.

    For 'LogisticRegression' this evaluates the demo function
    (x - 2)^2 + y; every other estimator (including the LGBMClassifier
    placeholder) yields None.
    """
    if estimator_name == 'LGBMClassifier':
        # Placeholder branch: fit the model with the sampled params
        # (e.g. params['estimator__n_estimators']) and return its accuracy.
        return None

    if estimator_name == 'LogisticRegression':
        return (params['x'] - 2) ** 2 + params['y']

    return None


def grid_from_estimator(estimator_name, type_='sklearn', study=None):
    """Build a parameter grid (sklearn) or sample parameters (optuna).

    For type_='sklearn' a static dict grid is returned with trial=None.
    For type_='optuna' a trial is obtained via study.ask() and concrete
    values are sampled from it.

    Returns
    -------
    (params, trial)
        params is None for an unknown estimator/type; trial is None unless
        type_ == 'optuna'.
    """
    if type_ == 'sklearn':
        static_grids = {
            'LogisticRegression': {
                'estimator__penalty': ['l1','elasticnet'],
                'estimator__C': np.logspace(-4, 4, 20)
            },
            'LGBMClassifier': {
                'estimator__n_estimators': np.arange(100, 1000, 200),
                'estimator__boosting_type':['gbdt','dart'],
                'estimator__max_depth': np.arange(6, 12),
                'estimator__num_leaves': np.arange(30, 150,5),
                'estimator__learning_rate': [1e-2/2 , 1e-2, 1e-1/2, 1e-1, 0.5, 1],
                'estimator__min_child_samples': np.arange(20, 100, 5),
                'estimator__subsample': np.arange(0.65, 1, 0.05),
                'estimator__colsample_bytree': np.arange(0.4, 0.75, 0.05),
                'estimator__reg_alpha': [0, 1e-1, 1, 2, 5, 7, 10, 50, 100],
                'estimator__reg_lambda': [0, 1e-1, 1, 5, 10, 20, 50, 100],
                'estimator__iterations': np.arange(100, 800, 100),
                'estimator__objective': 'binary'
            },
        }
        return static_grids.get(estimator_name), None

    if type_ == 'optuna':
        # ask() opens a trial; the caller closes it later with study.tell().
        trial = study.ask()
        if estimator_name == 'LogisticRegression':
            # Demo search space matching optuna_objective's toy function.
            sampled = {
                'x': trial.suggest_float('x', -10, 10),
                'y': trial.suggest_float('y', -10, 10)
            }
            return sampled, trial
        if estimator_name == 'LGBMClassifier':
            sampled = {
                'estimator__n_estimators': trial.suggest_int('estimator__n_estimators', 100, 1000),
                'estimator__boosting_type': trial.suggest_categorical('estimator__boosting_type', ['gbdt', 'dart']),
                'estimator__max_depth': trial.suggest_int('estimator__max_depth', 6, 12),
                'estimator__num_leaves': trial.suggest_int('estimator__num_leaves', 30, 150, 5),
                'estimator__learning_rate': trial.suggest_float('estimator__learning_rate', 1e-4, 1),
                'estimator__min_child_samples': trial.suggest_int('estimator__min_child_samples', 20, 100),
                'estimator__subsample': trial.suggest_float('estimator__subsample', 0.5, 1),
                'estimator__colsample_bytree': trial.suggest_float('estimator__colsample_bytree', 0.4, 0.75),
                'estimator__reg_alpha': trial.suggest_float('estimator__reg_alpha', 1e-2, 10),
                'estimator__reg_lambda': trial.suggest_float('estimator__reg_lambda', 1e-2, 10)
            }
            return sampled, trial
        return None, trial

    return None, None


# (1) sklearn example: static grid, no study/trial involved.
print('SKLEARN')
estimator_name = 'LogisticRegression'
optimizer_type = 'sklearn'
params, _ = grid_from_estimator(estimator_name, type_=optimizer_type)
print(params)

print()
# (2) Optuna example with ask and tell interface.
# study.ask() happens inside grid_from_estimator; study.tell() below closes
# each trial with its objective value.
print('OPTUNA')
study = optuna.create_study(direction='maximize')
n_trials = 10
estimator_name = 'LogisticRegression'
optimizer_type = 'optuna'
for _ in range(n_trials):
    params, trial = grid_from_estimator(estimator_name, type_=optimizer_type, study=study)
    objective_value = optuna_objective(estimator_name, params)
    study.tell(trial, objective_value)  # tell the pair of trial and objective value
    print(f'trialnum: {trial.number}, params: {params}, value: {objective_value}')

# Report the best trial found by the study.
best_params = study.best_params
best_x = best_params["x"]
best_y = best_params["y"]
best_value = study.best_value
best_trial_num = study.best_trial.number
print(f"best x: {best_x}, best y: {best_y}, (x - 2)^2 + y: {(best_x - 2) ** 2 + best_y}, best_value: {best_value}, best_trial_num: {best_trial_num}")  # trial num starts at 0

输出

SKLEARN
{'estimator__penalty': ['l1', 'elasticnet'], 'estimator__C': array([1.00000000e-04, 2.63665090e-04, 6.95192796e-04, 1.83298071e-03,
       4.83293024e-03, 1.27427499e-02, 3.35981829e-02, 8.85866790e-02,
       2.33572147e-01, 6.15848211e-01, 1.62377674e+00, 4.28133240e+00,
       1.12883789e+01, 2.97635144e+01, 7.84759970e+01, 2.06913808e+02,
       5.45559478e+02, 1.43844989e+03, 3.79269019e+03, 1.00000000e+04])}

OPTUNA
[I 2021-11-25 19:03:09,673] A new study created in memory with name: no-name-f5046b21-f579-4c74-8046-79420c256d4a
trialnum: 0, params: {'x': 2.905894660287128, 'y': -4.537699327718261}, value: -3.7170541921815303
trialnum: 1, params: {'x': -9.275103438355583, 'y': -5.925000918692578}, value: 121.2029566269253
trialnum: 2, params: {'x': -2.9531168045205103, 'y': 5.253730464314739}, value: 29.78709654353821
trialnum: 3, params: {'x': 3.766902399344163, 'y': 3.778408673279479}, value: 6.900352762087639
trialnum: 4, params: {'x': -0.897563829823584, 'y': -0.887774211794973}, value: 7.508101936106943
trialnum: 5, params: {'x': -2.2256917634354645, 'y': 3.8017184220598903}, value: 21.658189301626216
trialnum: 6, params: {'x': -6.333366980619912, 'y': 9.87067058585388}, value: 79.3156758195401
trialnum: 7, params: {'x': 2.570258991787558, 'y': -0.1959178948625162}, value: 0.1292774228520457
trialnum: 8, params: {'x': 2.94430596072913, 'y': 4.318454050149043}, value: 5.210167797617609
trialnum: 9, params: {'x': 5.972023459737699, 'y': 4.165369460555215}, value: 19.942339825261854
best x: -9.275103438355583, best y: -5.925000918692578, (x - 2)^2 + y: 121.2029566269253, best_value: 121.2029566269253, best_trial_num: 1
于 2021-11-25T11:07:26.443 回答