我正在尝试使用 optuna 来调整 LGBM 的超参数,但它会报告标题提到的错误。奇怪的是我y
的是熊猫系列。
错误如下所示:
[1158] valid_0's auc: 0.812934 valid_0's binary_logloss: 0.509509
[W 2021-10-01 22:14:20,509] Trial 0 failed because of the following error: ValueError('y should be a 1d array, got an array of shape (191584, 2) instead.',)
Traceback (most recent call last):
我的代码在下面列出。
y = train['claim']
X = train.drop(['id', 'claim'], axis=1)
# tuning with optuna
def objective(trial, X, y):
param_grid = {
"device_type": trial.suggest_categorical("device_type", ['gpu']),
"n_estimators": trial.suggest_categorical("n_estimators", [10000]),
"learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
"num_leaves": trial.suggest_int("num_leaves", 20, 3000, step=20),
"max_depth": trial.suggest_int("max_depth", 3, 12),
"min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 200, 10000, step=100),
"lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
"lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
"min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
"bagging_fraction": trial.suggest_float(
"bagging_fraction", 0.2, 0.95, step=0.1
),
"bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
"feature_fraction": trial.suggest_float(
"feature_fraction", 0.2, 0.95, step=0.1
),
}
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1121218)
cv_scores = np.empty(5)
for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):
X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
y_train, y_test = y[train_idx], y[test_idx]
model = lgbm.LGBMClassifier(objective="binary", **param_grid)
model.fit(
X_train,
y_train,
eval_set=[(X_test, y_test)],
eval_metric="auc",
early_stopping_rounds=100,
callbacks=[
LightGBMPruningCallback(trial, "auc")
], # Add a pruning callback
)
preds = model.predict_proba(X_test)
cv_scores[idx] = roc_auc_score(y_test, preds)
return np.mean(cv_scores)
study = optuna.create_study(direction="maximize", study_name="LGBM Classifier")
func = lambda trial: objective(trial, X, y)
study.optimize(func, n_trials=20)
print(f"\tBest value (rmse): {study.best_value:.5f}")
print(f"\tBest params:")
for key, value in study.best_params.items():
print(f"\t\t{key}: {value}")