我正在尝试使用 BayesSearchCV 对一个用于多分类的 XGBoost 模型进行超参数调优。以下是我的代码。
# Passed to BayesSearchCV as n_iter below.
n_iterations = 50

# Base multi-class XGBoost classifier whose hyperparameters are searched.
estimator = xgb.XGBClassifier(
    n_jobs=-1,
    objective="multi:softmax",
    eval_metric="merror",
    verbosity=0,
    num_class=3,
)

# Search space handed to BayesSearchCV as (low, high) or (low, high, prior)
# tuples. Every key is listed exactly once: "min_child_weight" used to appear
# twice in this literal, so only the later (0, 5) range was ever in effect;
# that effective range is the one kept here.
# NOTE(review): the integer ranges starting at 0 ("min_child_weight",
# "max_delta_step") are candidates for the skopt error
# "All integer values shouldbe greater than 0.000000" -- TODO confirm.
# NOTE(review): "scale_pos_weight" is presumably only meaningful for binary
# class imbalance; verify it has any effect with objective="multi:softmax".
search_space = {
    "learning_rate": (0.01, 1.0, "log-uniform"),
    "min_child_weight": (0, 5),
    "max_depth": (1, 50),
    "max_delta_step": (0, 10),
    "subsample": (0.01, 1.0, "uniform"),
    "colsample_bytree": (0.01, 1.0, "log-uniform"),
    "colsample_bylevel": (0.01, 1.0, "log-uniform"),
    "reg_lambda": (1e-9, 1000, "log-uniform"),
    "reg_alpha": (1e-9, 1.0, "log-uniform"),
    "gamma": (1e-9, 0.5, "log-uniform"),
    "n_estimators": (5, 5000),
    "scale_pos_weight": (1e-6, 500, "log-uniform"),
}

# Group-based 10-fold splitter; the matching `groups` are supplied in fit().
# An alternative tried earlier was StratifiedKFold(n_splits=3, shuffle=True).
cv = GroupKFold(n_splits=10)

# Bayesian hyperparameter search scored by accuracy; refit=True refits the
# estimator with the best parameters found once the search finishes.
bayes_cv_tuner = BayesSearchCV(
    estimator=estimator,
    search_spaces=search_space,
    scoring="accuracy",
    cv=cv,
    n_jobs=-1,
    n_iter=n_iterations,
    verbose=0,
    refit=True,
)
import pandas as pd
import numpy as np
def print_status(optimal_result):
    """Report search progress and save the cross-validation results so far.

    Passed as the ``callback`` argument to ``bayes_cv_tuner.fit``. It reads
    the module-level ``bayes_cv_tuner``, prints the number of models
    evaluated, the best score (rounded to 3 decimals) and the best
    parameters, then writes ``cv_results_`` to
    ``<EstimatorClassName>_cv_results_summary.csv``.

    Args:
        optimal_result: The argument supplied by the search when it invokes
            the callback. It is not used here; all state is read from
            ``bayes_cv_tuner``.
    """
    # One row per parameter setting evaluated so far.
    models_tested = pd.DataFrame(bayes_cv_tuner.cv_results_)

    print(
        "Model #{}\nBest accuracy so far: {}\nBest parameters so far: {}\n".format(
            len(models_tested),
            np.round(bayes_cv_tuner.best_score_, 3),
            bayes_cv_tuner.best_params_,
        )
    )

    # The CSV is rewritten on every call, so it always holds the full history
    # up to the latest evaluated model.
    clf_type = bayes_cv_tuner.estimator.__class__.__name__
    models_tested.to_csv(clf_type + "_cv_results_summary.csv")
# Run the search; print_status is passed as the callback, and the group labels
# are forwarded for the group-based cross-validation splitter.
result = bayes_cv_tuner.fit(
    X,
    y,
    callback=print_status,
    groups=data.groups,
)
运行时一开始一切正常，但到第 10 个模型时抛出了以下错误：
Traceback (most recent call last):
File "<ipython-input-189-dc299c53649b>", line 1, in <module>
result = bayes_cv_tuner.fit(X, y, callback=print_status, groups=data_nobands2.AGREEMENT_NUMBER2)
File "C:\Users\CatKa\Anaconda3\lib\site-packages\skopt\searchcv.py", line 694, in fit
groups=groups, n_points=n_points_adjusted
File "C:\Users\CatKa\Anaconda3\lib\site-packages\skopt\searchcv.py", line 565, in _step
params = optimizer.ask(n_points=n_points)
File "C:\Users\CatKa\Anaconda3\lib\site-packages\skopt\optimizer\optimizer.py", line 417, in ask
opt._tell(x, y_lie)
File "C:\Users\CatKa\Anaconda3\lib\site-packages\skopt\optimizer\optimizer.py", line 553, in _tell
n_samples=self.n_points, random_state=self.rng))
File "C:\Users\CatKa\Anaconda3\lib\site-packages\skopt\space\space.py", line 963, in transform
columns[j] = self.dimensions[j].transform(columns[j])
File "C:\Users\CatKa\Anaconda3\lib\site-packages\skopt\space\space.py", line 162, in transform
return self.transformer.transform(X)
File "C:\Users\CatKa\Anaconda3\lib\site-packages\skopt\space\transformers.py", line 304, in transform
X = transformer.transform(X)
File "C:\Users\CatKa\Anaconda3\lib\site-packages\skopt\space\transformers.py", line 251, in transform
"be greater than %f" % self.low)
ValueError: All integer values shouldbe greater than 0.000000
我当然已经在谷歌上搜索过，但没有找到任何有用的信息。大家有什么想法吗？
另外补充一点，以防与此有关：我的数据集中完全没有负值。