我之前用 sklearn 库创建了一个模型。为了利用 dask 的 worker(工作节点)进行分布式训练,我正在尝试用 dask_ml 库实现同样的功能。下面是我的代码:
from dask_ml.xgboost import XGBRegressor
from dask_ml.model_selection import RandomizedSearchCV
# Connect to the Dask scheduler whose address is stored in the
# `dask_scheduler` environment variable.
client = Client(os.environ['dask_scheduler'], timeout=10000)

# Hyperparameter search space. Lists are candidate values; the scipy
# distribution is sampled for each candidate drawn by the search.
parameters = {
    'n_estimators': [200],
    'max_depth': range(5, 30),
    # scipy's uniform(loc, scale) spans [loc, loc + scale] = [0.05, 0.20].
    'learning_rate': scipy.stats.uniform(0.05, 0.15),
    'objective': ['reg:squarederror'],
    'booster': ['gbtree', 'gblinear', 'dart'],
    'min_child_weight': [3, 5, 7],
    # XGBoost requires subsample to lie in (0, 1]; the former candidate 1.5
    # was out of range and would make any fit that drew it fail.
    'subsample': [0.5, 0.75, 1],
    'colsample_bytree': [0.5, 0.75, 1],
}

grid = RandomizedSearchCV(
    XGBRegressor(feature_names=feature_names),
    param_distributions=parameters,
    scoring='neg_mean_absolute_error',  # Use MAE to select the best model
    n_jobs=-1,
    random_state=12345,  # Use a seed for repeatability
)

# Time the whole search and report it in minutes.
start = time()
with joblib.parallel_backend('dask'):
    # The fit must be inside the `with` body for the dask joblib backend
    # to be active while it runs.
    grid.fit(X, y)
elapsed_time = (time() - start) / 60
print("Training the model took %.2f minutes" % elapsed_time)
以下是我得到的错误:
  File "C:\Users\myuser\Documents\code\model.py", line 70, in tune_hyperparameters
    grid.fit(X, y)
  File "C:\Users\myuser\Documents\code\condavirtualenv\lib\site-packages\dask_ml\model_selection\_search.py", line 1236, in fit
    dsk, keys, n_splits, _ = build_cv_graph(
  File "C:\Users\myuser\Documents\code\condavirtualenv\lib\site-packages\dask_ml\model_selection\_search.py", line 216, in build_cv_graph
    normalize_estimator(estimator),
  File "C:\Users\myuser\Documents\code\condavirtualenv\lib\site-packages\dask_ml\model_selection\_normalize.py", line 38, in normalize_estimator
    val = getattr(est, attr)
  File "C:\Users\myuser\Documents\code\condavirtualenv\lib\site-packages\xgboost\sklearn.py", line 542, in feature_importances_
    b = self.get_booster()
  File "C:\Users\myuser\Documents\code\condavirtualenv\lib\site-packages\xgboost\sklearn.py", line 193, in get_booster
    raise XGBoostError('need to call fit or load_model beforehand')