1

我正在尝试构建一个基本的机器学习(ML)流水线,在调整超参数的同时进行特征选择。

代码如下。

# Pipeline for joint feature selection and hyperparameter tuning:
# standardize the inputs, run cross-validated recursive feature elimination
# (RFECV) around a LightGBM regressor, and let BayesSearchCV search over the
# regressor's hyperparameters. The best fitted pipeline is saved to disk and
# the total wall-clock time is printed.
starttime = timeit.default_timer()
scaler = StandardScaler()

# learning_rate must be numeric; the original passed the string '0.1'.
# NOTE(review): n_jobs is set at three nested levels (LGBMRegressor, RFECV,
# BayesSearchCV). This may oversubscribe the CPU and contribute to the slow
# runtime -- confirm and consider parallelising at only one level.
rfegbm = RFECV(
    estimator=LGBMRegressor(learning_rate=0.1, n_jobs=-2),
    step=1,        # eliminate one feature per iteration
    cv=4,
    scoring='r2',
    n_jobs=4,
    verbose=2,
)

pipe = Pipeline([
    ('scaler', scaler),
    ('rfegbm', rfegbm),
])

# Keys follow the <pipeline step>__<attribute>__<parameter> convention so the
# search reaches the LGBMRegressor wrapped inside the RFECV step.
searchspace = {
    'rfegbm__estimator__num_leaves': Integer(500, 50000, prior='log-uniform'),
    'rfegbm__estimator__max_depth': Integer(2, 2000),
    'rfegbm__estimator__n_estimators': Integer(50, 1000),
}

search1 = BayesSearchCV(
    pipe,
    searchspace,
    n_iter=15,
    cv=4,
    n_jobs=4,
    verbose=3,
)

search1.fit(X_train, y_train)

# NOTE(review): the '.pk1' extension looks like a typo for '.pkl' -- left
# unchanged because other code may load the file by this exact name.
joblib.dump(search1.best_estimator_, 'USEPbestestimator.pk1', compress=1)

endtime = timeit.default_timer()
duration = endtime - starttime
print(duration, 'Seconds')

它似乎运行良好,尽管速度很慢。但是,当模型似乎已完成拟合时,它会抛出 AttributeError 并停止。之后当我尝试访问 search1 时,它似乎并未完成拟合(not fitted),找不到 best_estimator_ 等任何拟合后才有的属性。我究竟做错了什么?

错误信息(回溯)如下。

AttributeError                            Traceback (most recent call last)
<ipython-input-5-fbb04498cc8f> in <module>
     24                        verbose = 3)
     25 
---> 26 search1.fit (X_train,y_train)
     27 
     28 joblib.dump(result.best_estimator_, 'USEPbestestimator.pk1', compress = 1)

~\anaconda3\lib\site-packages\skopt\searchcv.py in fit(self, X, y, groups, callback, **fit_params)
    464             self.optimizer_kwargs_ = dict(self.optimizer_kwargs)
    465 
--> 466         super().fit(X=X, y=y, groups=groups, **fit_params)
    467 
    468         # BaseSearchCV never ranked train scores,

~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
    839                 return results
    840 
--> 841             self._run_search(evaluate_candidates)
    842 
    843             # multimetric is determined here because in the case of a callable

~\anaconda3\lib\site-packages\skopt\searchcv.py in _run_search(self, evaluate_candidates)
    510                 n_points_adjusted = min(n_iter, n_points)
    511 
--> 512                 optim_result = self._step(
    513                     search_space, optimizer,
    514                     evaluate_candidates, n_points=n_points_adjusted

~\anaconda3\lib\site-packages\skopt\searchcv.py in _step(self, search_space, optimizer, evaluate_candidates, n_points)
    406         params_dict = [point_asdict(search_space, p) for p in params]
    407 
--> 408         all_results = evaluate_candidates(params_dict)
    409         # Feed the point and objective value back into optimizer
    410         # Optimizer minimizes objective, hence provide negative score

~\anaconda3\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params, cv, more_results)
    793                               n_splits, n_candidates, n_candidates * n_splits))
    794 
--> 795                 out = parallel(delayed(_fit_and_score)(clone(base_estimator),
    796                                                        X, y,
    797                                                        train=train, test=test,

~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1052 
   1053             with self._backend.retrieval_context():
-> 1054                 self.retrieve()
   1055             # Make sure that we get a last message telling us we are done
   1056             elapsed_time = time.time() - self._start_time

~\anaconda3\lib\site-packages\joblib\parallel.py in retrieve(self)
    931             try:
    932                 if getattr(self._backend, 'supports_timeout', False):
--> 933                     self._output.extend(job.get(timeout=self.timeout))
    934                 else:
    935                     self._output.extend(job.get())

~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in wrap_future_result(future, timeout)
    540         AsyncResults.get from multiprocessing."""
    541         try:
--> 542             return future.result(timeout=timeout)
    543         except CfTimeoutError as e:
    544             raise TimeoutError from e

~\anaconda3\lib\concurrent\futures\_base.py in result(self, timeout)
    437                 raise CancelledError()
    438             elif self._state == FINISHED:
--> 439                 return self.__get_result()
    440             else:
    441                 raise TimeoutError()

~\anaconda3\lib\concurrent\futures\_base.py in __get_result(self)
    386     def __get_result(self):
    387         if self._exception:
--> 388             raise self._exception
    389         else:
    390             return self._result

AttributeError: 'NoneType' object has no attribute 'predict'
4

0 回答 0