0

我正在实现一个用于管道(Pipeline)的自定义转换器,它在初始化时接收参数 myClusters(该类目前命名为 KMeansClustering,但只是一个测试类):

class KMeansClustering:
    """Placeholder pass-through transformer with one tunable parameter.

    ``fit`` learns nothing and ``transform`` returns a copy of its input, so
    the class only serves as a test stand-in inside a pipeline.

    ``get_params`` / ``set_params`` are implemented by hand so the object
    follows the scikit-learn estimator parameter protocol without inheriting
    from ``sklearn.base.BaseEstimator``. Composite estimators and search
    utilities rely on these two methods to read and assign nested parameters
    such as ``KMeans__myClusters``.
    """

    # Names of the constructor arguments exposed as tunable parameters.
    _param_names = ("myClusters",)

    def __init__(self, myClusters=None):
        """Store ``myClusters`` unchanged; no validation is performed."""
        self.myClusters = myClusters

    def get_params(self, deep=True):
        """Return the constructor parameters as a ``{name: value}`` dict.

        ``deep`` is accepted for protocol compatibility only; this class has
        no nested estimators, so it has no effect.
        """
        return {name: getattr(self, name) for name in self._param_names}

    def set_params(self, **params):
        """Assign the given parameters and return ``self``.

        Raises:
            ValueError: if a key is not a known parameter name. All keys are
                checked before anything is assigned, so a failed call leaves
                the instance untouched.
        """
        unknown = [name for name in params if name not in self._param_names]
        if unknown:
            raise ValueError(
                f"Invalid parameter(s) {unknown!r} for "
                f"{type(self).__name__}; valid parameters are "
                f"{list(self._param_names)!r}."
            )
        for name, value in params.items():
            setattr(self, name, value)
        return self

    def fit(self, X, y=None):
        """Do nothing and return ``self``; ``X`` and ``y`` are ignored."""
        return self

    def transform(self, X, y=None):
        """Return ``X.copy()``; ``y`` is ignored."""
        return X.copy()

管道本身通过 scikit-optimize 进行调参:

from skopt import BayesSearchCV
from sklearn.pipeline import Pipeline
from sklearn import tree

# Bayesian hyper-parameter search over a two-step pipeline: the custom
# transformer followed by a decision-tree classifier.
opt = BayesSearchCV(
    Pipeline([
        ('KMeans', KMeansClustering()),
        ('DecTree', tree.DecisionTreeClassifier(random_state=0)),
    ]),
    {
        # `myClusters` is not defined in this snippet; it must already hold
        # the candidate values for the transformer's parameter.
        'KMeans__myClusters': myClusters,
        # NOTE(review): list-valued dimensions appear to go through skopt's
        # Categorical code path (see the traceback); for plain integer ranges
        # a (low, high) tuple may be the better fit - confirm in skopt docs.
        'DecTree__max_depth': list(range(1, 41)),
        # presumably the data set has 92 features - TODO confirm
        'DecTree__max_features': list(range(1, 93)),
        'DecTree__min_samples_leaf': list(range(1, 1001)),
        # An integer min_samples_split must be at least 2 for scikit-learn's
        # decision trees, so the candidate range starts at 2 rather than 1.
        'DecTree__min_samples_split': list(range(2, 1001)),
    },
    cv=10,
    random_state=0,
    n_jobs=-1,
    refit=False,
)

现在调用opt.fit(X, y)给我以下错误:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-26-c7a7ae6901e0> in <module>
----> 1 opt.fit(X, y)

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\searchcv.py in fit(self, X, y, groups, callback)
    690                 n_points_adjusted = min(n_iter, n_points)
    691 
--> 692                 optim_result = self._step(
    693                     X, y, search_space, optimizer,
    694                     groups=groups, n_points=n_points_adjusted

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\searchcv.py in _step(self, X, y, search_space, optimizer, groups, n_points)
    563 
    564         # get parameter values to evaluate
--> 565         params = optimizer.ask(n_points=n_points)
    566 
    567         # convert parameters to python native types

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\optimizer\optimizer.py in ask(self, n_points, strategy)
    393         X = []
    394         for i in range(n_points):
--> 395             x = opt.ask()
    396             X.append(x)
    397 

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\optimizer\optimizer.py in ask(self, n_points, strategy)
    365         """
    366         if n_points is None:
--> 367             return self._ask()
    368 
    369         supported_strategies = ["cl_min", "cl_mean", "cl_max"]

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\optimizer\optimizer.py in _ask(self)
    432             # our random state.
    433             if self._initial_samples is None:
--> 434                 return self.space.rvs(random_state=self.rng)[0]
    435             else:
    436                 # The samples are evaluated starting form initial_samples[0]

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\space.py in rvs(self, n_samples, random_state)
    892 
    893         for dim in self.dimensions:
--> 894             columns.append(dim.rvs(n_samples=n_samples, random_state=rng))
    895 
    896         # Transpose

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\space.py in rvs(self, n_samples, random_state)
    690             return self.inverse_transform([(choices)])
    691         elif self.transform_ == "normalize":
--> 692             return self.inverse_transform(list(choices))
    693         else:
    694             return [self.categories[c] for c in choices]

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\space.py in inverse_transform(self, Xt)
    677         # The concatenation of all transformed dimensions makes Xt to be
    678         # of type float, hence the required cast back to int.
--> 679         inv_transform = super(Categorical, self).inverse_transform(Xt)
    680         if isinstance(inv_transform, list):
    681             inv_transform = np.array(inv_transform)

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\space.py in inverse_transform(self, Xt)
    166            original space.
    167         """
--> 168         return self.transformer.inverse_transform(Xt)
    169 
    170     def set_transformer(self):

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\transformers.py in inverse_transform(self, X)
    307     def inverse_transform(self, X):
    308         for transformer in self.transformers[::-1]:
--> 309             X = transformer.inverse_transform(X)
    310         return X

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\transformers.py in inverse_transform(self, Xt)
    214         else:
    215             Xt = np.asarray(Xt)
--> 216         return [
    217             self.inverse_mapping_[int(np.round(i))] for i in Xt
    218         ]

D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\transformers.py in <listcomp>(.0)
    215             Xt = np.asarray(Xt)
    216         return [
--> 217             self.inverse_mapping_[int(np.round(i))] for i in Xt
    218         ]
    219 

KeyError: 15

我不确定该如何处理这个问题。不过,如果把 myClusters 这个参数从转换器以及管道的搜索空间中都删除,就不会引发错误。

编辑 12/07/2021:我认为这里的问题是BayesSearchCV无法处理多种算法,并且一次只能优化一种算法。

但是,这个问题可以很容易地通过使用该函数skopt.gp_minimize()来解决。正如在此示例中所见,这允许通过简单地将参数传递给目标函数并在此函数中执行工作流来使用自定义工作流。

4

0 回答 0