我正在实现一个自定义转换器以在我的管道中使用,它在初始化时使用参数myClusters(现在称为 K-Means Clustering,但只是一个测试类):
class KMeansClustering:
    """Placeholder pass-through transformer used to test pipeline tuning.

    Despite the name, no clustering is performed: ``fit`` learns nothing and
    ``transform`` returns a copy of its input.

    ``get_params`` and ``set_params`` are provided so that scikit-learn
    utilities (``Pipeline.set_params``, ``clone``, hyper-parameter searches)
    can read and update ``myClusters`` through a nested name such as
    ``KMeans__myClusters``. Inheriting from ``sklearn.base.BaseEstimator``
    and ``TransformerMixin`` would supply the same methods.
    """

    def __init__(self, myClusters=None):
        # Stored unmodified so get_params() returns exactly what was passed.
        self.myClusters = myClusters

    def get_params(self, deep=True):
        """Return the constructor parameters as a ``{name: value}`` dict.

        ``deep`` is accepted for scikit-learn API compatibility; this class
        has no nested estimators, so it has no effect.
        """
        return {"myClusters": self.myClusters}

    def set_params(self, **params):
        """Update constructor parameters in place and return ``self``.

        Raises:
            ValueError: if a name is not a constructor parameter.
        """
        valid = self.get_params(deep=False)
        # Validate every name first so a bad call leaves the object untouched.
        for name in params:
            if name not in valid:
                raise ValueError(
                    f"Invalid parameter {name!r} for {type(self).__name__}. "
                    f"Valid parameters are: {sorted(valid)!r}."
                )
        for name, value in params.items():
            setattr(self, name, value)
        return self

    def fit(self, X, y=None):
        """Do nothing and return ``self``; ``X`` and ``y`` are ignored."""
        return self

    def transform(self, X, y=None):
        """Return ``X.copy()``; ``y`` is ignored."""
        return X.copy()
管道本身通过 scikit-optimize 进行调参:
from skopt import BayesSearchCV
from sklearn.pipeline import Pipeline
from sklearn import tree
# Bayesian hyper-parameter search over a two-step pipeline: the custom
# transformer followed by a decision tree classifier.
opt = BayesSearchCV(
    Pipeline([
        ('KMeans', KMeansClustering()),
        ('DecTree', tree.DecisionTreeClassifier(random_state=0)),
    ]),
    {
        # NOTE(review): myClusters is defined outside this snippet. If it is
        # a list it is presumably treated as a set of categories; verify that
        # its values are unique and hashable.
        'KMeans__myClusters': myClusters,
        # (low, high) integer tuples declare inclusive integer dimensions,
        # so the optimizer can exploit ordering instead of treating every
        # value as an unrelated category.
        'DecTree__max_depth': (1, 40),
        # NOTE(review): assumes X has at least 92 features; confirm.
        'DecTree__max_features': (1, 92),
        'DecTree__min_samples_leaf': (1, 1000),
        # An integer min_samples_split must be at least 2; a sampled value
        # of 1 would make the tree reject the parameter at fit time.
        'DecTree__min_samples_split': (2, 1000),
    },
    cv=10,
    random_state=0,
    n_jobs=-1,
    refit=False,
)
现在调用 opt.fit(X, y)
会抛出以下错误:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-26-c7a7ae6901e0> in <module>
----> 1 opt.fit(X, y)
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\searchcv.py in fit(self, X, y, groups, callback)
690 n_points_adjusted = min(n_iter, n_points)
691
--> 692 optim_result = self._step(
693 X, y, search_space, optimizer,
694 groups=groups, n_points=n_points_adjusted
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\searchcv.py in _step(self, X, y, search_space, optimizer, groups, n_points)
563
564 # get parameter values to evaluate
--> 565 params = optimizer.ask(n_points=n_points)
566
567 # convert parameters to python native types
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\optimizer\optimizer.py in ask(self, n_points, strategy)
393 X = []
394 for i in range(n_points):
--> 395 x = opt.ask()
396 X.append(x)
397
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\optimizer\optimizer.py in ask(self, n_points, strategy)
365 """
366 if n_points is None:
--> 367 return self._ask()
368
369 supported_strategies = ["cl_min", "cl_mean", "cl_max"]
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\optimizer\optimizer.py in _ask(self)
432 # our random state.
433 if self._initial_samples is None:
--> 434 return self.space.rvs(random_state=self.rng)[0]
435 else:
436 # The samples are evaluated starting form initial_samples[0]
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\space.py in rvs(self, n_samples, random_state)
892
893 for dim in self.dimensions:
--> 894 columns.append(dim.rvs(n_samples=n_samples, random_state=rng))
895
896 # Transpose
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\space.py in rvs(self, n_samples, random_state)
690 return self.inverse_transform([(choices)])
691 elif self.transform_ == "normalize":
--> 692 return self.inverse_transform(list(choices))
693 else:
694 return [self.categories[c] for c in choices]
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\space.py in inverse_transform(self, Xt)
677 # The concatenation of all transformed dimensions makes Xt to be
678 # of type float, hence the required cast back to int.
--> 679 inv_transform = super(Categorical, self).inverse_transform(Xt)
680 if isinstance(inv_transform, list):
681 inv_transform = np.array(inv_transform)
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\space.py in inverse_transform(self, Xt)
166 original space.
167 """
--> 168 return self.transformer.inverse_transform(Xt)
169
170 def set_transformer(self):
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\transformers.py in inverse_transform(self, X)
307 def inverse_transform(self, X):
308 for transformer in self.transformers[::-1]:
--> 309 X = transformer.inverse_transform(X)
310 return X
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\transformers.py in inverse_transform(self, Xt)
214 else:
215 Xt = np.asarray(Xt)
--> 216 return [
217 self.inverse_mapping_[int(np.round(i))] for i in Xt
218 ]
D:\Program Files (x86)\Anaconda\lib\site-packages\skopt\space\transformers.py in <listcomp>(.0)
215 Xt = np.asarray(Xt)
216 return [
--> 217 self.inverse_mapping_[int(np.round(i))] for i in Xt
218 ]
219
KeyError: 15
我不确定如何处理这个问题。但是,如果把变量 myClusters
从转换器和管道的搜索空间中删除,就不会引发错误。
编辑 12/07/2021:我认为这里的问题是 BayesSearchCV
无法同时处理多种算法,一次只能优化一种算法。
不过,这个问题可以很容易地通过使用函数 skopt.gp_minimize()
来解决。正如此示例所示,只需将参数传递给目标函数,并在该函数内部执行整个工作流,就可以使用自定义工作流。