我正在构建机器学习模型管道。我有一个自定义函数,它将更改特定列的值。我已经定义了自定义变压器,它单独工作正常。但是,如果我从管道中调用它,它会给我带来错误。
示例数据框
df = pd.DataFrame({'y': [4,5,6], 'a':[3,2,3], 'b' : [2,3,4]})
import numpy as np
import pandas as pd
import sklearn
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
class Extractor(BaseEstimator, TransformerMixin):
def __init__(self):
return None
def fit(self, x, y=None):
return self
def map_values(self, x):
if x in [1.0,2.0,3.0]:
return "Class A"
if x in [4.0,5.0,6.0]:
return "Class B"
if x in [7.0,8.0]:
return "Class C"
if x in [9.0,10.0]:
return "Class D"
else:
return "Other"
def transform(self, X):
return self
def fit_transform(self, X):
X = X.copy()
X = X.apply(lambda x : self.map_values(x))
return X
e = Extractor()
e.fit_transform(df['a'])
0 Class A
1 Clas C
2 Other
3 Class B
Name: a, dtype: object
管道
features = ['a']
numeric_features=['b']
numeric_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy='median'))])
custom_transformer = Pipeline(steps=[
('map_value', Extractor())])
preprocessor = ColumnTransformer(
transformers=[
('num', numeric_transformer, numeric_features),
('time',custom_transformer, features)])
X_new = df[['a','b']]
y_new = df['y']
X_transform = preprocessor.fit_transform(X_new,y_new)
TypeError: All estimators should implement fit and transform, or can be 'drop' or 'passthrough' specifiers. 'Pipeline(steps=[('map_value', Extractor())])' (type <class 'sklearn.pipeline.Pipeline'>) doesn't.
我想让自定义处理器在管道中工作。