Normally, when I need to normalize data, I do it as follows:
from sklearn.preprocessing import MinMaxScaler

normalizer = MinMaxScaler()
# Fit on the training split only, then reuse the learned statistics on the test split
X_train_norm = normalizer.fit_transform(X_train.values)
X_test_norm = normalizer.transform(X_test.values)
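My understanding of the manual version (a minimal sketch with made-up numbers, just to illustrate) is that transform reuses the statistics that fit_transform learned on the training split:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Made-up numbers purely for illustration
X_tr = np.array([[0.0], [10.0]])
X_te = np.array([[20.0]])

scaler = MinMaxScaler()
scaler.fit_transform(X_tr)                 # learns data_min_=0, data_max_=10 from train
print(scaler.data_min_, scaler.data_max_)  # [0.] [10.]
print(scaler.transform(X_te))              # [[2.]] -- test scaled with train statistics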
When I execute results = search.fit(X_train, y_train) and then y_pred = search.predict(X_test), is X_test scaled according to X_train using the best normalizer found by RandomizedSearchCV? Here is my code:
import math
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
# k-NN, with the normalizer treated as one more hyperparameter of the pipeline
model = KNeighborsClassifier()

grid = dict()
# Odd k values from 3 up to roughly twice the square root of the sample count
sqrt_records = round(math.sqrt(X_train.shape[0]))
grid['model__n_neighbors'] = list(range(3, sqrt_records * 2, 2))
grid['model__p'] = [1, 2]
grid['model__weights'] = ['uniform', 'distance']
grid['normalizer'] = [MinMaxScaler(), MaxAbsScaler(),
                      StandardScaler(), RobustScaler()]

# None is only a placeholder; the search swaps in one of the scalers above
steps = [('normalizer', None), ('model', model)]
pipeline = Pipeline(steps=steps)
cv = StratifiedKFold(n_splits=3)
# With a single scoring metric, refit=True refits the best pipeline on all of X_train
search = RandomizedSearchCV(pipeline, grid, scoring='f1_weighted', n_jobs=-1,
                            n_iter=10, cv=cv, verbose=2, refit=True,
                            random_state=42)
# Training
results = search.fit(X_train, y_train)  # fit() returns the search object itself
params = results.best_params_
print('Best Config: %s' % params)

# Classification
y_pred = search.predict(X_test)
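If I understand the docs correctly, the refit pipeline is exposed as search.best_estimator_, so this is the check I would run (a sketch; the assert only confirms that predict delegates to the refit pipeline):

# Which normalizer did the search settle on? best_estimator_ is the
# pipeline refit on all of X_train with the best parameters.
best_pipe = search.best_estimator_
print(type(best_pipe.named_steps['normalizer']).__name__)

# search.predict(X_test) delegates to best_pipe, which should apply the
# normalizer fitted on X_train before the KNN prediction
assert (search.predict(X_test) == best_pipe.predict(X_test)).all()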