我安装了 PyCaret 和 pycaret[full](版本均为 2.3.6),
并在从 pycaret.regression 导入所有内容后调用了 setup():
from pycaret.regression import *
env = setup(data = train, target = 'result', silent = True)
但得到以下错误(注意:回溯中实际执行的调用还额外传入了 transform_target = True 和 imputation_type = 'iterative'):
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_39/3529229441.py in <module>
----> 1 env = setup(data = train, target = 'result', silent = True, transform_target = True, imputation_type = 'iterative')
/opt/conda/lib/python3.7/site-packages/pycaret/regression.py in setup(data, target, train_size, test_data, preprocess, imputation_type, iterative_imputation_iters, categorical_features, categorical_imputation, categorical_iterative_imputer, ordinal_features, high_cardinality_features, high_cardinality_method, numeric_features, numeric_imputation, numeric_iterative_imputer, date_features, ignore_features, normalize, normalize_method, transformation, transformation_method, handle_unknown_categorical, unknown_categorical_method, pca, pca_method, pca_components, ignore_low_variance, combine_rare_levels, rare_level_threshold, bin_numeric_features, remove_outliers, outliers_threshold, remove_multicollinearity, multicollinearity_threshold, remove_perfect_collinearity, create_clusters, cluster_iter, polynomial_features, polynomial_degree, trigonometry_features, polynomial_threshold, group_features, group_names, feature_selection, feature_selection_threshold, feature_selection_method, feature_interaction, feature_ratio, interaction_threshold, transform_target, transform_target_method, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, custom_pipeline, html, session_id, log_experiment, experiment_name, log_plots, log_profile, log_data, silent, verbose, profile, profile_kwargs)
645 verbose=verbose,
646 profile=profile,
--> 647 profile_kwargs=profile_kwargs,
648 )
649
/opt/conda/lib/python3.7/site-packages/pycaret/internal/tabular.py in setup(data, target, ml_usecase, available_plots, train_size, test_data, preprocess, imputation_type, iterative_imputation_iters, categorical_features, categorical_imputation, categorical_iterative_imputer, ordinal_features, high_cardinality_features, high_cardinality_method, numeric_features, numeric_imputation, numeric_iterative_imputer, date_features, ignore_features, normalize, normalize_method, transformation, transformation_method, handle_unknown_categorical, unknown_categorical_method, pca, pca_method, pca_components, ignore_low_variance, combine_rare_levels, rare_level_threshold, bin_numeric_features, remove_outliers, outliers_threshold, remove_multicollinearity, multicollinearity_threshold, remove_perfect_collinearity, create_clusters, cluster_iter, polynomial_features, polynomial_degree, trigonometry_features, polynomial_threshold, group_features, group_names, feature_selection, feature_selection_threshold, feature_selection_method, feature_interaction, feature_ratio, interaction_threshold, fix_imbalance, fix_imbalance_method, transform_target, transform_target_method, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, custom_pipeline, html, session_id, log_experiment, experiment_name, log_plots, log_profile, log_data, silent, verbose, profile, profile_kwargs, display)
1353 test_data = pd.concat([X_test, y_test], axis=1)
1354
-> 1355 train_data = prep_pipe.fit_transform(train_data)
1356 # workaround to also transform target
1357 dtypes.final_training_columns.append(target)
/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in fit_transform(self, X, y, **fit_params)
365 """
366 fit_params_steps = self._check_fit_params(**fit_params)
--> 367 Xt = self._fit(X, y, **fit_params_steps)
368
369 last_step = self._final_estimator
/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps)
294 message_clsname='Pipeline',
295 message=self._log_message(step_idx),
--> 296 **fit_params_steps[name])
297 # Replace the transformer of the step with the fitted
298 # transformer. This is necessary when loading the transformer
/opt/conda/lib/python3.7/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
347
348 def __call__(self, *args, **kwargs):
--> 349 return self.func(*args, **kwargs)
350
351 def call_and_shelve(self, *args, **kwargs):
/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
738 with _print_elapsed_time(message_clsname, message):
739 if hasattr(transformer, 'fit_transform'):
--> 740 res = transformer.fit_transform(X, y, **fit_params)
741 else:
742 res = transformer.fit(X, y, **fit_params).transform(X)
/opt/conda/lib/python3.7/site-packages/pycaret/internal/preprocess.py in fit_transform(self, data, y)
1148 self, data, y=None
1149 ): # There is no transformation happening in training data set, its all about test
-> 1150 self.fit(data)
1151 return data
1152
/opt/conda/lib/python3.7/site-packages/pycaret/internal/preprocess.py in fit(self, data, y)
1112 if self.replacement_strategy == "least frequent":
1113 self.ph_train_level.loc[0, i] = list(
-> 1114 data[i].value_counts().sort_values().index
1115 )
1116 else:
/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in __setitem__(self, key, value)
721
722 iloc = self if self.name == "iloc" else self.obj.iloc
--> 723 iloc._setitem_with_indexer(indexer, value, self.name)
724
725 def _validate_key(self, key, axis: int):
/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _setitem_with_indexer(self, indexer, value, name)
1728 if take_split_path:
1729 # We have to operate column-wise
-> 1730 self._setitem_with_indexer_split_path(indexer, value, name)
1731 else:
1732 self._setitem_single_block(indexer, value, name)
/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _setitem_with_indexer_split_path(self, indexer, value, name)
1784
1785 raise ValueError(
-> 1786 "Must have equal len keys and value "
1787 "when setting with an iterable"
1788 )
ValueError: Must have equal len keys and value when setting with an iterable
数据集来自 Kaggle:“到处都是水,却一滴也不能喝!”
| categoryA | categoryB | categoryC | categoryD | categoryE | categoryF | featureA | featureB | featureC | featureD | featureE | featureF | featureG | featureH | featureI | compositionA | compositionB | compositionC | compositionD | compositionE | compositionF | compositionG | compositionH | compositionI | compositionJ | unit | result |
| --------- | --------- | ---------- | --------- | --------- | --------- | ----------- | --------- | -------- | ---------- | --------- | -------- | ------------ | --------- | ---------- | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | -------- | -------- |
| catA\_1 | catB\_0 | catC\_718 | catD\_0 | catE\_0 | catF\_0 | 75808.375 | 4.457840 | 0.005718 | 122.299437 | 30.831906 | 0.0 | 2.806036e+12 | 0.000000 | 71176346.0 | 0.0 | 10.00 | 26.00 | 0.0 | 32.00 | 3.0 | 0.00 | 9.00 | 22.26 | 20.00 | unit\_6 | 0.000458 |
| catA\_1 | catB\_0 | catC\_1309 | catD\_0 | catE\_0 | catF\_0 | 75808.375 | 4.457840 | 0.005718 | 122.299437 | 30.831906 | 0.0 | 2.806036e+12 | 0.000000 | 71176346.0 | 0.0 | 10.00 | 26.00 | 0.0 | 32.00 | 3.0 | 0.00 | 9.00 | 22.26 | 20.00 | unit\_6 | 0.000335 |
| catA\_15 | catB\_0 | catC\_1309 | catD\_0 | catE\_0 | catF\_0 | 75808.375 | 4.457840 | 0.005718 | 122.299437 | 30.831906 | 0.0 | 2.806036e+12 | 0.000000 | 71176346.0 | 0.0 | 10.00 | 26.00 | 0.0 | 32.00 | 3.0 | 0.00 | 9.00 | 22.26 | 20.00 | unit\_4 | 0.054072 |
| catA\_0 | catB\_0 | catC\_935 | catD\_0 | catE\_0 | catF\_0 | 75808.375 | 4.457840 | 0.005718 | 122.299437 | 30.831906 | 2.0 | 2.806036e+12 | 0.000000 | 71176346.0 | 0.0 | 10.00 | 26.00 | 0.0 | 32.00 | 3.0 | 0.00 | 9.00 | 22.26 | 20.00 | unit\_5 | 0.061143 |
| catA\_22 | catB\_0 | catC\_1325 | catD\_0 | catE\_2 | catF\_0 | \-40055.250 | 4.363288 | 0.729194 | 93.677197 | 15.047884 | 4.0 | 1.464509e+12 | 87.158924 | 50941692.0 | 0.0 | 8.00 | 14.00 | 0.0 | 49.00 | 3.0 | 2.00 | 9.00 | 16.84 | 15.00 | unit\_15 | 0.015439 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| catA\_3 | catB\_0 | catC\_639 | catD\_0 | catE\_0 | catF\_0 | 75808.375 | 4.457840 | 0.005718 | 122.299437 | 30.831906 | 0.0 | 2.806036e+12 | 0.000000 | 71176346.0 | 0.0 | 10.00 | 26.00 | 0.0 | 32.00 | 3.0 | 0.00 | 9.00 | 22.26 | 20.00 | unit\_1 | 0.008260 |
| catA\_8 | catB\_0 | catC\_445 | catD\_0 | catE\_0 | catF\_0 | 75808.375 | 4.457840 | 0.005718 | 122.299437 | 30.831906 | 0.0 | 2.806036e+12 | 0.000000 | 71176346.0 | 0.0 | 10.00 | 26.00 | 0.0 | 32.00 | 3.0 | 0.00 | 9.00 | 22.26 | 20.00 | unit\_9 | 0.002395 |
| catA\_25 | catB\_0 | catC\_445 | catD\_0 | catE\_0 | catF\_1 | 75808.375 | 4.457840 | 0.005718 | 122.299437 | 30.831906 | 0.0 | 2.806036e+12 | 0.000000 | 71176346.0 | 0.0 | 10.00 | 26.00 | 0.0 | 32.00 | 3.0 | 0.00 | 9.00 | 22.26 | 20.00 | unit\_12 | 0.150982 |
| catA\_7 | catB\_0 | catC\_2322 | catD\_0 | catE\_5 | catF\_0 | 18927.000 | 21.007457 | 0.000387 | 18.148547 | 14.177182 | 0.0 | 2.526922e+11 | 0.000000 | 2147000.0 | 0.0 | 9.24 | 13.13 | 0.0 | 35.88 | 1.7 | 0.73 | 1.45 | 28.12 | 37.83 | unit\_8 | 0.488908 |
| catA\_9 | catB\_0 | catC\_2322 | catD\_0 | catE\_5 | catF\_0 | 18927.000 | 21.007457 | 0.000387 | 18.148547 | 14.177182 | 0.0 | 2.526922e+11 | 0.000000 | 2147000.0 | 0.0 | 9.24 | 13.13 | 0.0 | 35.88 | 1.7 | 0.73 | 1.45 | 28.12 | 37.83 | unit\_10 | 0.232833 |
目标列是 `result`。