python - PyCaret setup() 报错：“Must have equal len keys and value when setting with an iterable”（使用可迭代对象赋值时，键与值的长度必须相等）

Question

我安装了 PyCaret 和 pycaret[full]，版本为 2.3.6，

并在从 pycaret.regression 导入所有内容后使用 setup()

from pycaret.regression import *

env = setup(data = train, target = 'result', silent = True)

但得到以下错误（回溯中的调用还额外传入了 transform_target = True 和 imputation_type = 'iterative'）：

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_39/3529229441.py in <module>
----> 1 env = setup(data = train, target = 'result', silent = True, transform_target = True, imputation_type = 'iterative')

/opt/conda/lib/python3.7/site-packages/pycaret/regression.py in setup(data, target, train_size, test_data, preprocess, imputation_type, iterative_imputation_iters, categorical_features, categorical_imputation, categorical_iterative_imputer, ordinal_features, high_cardinality_features, high_cardinality_method, numeric_features, numeric_imputation, numeric_iterative_imputer, date_features, ignore_features, normalize, normalize_method, transformation, transformation_method, handle_unknown_categorical, unknown_categorical_method, pca, pca_method, pca_components, ignore_low_variance, combine_rare_levels, rare_level_threshold, bin_numeric_features, remove_outliers, outliers_threshold, remove_multicollinearity, multicollinearity_threshold, remove_perfect_collinearity, create_clusters, cluster_iter, polynomial_features, polynomial_degree, trigonometry_features, polynomial_threshold, group_features, group_names, feature_selection, feature_selection_threshold, feature_selection_method, feature_interaction, feature_ratio, interaction_threshold, transform_target, transform_target_method, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, custom_pipeline, html, session_id, log_experiment, experiment_name, log_plots, log_profile, log_data, silent, verbose, profile, profile_kwargs)
    645         verbose=verbose,
    646         profile=profile,
--> 647         profile_kwargs=profile_kwargs,
    648     )
    649 

/opt/conda/lib/python3.7/site-packages/pycaret/internal/tabular.py in setup(data, target, ml_usecase, available_plots, train_size, test_data, preprocess, imputation_type, iterative_imputation_iters, categorical_features, categorical_imputation, categorical_iterative_imputer, ordinal_features, high_cardinality_features, high_cardinality_method, numeric_features, numeric_imputation, numeric_iterative_imputer, date_features, ignore_features, normalize, normalize_method, transformation, transformation_method, handle_unknown_categorical, unknown_categorical_method, pca, pca_method, pca_components, ignore_low_variance, combine_rare_levels, rare_level_threshold, bin_numeric_features, remove_outliers, outliers_threshold, remove_multicollinearity, multicollinearity_threshold, remove_perfect_collinearity, create_clusters, cluster_iter, polynomial_features, polynomial_degree, trigonometry_features, polynomial_threshold, group_features, group_names, feature_selection, feature_selection_threshold, feature_selection_method, feature_interaction, feature_ratio, interaction_threshold, fix_imbalance, fix_imbalance_method, transform_target, transform_target_method, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, custom_pipeline, html, session_id, log_experiment, experiment_name, log_plots, log_profile, log_data, silent, verbose, profile, profile_kwargs, display)
   1353             test_data = pd.concat([X_test, y_test], axis=1)
   1354 
-> 1355         train_data = prep_pipe.fit_transform(train_data)
   1356         # workaround to also transform target
   1357         dtypes.final_training_columns.append(target)

/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in fit_transform(self, X, y, **fit_params)
    365         """
    366         fit_params_steps = self._check_fit_params(**fit_params)
--> 367         Xt = self._fit(X, y, **fit_params_steps)
    368 
    369         last_step = self._final_estimator

/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps)
    294                 message_clsname='Pipeline',
    295                 message=self._log_message(step_idx),
--> 296                 **fit_params_steps[name])
    297             # Replace the transformer of the step with the fitted
    298             # transformer. This is necessary when loading the transformer

/opt/conda/lib/python3.7/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
    347 
    348     def __call__(self, *args, **kwargs):
--> 349         return self.func(*args, **kwargs)
    350 
    351     def call_and_shelve(self, *args, **kwargs):

/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
    738     with _print_elapsed_time(message_clsname, message):
    739         if hasattr(transformer, 'fit_transform'):
--> 740             res = transformer.fit_transform(X, y, **fit_params)
    741         else:
    742             res = transformer.fit(X, y, **fit_params).transform(X)

/opt/conda/lib/python3.7/site-packages/pycaret/internal/preprocess.py in fit_transform(self, data, y)
   1148         self, data, y=None
   1149     ):  # There is no transformation happening in training data set, its all about test
-> 1150         self.fit(data)
   1151         return data
   1152 

/opt/conda/lib/python3.7/site-packages/pycaret/internal/preprocess.py in fit(self, data, y)
   1112             if self.replacement_strategy == "least frequent":
   1113                 self.ph_train_level.loc[0, i] = list(
-> 1114                     data[i].value_counts().sort_values().index
   1115                 )
   1116             else:

/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in __setitem__(self, key, value)
    721 
    722         iloc = self if self.name == "iloc" else self.obj.iloc
--> 723         iloc._setitem_with_indexer(indexer, value, self.name)
    724 
    725     def _validate_key(self, key, axis: int):

/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _setitem_with_indexer(self, indexer, value, name)
   1728         if take_split_path:
   1729             # We have to operate column-wise
-> 1730             self._setitem_with_indexer_split_path(indexer, value, name)
   1731         else:
   1732             self._setitem_single_block(indexer, value, name)

/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _setitem_with_indexer_split_path(self, indexer, value, name)
   1784 
   1785                 raise ValueError(
-> 1786                     "Must have equal len keys and value "
   1787                     "when setting with an iterable"
   1788                 )

ValueError: Must have equal len keys and value when setting with an iterable

数据集链接是：Kaggle 竞赛 “Water, water everywhere, not a drop to drink!”（到处都是水，却没有一滴可以喝！）

| categoryA | categoryB | categoryC  | categoryD | categoryE | categoryF | featureA    | featureB  | featureC | featureD   | featureE  | featureF | featureG     | featureH  | featureI   | compositionA | compositionB | compositionC | compositionD | compositionE | compositionF | compositionG | compositionH | compositionI | compositionJ | unit     | result   |
| --------- | --------- | ---------- | --------- | --------- | --------- | ----------- | --------- | -------- | ---------- | --------- | -------- | ------------ | --------- | ---------- | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | -------- | -------- |
| catA\_1   | catB\_0   | catC\_718  | catD\_0   | catE\_0   | catF\_0   | 75808.375   | 4.457840  | 0.005718 | 122.299437 | 30.831906 | 0.0      | 2.806036e+12 | 0.000000  | 71176346.0 | 0.0          | 10.00        | 26.00        | 0.0          | 32.00        | 3.0          | 0.00         | 9.00         | 22.26        | 20.00        | unit\_6  | 0.000458 |
| catA\_1   | catB\_0   | catC\_1309 | catD\_0   | catE\_0   | catF\_0   | 75808.375   | 4.457840  | 0.005718 | 122.299437 | 30.831906 | 0.0      | 2.806036e+12 | 0.000000  | 71176346.0 | 0.0          | 10.00        | 26.00        | 0.0          | 32.00        | 3.0          | 0.00         | 9.00         | 22.26        | 20.00        | unit\_6  | 0.000335 |
| catA\_15  | catB\_0   | catC\_1309 | catD\_0   | catE\_0   | catF\_0   | 75808.375   | 4.457840  | 0.005718 | 122.299437 | 30.831906 | 0.0      | 2.806036e+12 | 0.000000  | 71176346.0 | 0.0          | 10.00        | 26.00        | 0.0          | 32.00        | 3.0          | 0.00         | 9.00         | 22.26        | 20.00        | unit\_4  | 0.054072 |
| catA\_0   | catB\_0   | catC\_935  | catD\_0   | catE\_0   | catF\_0   | 75808.375   | 4.457840  | 0.005718 | 122.299437 | 30.831906 | 2.0      | 2.806036e+12 | 0.000000  | 71176346.0 | 0.0          | 10.00        | 26.00        | 0.0          | 32.00        | 3.0          | 0.00         | 9.00         | 22.26        | 20.00        | unit\_5  | 0.061143 |
| catA\_22  | catB\_0   | catC\_1325 | catD\_0   | catE\_2   | catF\_0   | \-40055.250 | 4.363288  | 0.729194 | 93.677197  | 15.047884 | 4.0      | 1.464509e+12 | 87.158924 | 50941692.0 | 0.0          | 8.00         | 14.00        | 0.0          | 49.00        | 3.0          | 2.00         | 9.00         | 16.84        | 15.00        | unit\_15 | 0.015439 |
| ...       | ...       | ...        | ...       | ...       | ...       | ...         | ...       | ...      | ...        | ...       | ...      | ...          | ...       | ...        | ...          | ...          | ...          | ...          | ...          | ...          | ...          | ...          | ...          | ...          | ...      | ...      |
| catA\_3   | catB\_0   | catC\_639  | catD\_0   | catE\_0   | catF\_0   | 75808.375   | 4.457840  | 0.005718 | 122.299437 | 30.831906 | 0.0      | 2.806036e+12 | 0.000000  | 71176346.0 | 0.0          | 10.00        | 26.00        | 0.0          | 32.00        | 3.0          | 0.00         | 9.00         | 22.26        | 20.00        | unit\_1  | 0.008260 |
| catA\_8   | catB\_0   | catC\_445  | catD\_0   | catE\_0   | catF\_0   | 75808.375   | 4.457840  | 0.005718 | 122.299437 | 30.831906 | 0.0      | 2.806036e+12 | 0.000000  | 71176346.0 | 0.0          | 10.00        | 26.00        | 0.0          | 32.00        | 3.0          | 0.00         | 9.00         | 22.26        | 20.00        | unit\_9  | 0.002395 |
| catA\_25  | catB\_0   | catC\_445  | catD\_0   | catE\_0   | catF\_1   | 75808.375   | 4.457840  | 0.005718 | 122.299437 | 30.831906 | 0.0      | 2.806036e+12 | 0.000000  | 71176346.0 | 0.0          | 10.00        | 26.00        | 0.0          | 32.00        | 3.0          | 0.00         | 9.00         | 22.26        | 20.00        | unit\_12 | 0.150982 |
| catA\_7   | catB\_0   | catC\_2322 | catD\_0   | catE\_5   | catF\_0   | 18927.000   | 21.007457 | 0.000387 | 18.148547  | 14.177182 | 0.0      | 2.526922e+11 | 0.000000  | 2147000.0  | 0.0          | 9.24         | 13.13        | 0.0          | 35.88        | 1.7          | 0.73         | 1.45         | 28.12        | 37.83        | unit\_8  | 0.488908 |
| catA\_9   | catB\_0   | catC\_2322 | catD\_0   | catE\_5   | catF\_0   | 18927.000   | 21.007457 | 0.000387 | 18.148547  | 14.177182 | 0.0      | 2.526922e+11 | 0.000000  | 2147000.0  | 0.0          | 9.24         | 13.13        | 0.0          | 35.88        | 1.7          | 0.73         | 1.45         | 28.12        | 37.83        | unit\_10 | 0.232833 |

目标列是 result。

python - PyCaret setup() 报错：“Must have equal len keys and value when setting with an iterable”（使用可迭代对象赋值时，键与值的长度必须相等）

0 回答 0

Related

Reference

python - PyCaret setup() 报错：“Must have equal len keys and value when setting with an iterable”（使用可迭代对象赋值时，键与值的长度必须相等）