我一直在尝试一个我感兴趣的代码项目。我的数据集来自外汇市场,有 10 个特征,超过 70000 个属性,并且已经被分成一个训练集和测试集,但是我的 CatboostRegressor 一直出现这个错误。我需要对我的数据集做什么才能使回归器工作?或者是别的什么??
from catboost import CatBoostRegressor, Pool
train_data = Pool(zar_train, label=['bidclose', 'askclose'])
test_data = Pool(zar_test, label=['bidclose', 'askclose'])
eval_data = zar_val
eval_dataset = Pool(eval_data, label=['bidclose', 'askclose'])
model = CatBoostRegressor(learning_rate=0.03,
custom_metric=['Logloss',
'AUC:hints=skip_train~false'], score_function='Accuracy')
model.fit(train_data, test_data)
print(model.get_best_score())
错误:
---------------------------------------------------------------------------
CatBoostError Traceback (most recent call last)
<ipython-input-8-e0aa9e711bf9> in <module>
1 from catboost import CatBoostRegressor, Pool
----> 2 train_data = Pool(zar_train, label=['bidclose', 'askclose'])
3 test_data = Pool(zar_test, label=['bidclose', 'askclose'])
4 eval_data = zar_val
5 eval_dataset = Pool(eval_data, label=['bidclose', 'askclose'])
~\anaconda3\lib\site-packages\catboost\core.py in __init__(self, data, label, cat_features, text_features, embedding_features, column_description, pairs, delimiter, has_header, ignore_csv_quoting, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, feature_names, thread_count, log_cout, log_cerr)
615 )
616
--> 617 self._init(data, label, cat_features, text_features, embedding_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, feature_names, thread_count)
618 super(Pool, self).__init__()
619
~\anaconda3\lib\site-packages\catboost\core.py in _init(self, data, label, cat_features, text_features, embedding_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, feature_names, thread_count)
1085 if len(np.shape(label)) == 1:
1086 label = np.expand_dims(label, 1)
-> 1087 self._check_label_shape(label, samples_count)
1088 if feature_names is not None:
1089 self._check_feature_names(feature_names, features_count)
~\anaconda3\lib\site-packages\catboost\core.py in _check_label_shape(self, label, samples_count)
730 """
731 if len(label) != samples_count:
--> 732 raise CatBoostError("Length of label={} and length of data={} is different.".format(len(label), samples_count))
733
734 def _check_baseline_type(self, baseline):
CatBoostError: Length of label=2 and length of data=44908 is different.