我是 scikit-learn 的新手,我想把 cross_validation.cross_val_score 与 metrics.precision_recall_fscore_support 结合使用,
这样我就可以一次获得所有相关的交叉验证指标,而不必为准确率、精确率、召回率和 F1 各运行一次交叉验证。
但是当我这样尝试时,我得到一个 ValueError:
from sklearn.datasets import fetch_20newsgroups
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import metrics
from sklearn import cross_validation
import numpy as np
# Minimal reproduction script: builds text features for the 20 newsgroups
# training data and tries to cross-validate a linear SVM while asking for
# precision/recall/F-score in a single pass.

# Load the training subset, shuffled with a fixed seed (random_state=42).
# The category filter is left commented out, so no `categories` argument is
# passed.
data_train = fetch_20newsgroups(subset='train', #categories=categories,
shuffle=True, random_state=42)
# Linear SVM classifier configured with loss='l1' and penalty='l2'.
clf = LinearSVC(loss='l1', penalty='l2')
# Text vectorizer restricted to unigrams (ngram_range=(1,1)) with the built-in
# English stop-word list; IDF weighting and sublinear TF scaling are both
# switched off. max_df/min_df bound the document frequency of retained terms
# (presumably a 0.5 proportion cap and a 2-document floor -- confirm against
# the TfidfVectorizer docs for the installed version).
vectorizer = TfidfVectorizer(
sublinear_tf=False,
max_df=0.5,
min_df=2,
ngram_range = (1,1),
use_idf=False,
stop_words='english')
# Fit the vectorizer on the raw training documents and transform them in the
# same step.
X_train = vectorizer.fit_transform(data_train.data)
# Cross-validate:
# NOTE(review): a plain metric function is passed as `scoring` here. The
# traceback recorded after this script shows cross_val_score invoking it as
# scorer(estimator, X_test, y_test) and the ValueError being raised inside
# precision_recall_fscore_support, so this is the call that fails.
# Presumably `scoring` expects a callable with that (estimator, X, y)
# signature rather than a (y_true, y_pred) metric -- confirm against the
# scikit-learn documentation.
scores = cross_validation.cross_val_score(
clf, X_train, data_train.target, cv=5,
scoring=metrics.precision_recall_fscore_support)
这是错误:
File "<stdin>", line 3, in <module>
File "sklearn/cross_validation.py", line 1148, in cross_val_score
for train, test in cv)
File "sklearn/externals/joblib/parallel.py", line 514, in __call__
self.dispatch(function, args, kwargs)
File "sklearn/externals/joblib/parallel.py", line 311, in dispatch
job = ImmediateApply(func, args, kwargs)
File "sklearn/externals/joblib/parallel.py", line 135, in __init__
self.results = func(*args, **kwargs)
File "sklearn/cross_validation.py", line 1075, in _cross_val_score
score = scorer(estimator, X_test, y_test)
File "sklearn/metrics/metrics.py", line 1261, in precision_recall_fscore_support
print beta
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
注意,您需要 0.14-git 版本才能在 cross_validation.cross_val_score 中使用 scoring 参数。
# Show which scikit-learn version is installed. The quoted string on the last
# line is presumably the value echoed by the interactive interpreter rather
# than a statement of its own.
import sklearn
sklearn.__version__
'0.14-git'