我正在使用 RFECV 训练一些数据,以通过适当数量的功能获得最佳精度。但我一直收到与标题中提到的相同的错误。下面是代码。
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.cross_validation import StratifiedKFold
from sklearn.feature_selection import RFECV
import scipy.io as sio
import numpy as np
from sklearn.metrics import roc_curve,auc
data=sio.loadmat('B.mat')#B is trainData,shape (68L,160L)
X=data['B']
label=sio.loadmat('label.mat')#label is the target values,shape(68L,)
y=label['label'].reshape(68)
# Create the RFE object and compute a cross-validated score.
cv=StratifiedKFold(y,7)
random_state = np.random.RandomState(0)
svc = SVC(kernel="linear",probability=True,random_state=random_state)
# The "accuracy" scoring is proportional to the number of correct
# classifications
rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(y, 7),
scoring='accuracy')
#rfecv.fit(trainData,trainLabel)
for i, (train, test) in enumerate(cv):
probas_ = rfecv.fit(X[train], y[train]).predict_proba(X[test])
# Compute ROC curve and area the curve
fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
#mean_tpr += interp(mean_fpr, fpr, tpr)
#mean_tpr[0] = 0.0
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))
print("Optimal number of features : %d" % rfecv.n_features_)
# Plot number of features VS. cross-validation scores
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score (nb of correct classifications)")
plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
plt.show()
错误发生在 中probas_ = rfecv.fit(X[train], y[train]).predict_proba(X[test])
,我真的不知道错误是什么。整个回溯如下:
Traceback (most recent call last):
File "G:/zhouzhen/python/plot_rfe_with_cross_validation.py", line 37, in <module>
probas_ = rfecv.fit(X[train], y[train]).predict_proba(X[test])
File "C:\Anaconda2\lib\site-packages\sklearn\feature_selection\rfe.py", line 416, in fit
X_train, y_train = _safe_split(self.estimator, X, y, train)
File "C:\Anaconda2\lib\site-packages\sklearn\cross_validation.py", line 1591, in _safe_split
X_subset = safe_indexing(X, indices)
File "C:\Anaconda2\lib\site-packages\sklearn\utils\__init__.py", line 163, in safe_indexing
return X.take(indices, axis=0)
IndexError: index 58 is out of bounds for size 58
Process finished with exit code 1
我对此进行了调试,发现它令人困惑。