0

我正在使用 RFECV 训练一些数据,以通过适当数量的功能获得最佳精度。但我一直收到与标题中提到的相同的错误。下面是代码。

import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.cross_validation import StratifiedKFold
from sklearn.feature_selection import RFECV
import scipy.io as sio
import numpy as np
from sklearn.metrics import roc_curve,auc

data=sio.loadmat('B.mat')#B is trainData,shape (68L,160L)
X=data['B']

label=sio.loadmat('label.mat')#label is the target values,shape(68L,)
y=label['label'].reshape(68)
# Create the RFE object and compute a cross-validated score.
cv=StratifiedKFold(y,7)
random_state = np.random.RandomState(0)
svc = SVC(kernel="linear",probability=True,random_state=random_state)
# The "accuracy" scoring is proportional to the number of correct
# classifications
rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(y, 7),
              scoring='accuracy')


#rfecv.fit(trainData,trainLabel)

for i, (train, test) in enumerate(cv):
    probas_ = rfecv.fit(X[train], y[train]).predict_proba(X[test])
    # Compute ROC curve and area the curve
    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
    #mean_tpr += interp(mean_fpr, fpr, tpr)
    #mean_tpr[0] = 0.0
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))


print("Optimal number of features : %d" % rfecv.n_features_)

# Plot number of features VS. cross-validation scores
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score (nb of correct classifications)")
plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)
plt.show()

错误发生在 中probas_ = rfecv.fit(X[train], y[train]).predict_proba(X[test]),我真的不知道错误是什么。整个回溯如下:

Traceback (most recent call last):
  File "G:/zhouzhen/python/plot_rfe_with_cross_validation.py", line 37, in <module>
    probas_ = rfecv.fit(X[train], y[train]).predict_proba(X[test])
  File "C:\Anaconda2\lib\site-packages\sklearn\feature_selection\rfe.py", line 416, in fit
    X_train, y_train = _safe_split(self.estimator, X, y, train)
  File "C:\Anaconda2\lib\site-packages\sklearn\cross_validation.py", line 1591, in _safe_split
    X_subset = safe_indexing(X, indices)
  File "C:\Anaconda2\lib\site-packages\sklearn\utils\__init__.py", line 163, in safe_indexing
    return X.take(indices, axis=0)
IndexError: index 58 is out of bounds for size 58

Process finished with exit code 1

我对此进行了调试,发现它令人困惑。

4

0 回答 0