python - liblinear svm 无法收敛后的内存错误

Question

在出现一条与 liblinear 无法收敛有关的警告消息（ConvergenceWarning）之后，程序产生了下面的错误跟踪。我想弄清楚这个错误指的是什么，以及如何避免它。

这个 SVM 来自 scikit-learn（sklearn），下面是配置它的代码。

svc = LinearSVC(class_weight='balanced',verbose=1,max_iter=2000)
train_sizes, train_scores, valid_scores = learning_curve(svc,xtscale,np.ravel(ytran), 
        train_sizes=[10000,20000,30000],scoring=make_scorer(accuracy_score),n_jobs=4,verbose=2)

错误跟踪

    .C:\Python27\lib\site-packages\sklearn\svm\base.py:924: ConvergenceWarning: Liblinear failed to conv
    erge, increase the number of iterations.
      "the number of iterations.", ConvergenceWarning)
    ............[CV] ................................ no parameters to be set -13.2min
    .........Traceback (most recent call last):
      File "C:\MachineLearning\SFCrime\crime.py", line 59, in <module>
        train_sizes=[10000,20000,30000],scoring=make_scorer(accuracy_score),n_jobs=4,verbose=2)
      File "C:\Python27\lib\site-packages\sklearn\learning_curve.py", line 153, in learning_curve
        for train, test in cv for n_train_samples in train_sizes_abs)
      File "C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py", line 812, in __call__
        self.retrieve()
      File "C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py", line 762, in retrieve
        raise exception
    sklearn.externals.joblib.my_exceptions.JoblibMemoryError: JoblibMemoryError
    ___________________________________________________________________________
    Multiprocessing exception:
    ...........................................................................
    C:\MachineLearning\SFCrime\crime.py in <module>()
         54         # param_grid = {'C': [0.5, 1, 10]}
         55  #      gs = grid_search.GridSearchCV(svc, param_grid,n_jobs=4,verbose=1)
         56  #      gs.fit(testData[:,0:-2],np.ravel(testData[:,-1]))
         57         #print gs.best_estimator_
         58         train_sizes, train_scores, valid_scores = learning_curve(svc,xtscale,np.ravel(ytran)
    ,
    ---> 59                 train_sizes=[10000,20000,30000],scoring=make_scorer(accuracy_score),n_jobs=4
    ,verbose=2)
         60         #svc.fit(testData[:,0:7],np.ravel(testData[:,7]))
         61         #valData = xydecider[np.random.randint(0,xydecider.shape[0],10000)]
         62         #print svc.predict(xtransf.transform(np.matrix([2015,7,14,8,35, -122.3935620,37.7782
    485])))
         63

    ...........................................................................
    C:\Python27\lib\site-packages\sklearn\learning_curve.py in learning_curve(estimator=LinearSVC(C=1.0,
     class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
         verbose=1), X=array([[  0.00000000e+00,   1.73165030e+00,  -4....603501e-02,   3.21082024e-02,
     -1.99147226e-02]]), y=array([ 1,  2,  2, ...,  3,  5, 13]), train_sizes=[10000, 20000, 30000], cv=[
    (array([107733, 192190, 212425, ..., 878046, 878047, 878048]), array([     0,      1,      2, ..., 4
    65259, 466404, 486091])), (array([     0,      1,      2, ..., 878046, 878047, 878048]), array([1077
    33, 192190, 212425, ..., 718561, 718572, 718573])), (array([     0,      1,      2, ..., 718561, 718
    572, 718573]), array([316490, 337880, 481804, ..., 878046, 878047, 878048]))], scoring=make_scorer(a
    ccuracy_score), exploit_incremental_learning=False, n_jobs=4, pre_dispatch='all', verbose=2)
        148             scorer, verbose) for train, test in cv)
        149     else:
        150         out = parallel(delayed(_fit_and_score)(
        151             clone(estimator), X, y, scorer, train[:n_train_samples], test,
        152             verbose, parameters=None, fit_params=None, return_train_score=True)
    --> 153             for train, test in cv for n_train_samples in train_sizes_abs)
            cv = [(array([107733, 192190, 212425, ..., 878046, 878047, 878048]), array([     0,      1,
         2, ..., 465259, 466404, 486091])), (array([     0,      1,      2, ..., 878046, 878047, 878048]
    ), array([107733, 192190, 212425, ..., 718561, 718572, 718573])), (array([     0,      1,      2, ..
    ., 718561, 718572, 718573]), array([316490, 337880, 481804, ..., 878046, 878047, 878048]))]
        154         out = np.array(out)[:, :2]
        155         n_cv_folds = out.shape[0] // n_unique_ticks
        156         out = out.reshape(n_cv_folds, n_unique_ticks, 2)
        157

    ...........................................................................
    C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=
    4), iterable=<generator object <genexpr>>)
        807             if pre_dispatch == "all" or n_jobs == 1:
        808                 # The iterable was consumed all at once by the above for loop.
        809                 # No need to wait for async callbacks to trigger to
        810                 # consumption.
        811                 self._iterating = False
    --> 812             self.retrieve()
            self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=4)>
        813             # Make sure that we get a last message telling us we are done
        814             elapsed_time = time.time() - self._start_time
        815             self._print('Done %3i out of %3i | elapsed: %s finished',
        816                         (len(self._output), len(self._output),

    ---------------------------------------------------------------------------
    Sub-process traceback:
    ---------------------------------------------------------------------------
    MemoryError                                        Wed Jan 13 12:56:08 2016
    PID: 5784                             Python 2.7.10: C:\Python27\python.exe
    ...........................................................................
    C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.pyc in __call__(self=<sklearn.extern
    als.joblib.parallel.BatchedCalls object>)
         67     def __init__(self, iterator_slice):
         68         self.items = list(iterator_slice)
         69         self._size = len(self.items)
         70
         71     def __call__(self):
    ---> 72         return [func(*args, **kwargs) for func, args, kwargs in self.items]
         73
         74     def __len__(self):
         75         return self._size
         76

    ...........................................................................
    C:\Python27\lib\site-packages\sklearn\cross_validation.pyc in _fit_and_score(estimator=LinearSVC(C=1
    .0, class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
         verbose=1), X=memmap([[  0.00000000e+00,   1.73165030e+00,  -4...603501e-02,   3.21082024e-02,
     -1.99147226e-02]]), y=memmap([ 1,  2,  2, ...,  3,  5, 13]), scorer=make_scorer(accuracy_score), tr
    ain=array([107733, 192190, 212425, ..., 309456, 309457, 309460]), test=memmap([     0,      1,
    2, ..., 465259, 466404, 486091]), verbose=2, parameters=None, fit_params={}, return_train_score=True
    , return_parameters=False, error_score='raise')
       1519     if parameters is not None:
       1520         estimator.set_params(**parameters)
       1521
       1522     start_time = time.time()
       1523
    -> 1524     X_train, y_train = _safe_split(estimator, X, y, train)
       1525     X_test, y_test = _safe_split(estimator, X, y, test, train)
       1526
       1527     try:
       1528         if y_train is None:

    ...........................................................................
    C:\Python27\lib\site-packages\sklearn\cross_validation.pyc in _safe_split(estimator=LinearSVC(C=1.0,
     class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
         verbose=1), X=memmap([[  0.00000000e+00,   1.73165030e+00,  -4...603501e-02,   3.21082024e-02,
     -1.99147226e-02]]), y=memmap([ 1,  2,  2, ...,  3,  5, 13]), indices=array([107733, 192190, 212425,
     ..., 309456, 309457, 309460]), train_indices=None)
       1586             if train_indices is None:
       1587                 X_subset = X[np.ix_(indices, indices)]
       1588             else:
       1589                 X_subset = X[np.ix_(indices, train_indices)]
       1590         else:
    -> 1591             X_subset = safe_indexing(X, indices)
       1592
       1593     if y is not None:
       1594         y_subset = safe_indexing(y, indices)
       1595     else:

    ...........................................................................
    C:\Python27\lib\site-packages\sklearn\utils\__init__.pyc in safe_indexing(X=memmap([[  0.00000000e+0
    0,   1.73165030e+00,  -4...603501e-02,   3.21082024e-02,  -1.99147226e-02]]), indices=array([107733,
     192190, 212425, ..., 309456, 309457, 309460]))
        158             return X.copy().iloc[indices]
        159     elif hasattr(X, "shape"):
        160         if hasattr(X, 'take') and (hasattr(indices, 'dtype') and
        161                                    indices.dtype.kind == 'i'):
        162             # This is often substantially faster than X[indices]
    --> 163             return X.take(indices, axis=0)
        164         else:
        165             return X[indices]
        166     else:
        167         return [X[idx] for idx in indices]

    MemoryError:
    ______________

_____________________________________________________________

score 0 · Accepted Answer

请尝试增大最大迭代次数（max_iter）。SVM 可能需要比您当前允许的更多迭代才能收敛。另外，如果 liblinear 在您的数据上仍然无法收敛，我会尝试改用另一种算法，例如 sklearn.svm.SVC。

python - liblinear svm 无法收敛后的内存错误

1 个回答

Related

Reference