python - 在 Python 中训练 OpenCV NormalBayesClassifier

Question

我正在尝试使用 NormalBayesClassifier 对 Foscam 9821W 网络摄像头生成的图像进行分类。它们是 1280x720，最初是彩色的，但我将它们转换为灰度进行分类。

我有一些 Python 代码（在 http://pastebin.com/YxYWRMGs 上），它试图遍历 ham/spam（正常/垃圾）两组图像来训练分类器，但是每当我调用 train() 时，OpenCV 都会尝试分配大量内存并抛出异常。

mock@behemoth:~/OpenFos/code/experiments$ ./cvbayes.py --ham=../training/ham --spam=../training/spam
Image is a <type 'numpy.ndarray'> (720, 1280)
...
*** trying to train with 8 images
responses is [2, 2, 2, 2, 2, 2, 1, 1]
OpenCV Error: Insufficient memory (Failed to allocate 6794772480020 bytes) in OutOfMemoryError, file /build/buildd/opencv-2.3.1/modules/core/src/alloc.cpp, line 52
Traceback (most recent call last):
  File "./cvbayes.py", line 124, in <module>
    classifier = cw.train()
  File "./cvbayes.py", line 113, in train
    classifier.train(matrixData,matrixResp)
cv2.error: /build/buildd/opencv-2.3.1/modules/core/src/alloc.cpp:52: error: (-4) Failed to allocate 6794772480020 bytes in function OutOfMemoryError

我对 Python 有经验，但对 OpenCV 是新手，所以我怀疑我错过了一些关键的预处理。

我想使用它的图像示例位于https://mocko.org.uk/choowoos/?m=20130515。我有大量可用的训练数据，但最初我只使用 8 张图像。

有人能告诉我我做错了什么导致 NormalBayesClassifier 爆炸吗？

score 6 · Accepted Answer

最终发现了问题——我错误地使用了 NormalBayesClassifier。它并不适合直接输入数十张高清图像：应该先用 OpenCV 的其他算法对图像进行处理。

我最终做了以下事情：

+ 将图像裁剪到可能包含对象的区域
+ 将图像转换为灰度
+ 使用 cv2.goodFeaturesToTrack() 从裁剪区域收集特征以训练分类器。

少数特征对我有用，也许是因为我已经裁剪了图像，并且它很幸运地包含了在一个类中被遮挡的高对比度对象。

以下代码对全部样本的分类正确率高达 95%：

#!/usr/bin/env python
# -*- coding: utf-8 -*- 


import cv2
import sys, os.path, getopt
import numpy, random




def _usage():

    print
    print "cvbayes trainer"
    print
    print "Options:"
    print
    print "-m    --ham=     path to dir of ham images"
    print "-s    --spam=    path to dir of spam images"
    print "-h    --help     this help text"
    print "-v    --verbose  lots more output"
    print



def _parseOpts(argv):

    """
    Turn options + args into a dict of config we'll follow.  Merge in default conf.
    """

    try:
        opts, args = getopt.getopt(argv[1:], "hm:s:v", ["help", "ham=", 'spam=', 'verbose'])
    except getopt.GetoptError as err:
        print(err) # will print something like "option -a not recognized"
        _usage()
        sys.exit(2)

    optsDict = {}

    for o, a in opts:
        if o == "-v":
            optsDict['verbose'] = True
        elif o in ("-h", "--help"):
            _usage()
            sys.exit()
        elif o in ("-m", "--ham"):
            optsDict['ham'] = a
        elif o in ('-s', '--spam'):
            optsDict['spam'] = a
        else:
            assert False, "unhandled option"

    for mandatory_arg in ('ham', 'spam'):
        if mandatory_arg not in optsDict:
            print "Mandatory argument '%s' was missing; cannot continue" % mandatory_arg
            sys.exit(0)

    return optsDict     




class ClassifierWrapper(object):

    """
    Set up and encapsulate a naive Bayes classifier based on OpenCV's
    NormalBayesClassifier.

    Images are not fed in as raw pixels: each one is cropped to a fixed
    region, converted to greyscale and reduced to the flattened output of
    cv2.goodFeaturesToTrack().  Those feature vectors are what the
    classifier is trained on and asked to predict from.
    """

    def __init__(self):
        super(ClassifierWrapper, self).__init__()
        self.classifier = cv2.NormalBayesClassifier()
        # Feature vectors and their class labels queued by train_from_file();
        # the two lists are parallel and are emptied by train().
        self.data = []
        self.responses = []

    def _load_image_features(self, f):
        """
        Load the image at path f and return its feature vector.

        Returns the flattened array produced by cv2.goodFeaturesToTrack() on
        the greyscale crop of the image.

        Raises IOError if the image cannot be read and ValueError if no
        features are found in the cropped region.
        """
        image_colour = cv2.imread(f)
        if image_colour is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError("could not read image: %s" % f)

        # Fixed region of interest (rows 327-389, columns 784-925): the
        # junction boxes in the camera's field of view.
        image_crop = image_colour[327:390, 784:926]
        image_grey = cv2.cvtColor(image_crop, cv2.COLOR_BGR2GRAY)

        # At most 4 corners, quality level 0.02, minimum distance 3 pixels.
        features = cv2.goodFeaturesToTrack(image_grey, 4, 0.02, 3)
        if features is None:
            raise ValueError("no trackable features found in image: %s" % f)

        # NOTE(review): if fewer than 4 corners are found the vector will be
        # shorter than usual, which would presumably break the matrix built
        # in train() -- confirm against real data.
        return features.flatten()

    def train_from_file(self, f, cl):
        """
        Queue the features of the image at path f as a sample of class cl.

        Nothing is trained until train() is called.
        """
        features = self._load_image_features(f)
        self.data.append(features)
        self.responses.append(cl)

    def train(self, update=False):
        """
        Train the classifier on all queued samples, then clear the queue.

        update is passed straight through to the underlying classifier's
        train() call.

        Raises ValueError if no samples have been queued.
        """
        if not self.data:
            raise ValueError("no training samples queued; call train_from_file() first")

        matrix_data = numpy.matrix(self.data).astype('float32')
        matrix_resp = numpy.matrix(self.responses).astype('float32')
        self.classifier.train(matrix_data, matrix_resp, update=update)
        self.data = []
        self.responses = []

    def predict_from_file(self, f):
        """
        Classify the image at path f.

        Returns the results array from the underlying classifier's
        predict() call for a single-row feature matrix.
        """
        features = self._load_image_features(f)
        features_matrix = numpy.matrix([features]).astype('float32')
        retval, results = self.classifier.predict(features_matrix)
        return results




if __name__ == "__main__":

    # Train on equally sized random samples of ham and spam images, then
    # report how often the trained classifier mislabels the spam directory.

    opts = _parseOpts(sys.argv)

    cw = ClassifierWrapper()

    # List each directory once and reuse the listing below.
    ham     = os.listdir(opts['ham'])
    spam    = os.listdir(opts['spam'])

    # Use the same number of samples from each class so neither dominates.
    n_training_samples  = min(len(ham), len(spam))
    if n_training_samples == 0:
        print("Need at least one ham and one spam image to train; cannot continue")
        sys.exit(1)
    print("Will train on %d samples for equal sets" % n_training_samples)

    for f in random.sample(ham, n_training_samples):
        img_path    = os.path.join(opts['ham'], f)
        print("ham: %s" % img_path)
        cw.train_from_file(img_path, 2)

    for f in random.sample(spam, n_training_samples):
        img_path    = os.path.join(opts['spam'], f)
        print("spam: %s" % img_path)
        cw.train_from_file(img_path, 1)

    cw.train()

    print("")
    print("")

    # spam dir much bigger so mostly unused, let's try predict() on all of it
    print("predicting on all spam...")
    n_wrong = 0
    n_files = len(spam)
    for f in spam:
        img_path    = os.path.join(opts['spam'], f)
        result = cw.predict_from_file(img_path)
        print("%s\t%s" % (result, img_path))
        # Class 2 is ham, so a spam image predicted as 2 is a miss.
        if result[0][0] == 2:
            n_wrong += 1

    print("")
    print("got %d of %d wrong = %.1f%%" % (n_wrong, n_files, float(n_wrong)/n_files * 100, ))

现在我正在用垃圾邮件的一个随机子集对其进行训练，仅仅是因为它的数量要多得多，而且每个类的训练数据量应该大致相等。如果数据更好（例如，当光照不同时，总是包括黎明和黄昏的样本），它可能会更高。

也许 NormalBayesClassifier 本身就不是适合这项工作的工具，我应该尝试跨连续帧进行运动检测——但至少互联网上现在有了一个可供研究的示例。

score 1 · Accepted Answer

值得注意的是，它尝试分配的内存量是 (720 * 1280) ^ 2 * 8。我认为这实际上可能是它需要的内存量。

我认为贝叶斯模型应该允许您对 train() 进行多次顺序调用，所以可以尝试先缩小图像尺寸，然后每次只用一张图像调用 train()？

score 0 · Accepted Answer

还有一点需要注意：在较新的 OpenCV 版本中，您可能需要通过 cv2.ml 模块来创建 NormalBayesClassifier，例如

import cv2

# Lower-case name so that the classifier.train(...) call shown next resolves.
classifier = cv2.ml.NormalBayesClassifier_create()

按如下方式训练分类器；cv2.ml.ROW_SAMPLE 指定数据矩阵的组织方式，即样本是按行还是按列排列：

# Argument order is samples, layout flag, responses; cv2.ml.ROW_SAMPLE
# declares that each row of matrix_data is one sample.
classifier.train(matrix_data, cv2.ml.ROW_SAMPLE, matrix_resp)

python - 在 Python 中训练 OpenCV NormalBayesClassifier

3 回答 3

Related

Reference