1

我正在尝试构建一个能够从任何文档/卡片上读取文本的文档扫描器。但是,它有时无法正确识别信用卡上的文本。准确度还不错,但肯定还有改进的余地。我使用了 Vision 框架的文本识别功能,并采用了文本识别的所有标准设置。

这是我用来设置文本识别请求的代码:

// Configure a Vision text-recognition request whose completion handler
// concatenates the best candidate string of every observation, one per line.
textRecognitionRequest = VNRecognizeTextRequest(completionHandler: { (request, error) in
            // A single cast + emptiness check replaces the two nested `if let`s.
            guard let requestResults = request.results as? [VNRecognizedTextObservation],
                  !requestResults.isEmpty else { return }

            var foundText = ""
            // Iterate over the observations that were just cast; the original
            // looped over `recognizedText`, which is not defined in this scope.
            for observation in requestResults {
                // Skip observations for which Vision produced no candidate.
                guard let candidate = observation.topCandidates(1).first else { continue }
                foundText.append(candidate.string + "\n")
            }
        })
        textRecognitionRequest.recognitionLevel = .accurate
        textRecognitionRequest.usesLanguageCorrection = true

有没有人能建议一下,如何通过对扫描结果进行预处理或后处理,以编程方式提高识别的准确度?

4

1 回答 1

2

更新:我做了一个完全开源的项目,可以帮助你实现所需的功能。请看:https://github.com/ethanwa/credit-card-scanner-and-validator

**

除了添加一些预设值让识别器专门查找之外,您没有太多办法来提高准确性;而预设值对信用卡号来说没有意义,所以我就不展示那段代码了。要让识别效果真正得到改善,只能依靠 Apple 随着 iOS 的迭代来改进他们的文本识别模型。

同时,我建议您可以做以下两件事:

  1. 对您认为识别到的信用卡号进行验证。例如,Visa 以 4 开头,MasterCard 以 5 开头,Discover 以 6 开头,Amex 以 3 开头,等等。它们还有特定的长度等规则。见这里:https://www.freeformatter.com/credit-card-number-generator-validator.html

  2. 对相机画面反复进行处理,直到得到一个通过验证的号码。我不确定您当前是否只是给卡片拍一张照片然后处理该图像(听起来您是这样做的),但您应该每秒处理多帧图像,直到获得有效的信用卡号。Apple 在手机上通过 Apple Pay 添加卡片时,或者银行应用程序以数字方式存入支票(查找有效的路由号码和账号)时,很可能就是这样做的。

这是我的意思的一个例子......

我编写了这段代码,它可以在任何给定的文本中挑选和验证 ISBN 编号(基本上是 10 和 13 位编目书籍的数字,其中有一个校验位用于验证),并将继续查找,直到找到所有数字然后验证。它工作得非常好,而且速度非常快。查看这个Swift 5.3代码:

import UIKit
import Vision
import Photos
import AVFoundation

/// Scans camera frames (or a photo picked from the library) with Vision text
/// recognition and stops as soon as a token passes the ISBN checks
/// (`containsISBNnums` plus `Validate.isbn`, both defined outside this class).
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
    
    /// Space-separated concatenation of the top candidate of every observation
    /// from the most recent recognition pass.
    var recognizedText = ""
    /// The validated number found by the most recent pass, or "" if none.
    var finalText = ""
    /// The image that `processImage()` will run recognition on.
    var image: UIImage?
    /// `true` while a camera frame is being recognized; frames that arrive in
    /// the meantime are dropped by `captureOutput(_:didOutput:from:)`.
    var processing = false
    
    @IBOutlet weak var nameLabel: UILabel!
    @IBOutlet weak var setLabel: UILabel!
    @IBOutlet weak var numberLabel: UILabel!
    
    lazy var textDetectionRequest: VNRecognizeTextRequest = {
        // Capture `self` weakly: the request is stored on `self`, so a strong
        // capture of the handler method would form a retain cycle.
        let request = VNRecognizeTextRequest(completionHandler: { [weak self] request, error in
            self?.handleDetectedText(request: request, error: error)
        })
        request.recognitionLevel = .accurate
        request.usesLanguageCorrection = false
        return request
    }()
    
    private let videoOutput = AVCaptureVideoDataOutput()
    private let captureSession = AVCaptureSession()
    private lazy var previewLayer: AVCaptureVideoPreviewLayer = {
        let preview = AVCaptureVideoPreviewLayer(session: self.captureSession)
        preview.videoGravity = .resizeAspect
        return preview
    }()
    /// Shared Core Image context; creating one per frame is needlessly expensive.
    private let ciContext = CIContext(options: nil)

    // MARK: AV
    
    override func viewDidLoad() {
        super.viewDidLoad()
        addCameraInput()
        addVideoOutput()
    }
    
    /// Attaches the default video device to the capture session.
    /// Logs and returns (instead of crashing) when no camera is available or
    /// the input cannot be created or added.
    private func addCameraInput() {
        guard let device = AVCaptureDevice.default(for: .video) else {
            print("No video capture device is available.")
            return
        }
        do {
            let cameraInput = try AVCaptureDeviceInput(device: device)
            guard captureSession.canAddInput(cameraInput) else {
                print("Unable to add the camera input to the capture session.")
                return
            }
            captureSession.addInput(cameraInput)
        } catch {
            print("Unable to create the camera input: \(error)")
        }
    }
    
    override func viewDidLayoutSubviews() {
        super.viewDidLayoutSubviews()
        previewLayer.frame = view.bounds
    }
    
    /// Configures the video output for 32BGRA frames, delivers them to `self`
    /// on a dedicated queue, and attaches the output to the capture session.
    private func addVideoOutput() {
        videoOutput.videoSettings = [
            kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)
        ]
        videoOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "my.image.handling.queue"))
        guard captureSession.canAddOutput(videoOutput) else {
            print("Unable to add the video output to the capture session.")
            return
        }
        captureSession.addOutput(videoOutput)
    }
    
    /// Receives a camera frame; if no recognition pass is in flight, converts
    /// the frame to a `UIImage` and starts one.
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // Drop frames while the previous one is still being recognized.
        guard !processing else { return }
        guard let frame = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            debugPrint("unable to get image from sample buffer")
            return
        }
        print("did receive image frame")
        
        processing = true
        image = convert(cmage: CIImage(cvPixelBuffer: frame))
        processImage()
    }

    /// Converts a `CIImage` into a `UIImage`.
    ///
    /// The result is backed by a `CGImage` when rendering succeeds. If
    /// rendering fails, a `CIImage`-backed `UIImage` is returned instead of
    /// crashing; `processImage()` then skips it because it has no `cgImage`.
    func convert(cmage: CIImage) -> UIImage {
        guard let cgImage = ciContext.createCGImage(cmage, from: cmage.extent) else {
            return UIImage(ciImage: cmage)
        }
        return UIImage(cgImage: cgImage)
    }
    
    // AV
    
    /// Clears the result labels and runs `textDetectionRequest` on `image`
    /// on a background queue.
    func processImage() {
        DispatchQueue.main.async {
            self.nameLabel.text = ""
            self.setLabel.text = ""
            self.numberLabel.text = ""
        }
        
        guard let image = image, let cgImage = image.cgImage else {
            // Nothing to recognize: release the flag so the next frame is handled.
            processing = false
            return
        }
        
        let requests = [textDetectionRequest]
        let imageRequestHandler = VNImageRequestHandler(cgImage: cgImage, orientation: .right, options: [:])
        DispatchQueue.global(qos: .userInitiated).async { [weak self] in
            do {
                try imageRequestHandler.perform(requests)
            } catch {
                print("Error: \(error)")
                // Make sure a failed pass cannot leave the scanner stalled.
                self?.processing = false
            }
        }
    }
    
    /// Completion handler of `textDetectionRequest`.
    ///
    /// Joins the top candidates of all observations, strips "-" and
    /// line-break/tab characters, splits on spaces and takes the first token
    /// of length 10 or 13 that passes `containsISBNnums` and `Validate.isbn`.
    /// On a hit the capture session is stopped and the preview removed; in
    /// every case `numberLabel` is updated and `processing` is cleared.
    fileprivate func handleDetectedText(request: VNRequest?, error: Error?) {
        finalText = ""
        // Cleared on every exit path so one bad pass cannot block later frames.
        defer { processing = false }
        
        if let error = error {
            print(error.localizedDescription)
            return
        }
        guard let observations = request?.results as? [VNRecognizedTextObservation],
              !observations.isEmpty else {
            print("No text was found.")
            return
        }

        // Observations without a candidate are skipped rather than aborting
        // the whole pass (the early `return` used to leave `processing` stuck).
        recognizedText = observations
            .compactMap { $0.topCandidates(1).first?.string }
            .map { $0 + " " }
            .joined()
        
        let cleaned = recognizedText
            .replacingOccurrences(of: "-", with: "")
            .filter { !"\n\t\r".contains($0) }
        
        for token in cleaned.components(separatedBy: " ") {
            let candidate = token.trimmingCharacters(in: .whitespacesAndNewlines)
            
            guard candidate.count == 10 || candidate.count == 13,
                  candidate.containsISBNnums,
                  Validate.isbn(candidate) else { continue } // validate barcode
            
            finalText += candidate
            print(candidate)
            captureSession.stopRunning()
            DispatchQueue.main.async {
                self.previewLayer.removeFromSuperlayer()
            }
            break
        }

        // Snapshot the value now: `finalText` may be reset by a later pass
        // before the main queue gets to run this block.
        let textToShow = finalText
        DispatchQueue.main.async {
            self.numberLabel.text = textToShow
        }
    }
    
    // MARK: Buttons

    /// Shows the live camera preview and starts the capture session.
    @IBAction func takePhoto(_ sender: Any) {
        view.layer.addSublayer(previewLayer)
        captureSession.startRunning()
    }
    
    /// Lets the user pick an existing photo to scan.
    @IBAction func choosePhoto(_ sender: Any) {
        presentPhotoPicker(type: .photoLibrary)
    }
    
    /// Presents a `UIImagePickerController` for the given source type, if
    /// that source is available on this device.
    fileprivate func presentPhotoPicker(type: UIImagePickerController.SourceType) {
        // Assigning an unavailable source type raises an exception.
        guard UIImagePickerController.isSourceTypeAvailable(type) else {
            print("The requested image picker source is not available.")
            return
        }
        let controller = UIImagePickerController()
        controller.sourceType = type
        controller.delegate = self
        present(controller, animated: true, completion: nil)
    }
}

// MARK: - UIImagePickerControllerDelegate

extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {

    /// Closes the picker when the user backs out without choosing a photo.
    func imagePickerControllerDidCancel(_ picker: UIImagePickerController) {
        self.dismiss(animated: true)
    }

    /// Closes the picker, stores the chosen original image (`nil` when the
    /// entry is missing or is not a `UIImage`) and runs `processImage()`.
    func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [UIImagePickerController.InfoKey : Any]) {
        self.dismiss(animated: true)

        let pickedImage = info[.originalImage] as? UIImage
        self.image = pickedImage
        self.processImage()
    }
}

extension String {
    /// `true` when the string is non-empty and consists solely of the
    /// characters `0`–`9` and uppercase `X`; `false` otherwise.
    var containsISBNnums: Bool {
        if isEmpty { return false }
        let allowed = Set<Character>("0123456789X")
        return allSatisfy { allowed.contains($0) }
    }
}
于 2020-11-24T09:31:30.927 回答