你好,我对 Swift 比较陌生,还在学习中。我想构建一个文本识别器,从图像中提取出所有数字并对其进行分类整理。例如,当我有一张包含多个号码的信用卡时,如何识别出特定字段(IBAN、BIC 等)?最终,用户界面中的对应字段应该根据卡片图像的识别结果自动填充。我的第一步是构建基本的文本识别器,这部分已经可以正常工作。但我不知道接下来该如何对从图像中捕获的所有数据进行分类整理。有什么建议吗?谢谢!
struct ScanDocumentView: UIViewControllerRepresentable {
/// Presentation state read from the environment; the coordinator uses it to dismiss this view.
@Environment(\.presentationMode) var presentationMode
/// Receives the text the coordinator produces when a scan finishes.
@Binding var recognizedText: String
/// Receives a SwiftUI image built from the first page of the scan.
@Binding var recognizedImage: Image
/// Builds the coordinator that is installed as the document camera's delegate.
///
/// The coordinator receives projections of both bindings plus a copy of this
/// view, which it later uses to dismiss the scanner.
func makeCoordinator() -> Coordinator {
    return Coordinator(
        recognizedText: $recognizedText,
        recognizedImage: $recognizedImage,
        parent: self
    )
}
/// Creates the document camera controller and routes its delegate callbacks
/// to the coordinator held by `context`.
/// - Parameter context: Representable context that supplies the coordinator.
/// - Returns: The configured `VNDocumentCameraViewController`.
func makeUIViewController(context: Context) -> VNDocumentCameraViewController {
    let scanner = VNDocumentCameraViewController()
    scanner.delegate = context.coordinator
    return scanner
}
/// Intentionally empty: nothing is pushed to the camera controller after it
/// has been created.
func updateUIViewController(
    _ uiViewController: VNDocumentCameraViewController,
    context: Context
) {}
class Coordinator: NSObject, VNDocumentCameraViewControllerDelegate {
/// Binding that is assigned the recognized text when a scan finishes.
var recognizedText: Binding<String>
/// Binding that is assigned the image derived from the scan's first page.
var recognizedImage: Binding<Image>
/// The representable that created this coordinator; used to dismiss the scanner.
var parent: ScanDocumentView
/// Stores the bindings the coordinator writes to and the view that owns it.
/// - Parameters:
///   - recognizedText: Binding that receives the recognized text.
///   - recognizedImage: Binding that receives the scanned image.
///   - parent: The `ScanDocumentView` that created this coordinator.
init(
    recognizedText: Binding<String>,
    recognizedImage: Binding<Image>,
    parent: ScanDocumentView
) {
    self.parent = parent
    self.recognizedImage = recognizedImage
    self.recognizedText = recognizedText
}
/// Handles a completed scan: publishes the recognized text and the first
/// page's image through the bindings, then dismisses the scanner.
/// - Parameters:
///   - controller: The document camera that produced the scan.
///   - scan: The finished scan whose pages are processed.
func documentCameraViewController(_ controller: VNDocumentCameraViewController, didFinishWith scan: VNDocumentCameraScan) {
    let pageImages = extractImages(from: scan)
    recognizedText.wrappedValue = recognizeText(from: pageImages)

    // Only request page 0 when the scan actually has a page, rather than
    // asking for an out-of-range index on an empty scan.
    if scan.pageCount > 0 {
        let firstPage = scan.imageOfPage(at: 0)
        processImage(compressedImage(firstPage))
    }

    // Always dismiss, whether or not a preview image was produced.
    parent.presentationMode.wrappedValue.dismiss()
}
/// Collects the `CGImage` of every page in `scan`, in page order.
///
/// Pages whose `UIImage` exposes no `cgImage` are skipped.
/// - Parameter scan: The scan to read pages from.
/// - Returns: One `CGImage` for each page that provides one.
fileprivate func extractImages(from scan: VNDocumentCameraScan) -> [CGImage] {
    return (0..<scan.pageCount).compactMap { pageIndex in
        scan.imageOfPage(at: pageIndex).cgImage
    }
}
/// Wraps `uiImage` in a SwiftUI `Image` and publishes it through `recognizedImage`.
/// - Parameter uiImage: The UIKit image to publish.
private func processImage(_ uiImage: UIImage) {
    let displayImage = Image(uiImage: uiImage)
    recognizedImage.wrappedValue = displayImage
}
/// Re-encodes `originalImage` as JPEG and decodes the result back into a `UIImage`.
/// - Parameters:
///   - originalImage: The image to re-encode.
///   - compressionQuality: JPEG quality passed to `jpegData(compressionQuality:)`,
///     from 0 (most compression) to 1 (least compression). Defaults to 1, the
///     value that was previously hard-coded, so existing callers are unaffected.
/// - Returns: The re-encoded image, or `originalImage` unchanged when encoding
///   or decoding fails.
func compressedImage(_ originalImage: UIImage, compressionQuality: CGFloat = 1) -> UIImage {
    guard let jpegData = originalImage.jpegData(compressionQuality: compressionQuality),
          let reloadedImage = UIImage(data: jpegData) else {
        return originalImage
    }
    return reloadedImage
}
fileprivate func recognizeText(from images: [CGImage]) -> String {
var entireRecognizedText = ""
let recognizeTextRequest = VNRecognizeTextRequest { (request, error) in
guard error == nil else { return }
guard let observations = request.results as? [VNRecognizedTextObservation] else { return }
let maximumRecognitionCandidates = 1
for observation in observations {
guard let candidate = observation.topCandidates(maximumRecognitionCandidates).first else { continue }
entireRecognizedText += "\(candidate.string)\n"
}
}
recognizeTextRequest.recognitionLevel = .accurate
for image in images {
let requestHandler = VNImageRequestHandler(cgImage: image, options: [:])
try? requestHandler.perform([recognizeTextRequest])
}
return entireRecognizedText