I compiled this example:
https://developer.apple.com/documentation/vision/recognizing_objects_in_live_capture
It doesn't work correctly for me on an iPhone 7 Plus: the rectangles it draws don't cover the detected items.
I created my own app to investigate. The detected objects come back as normalized bounding boxes; however, the bounds can be negative in the Y direction. Adding a correction of 0.2 brings them back into alignment.
Detection appears to run on a square cropped from the center of the portrait frame: I created a square overlay, and detection stops as soon as an object moves above or below the square. The top and bottom of the square sit at 0 and 1.0 in normalized coordinates.
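That square is consistent with a centered crop of the 16:9 buffer: the normalized margin such a crop leaves on the long side is about 0.22, suspiciously close to the 0.2 correction I'm adding. A minimal sketch of that arithmetic (the mapping direction and which axis carries the margin are my assumptions, since they depend on the buffer orientation):

import CoreGraphics

// Assumed: a 3840x2160 buffer whose long side is center-cropped to a 2160x2160 square.
let bufferLongSide: CGFloat = 3840
let bufferShortSide: CGFloat = 2160

// Normalized margin left on each side of the centered square crop:
// (3840 - 2160) / 2 / 3840 = 0.21875 -- close to the 0.2 correction that realigns my boxes.
let cropMargin = (bufferLongSide - bufferShortSide) / 2 / bufferLongSide

// Hypothetical mapping of a rect normalized to the square crop back to the
// full frame, with the margin placed on the X axis (which axis actually
// carries it depends on the buffer orientation).
func rectFromSquareCrop(_ r: CGRect) -> CGRect {
    let scale = bufferShortSide / bufferLongSide  // square side as a fraction of the long side
    return CGRect(x: cropMargin + r.origin.x * scale,
                  y: r.origin.y,
                  width: r.width * scale,
                  height: r.height)
}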
The test app passes data from captureOutput to VNImageRequestHandler; the code that sets up the requests is below as well. Any idea why the observations are sometimes negative in the Y direction, and why I need to add an offset to bring them back into the unit square and line them up with the image?
I set the camera to 4K in my test app and haven't tried any other settings yet.
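For reference, the capture session in the test app is configured roughly like this (a trimmed sketch; the delegate hookup assumes self implements the captureOutput below):

import AVFoundation

// Sketch of the capture setup (guards and error handling trimmed).
let session = AVCaptureSession()
session.sessionPreset = .hd4K3840x2160  // the "4K" setting mentioned above

if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back),
   let input = try? AVCaptureDeviceInput(device: device) {
    session.addInput(input)
}

let videoOutput = AVCaptureVideoDataOutput()
videoOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_420YpCbCr8BiPlanarFullRange]
// videoOutput.setSampleBufferDelegate(self, queue: videoQueue)  // self implements captureOutput below
session.addOutput(videoOutput)
session.startRunning()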
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
        return
    }
    // Forcing .up here instead of deriving the orientation from the device:
    //let exifOrientation = exifOrientationFromDeviceOrientation()
    let exifOrientation = CGImagePropertyOrientation.up
    let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: exifOrientation, options: [:])
    do {
        try imageRequestHandler.perform(self.requests)
    } catch {
        print(error)
    }
}
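The sample's exifOrientationFromDeviceOrientation() that I've bypassed above looks roughly like this (reproduced from memory from Apple's sample, so treat it as a sketch), in case the forced .up is part of my problem:

import UIKit
import ImageIO

// Roughly the helper from Apple's sample that I replaced with a hard-coded .up:
// it maps the physical device orientation to the EXIF orientation Vision should
// assume for a back-camera buffer.
func exifOrientationFromDeviceOrientation() -> CGImagePropertyOrientation {
    switch UIDevice.current.orientation {
    case .portraitUpsideDown: return .left        // home button on top
    case .landscapeLeft:      return .upMirrored  // home button on the right
    case .landscapeRight:     return .down        // home button on the left
    case .portrait:           return .up          // home button on the bottom
    default:                  return .up
    }
}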
@discardableResult
func setupVision() -> NSError? {
    // Setup Vision parts
    let error: NSError! = nil
    guard let modelURL = Bundle.main.url(forResource: "ResistorModel", withExtension: "mlmodelc") else {
        return NSError(domain: "VisionObjectRecognitionViewController", code: -1, userInfo: [NSLocalizedDescriptionKey: "Model file is missing"])
    }
    do {
        let visionModel = try VNCoreMLModel(for: MLModel(contentsOf: modelURL))
        let objectRecognition = VNCoreMLRequest(model: visionModel, completionHandler: { (request, error) in
            DispatchQueue.main.async(execute: {
                // perform all the UI updates on the main queue
                if let results = request.results {
                    self.drawVisionRequestResults(results)
                }
            })
        })
        self.requests = [objectRecognition]
    } catch let error as NSError {
        print("Model loading went wrong: \(error)")
    }
    return error
}
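One thing this setup never touches is the request's crop/scale behaviour. If my recollection of the docs is right that VNCoreMLRequest defaults to .centerCrop, that alone would explain the centered square I'm seeing; a hypothetical tweak (not in my code above) to pin it down:

import Vision

// Hypothetical: set the crop/scale option explicitly instead of relying on the default.
func configureCropping(_ request: VNCoreMLRequest) {
    // .centerCrop matches the centered-square behaviour described above;
    // .scaleFill squashes the full frame into the model's input instead of cropping.
    request.imageCropAndScaleOption = .scaleFill
}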
func drawVisionRequestResults(_ results: [Any]) {
    var pipCreated = false
    CATransaction.begin()
    CATransaction.setValue(kCFBooleanTrue, forKey: kCATransactionDisableActions)
    detectionOverlay.sublayers = nil // remove all the old recognized objects
    for observation in results where observation is VNRecognizedObjectObservation {
        guard let objectObservation = observation as? VNRecognizedObjectObservation else {
            continue
        }
        // Select only the label with the highest confidence.
        let topLabelObservation = objectObservation.labels[0]
        if topLabelObservation.identifier == "resistor" {
            if (objectObservation.boundingBox.minX < 0.5) && (objectObservation.boundingBox.maxX > 0.5) && (objectObservation.boundingBox.minY < 0.3) && (objectObservation.boundingBox.maxY > 0.3) {
                // The 0.8 here is the empirical correction mentioned above (Y flip plus the ~0.2 offset).
                let bb = CGRect(x: objectObservation.boundingBox.minX, y: 0.8 - objectObservation.boundingBox.maxY, width: objectObservation.boundingBox.width, height: objectObservation.boundingBox.height)
                // The overlay is square, so its width is used for both dimensions.
                let objectBounds = VNImageRectForNormalizedRect(bb, Int(detectionOverlay.bounds.width), Int(detectionOverlay.bounds.width))
                print(objectObservation.boundingBox)
                let textLayer = self.createTextSubLayerInBounds(objectBounds,
                                                                identifier: topLabelObservation.identifier,
                                                                confidence: topLabelObservation.confidence)
                let shapeLayer = self.createRoundedRectLayerWithBounds(objectBounds)
                shapeLayer.addSublayer(textLayer)
                detectionOverlay.addSublayer(shapeLayer)
                if !pipCreated {
                    pipCreated = true
                    let pip = Pip(imageBuffer: self.imageBuffer!)
                    if self.pip {
                        pipView.image = pip?.uiImage
                    } else {
                        pipView.image = nil
                    }
                }
            }
        }
    }
    CATransaction.commit()
    doingStuff = false
}
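For comparison, this is the conversion I would have expected to be enough, with no 0.2 fudge: flip Y for CALayer's top-left origin, then scale by the overlay's real width and height. It assumes the overlay covers exactly the region Vision's coordinates refer to, which seems to be precisely what's being violated here:

import Vision
import CoreGraphics

// Sketch: map a Vision bounding box (normalized, origin at bottom-left)
// into overlay-layer coordinates (origin at top-left).
func overlayRect(for boundingBox: CGRect, overlaySize: CGSize) -> CGRect {
    // Flip Y: Vision's origin is bottom-left, CALayer's is top-left.
    let flipped = CGRect(x: boundingBox.minX,
                         y: 1 - boundingBox.maxY,
                         width: boundingBox.width,
                         height: boundingBox.height)
    return VNImageRectForNormalizedRect(flipped,
                                        Int(overlaySize.width),
                                        Int(overlaySize.height))
}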