My idea: grab every frame of a local video and convert each frame to a CGImage:
guard let cgImage = self.imageFromFrame(sampleBuffer) else { return }
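(As an aside, individual frames could also be pulled with AVAssetImageGenerator; I declare a generator property below but never use it. A minimal sketch with a hypothetical singleFrame helper, not part of my pipeline:)

import AVFoundation
import UIKit

// Side sketch (not what my code below does): grab a single frame with
// AVAssetImageGenerator instead of reading every sample buffer.
func singleFrame(from url: URL, atSeconds seconds: Double) -> CGImage? {
    let asset = AVAsset(url: url)
    let generator = AVAssetImageGenerator(asset: asset)
    generator.appliesPreferredTrackTransform = true // honor the track's rotation, if any
    let time = CMTime(seconds: seconds, preferredTimescale: 600)
    return try? generator.copyCGImage(at: time, actualTime: nil)
}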
Then run humanBodyPoseRequest on the CGImage to detect the body:
let imageRequestHandler = VNImageRequestHandler(cgImage: cgImage)
// Use Vision to find human body poses in the frame.
do {
    try imageRequestHandler.perform([self.humanBodyPoseRequest])
} catch {
    assertionFailure("Human Pose Request failed: \(error)")
}
let poses = Pose.fromObservations(self.humanBodyPoseRequest.results)
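(Pose, with its fromObservations and drawWireframeToContext helpers, comes from Apple's body-pose sample code and isn't shown here. As far as I understand, fromObservations essentially collects the recognized joints from each observation, roughly like this sketch; jointPoints is just an illustration, not my code:)

import Vision

// Rough sketch of what I understand Pose.fromObservations to do: collect the
// recognized joints (normalized [0, 1] coordinates) from each observation.
func jointPoints(from observations: [VNHumanBodyPoseObservation]?) -> [[VNHumanBodyPoseObservation.JointName: CGPoint]] {
    guard let observations = observations else { return [] }
    return observations.compactMap { observation in
        guard let recognized = try? observation.recognizedPoints(.all) else { return nil }
        // Keep only the joints Vision is reasonably confident about.
        var points = [VNHumanBodyPoseObservation.JointName: CGPoint]()
        for (name, point) in recognized where point.confidence > 0.1 {
            points[name] = point.location
        }
        return points
    }
}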
Then draw the detected poses onto the CGImage, turn the result into a UIImage, and set it on a UIImageView:
private func drawPoses(_ poses: [Pose]?, onto frame: CGImage) {
    print("+++++++++++++++++++++")
    // Create a default render format at a scale of 1:1.
    let renderFormat = UIGraphicsImageRendererFormat()
    renderFormat.scale = 1.0

    // Create a renderer with the same size as the frame.
    let frameSize = CGSize(width: frame.width, height: frame.height)
    let poseRenderer = UIGraphicsImageRenderer(size: frameSize,
                                               format: renderFormat)

    // Draw the frame first and then draw pose wireframes on top of it.
    let frameWithPosesRendering = poseRenderer.image { rendererContext in
        // The `UIGraphicsImageRenderer` instance flips the Y-Axis presuming
        // we're drawing with UIKit's coordinate system and orientation.
        let cgContext = rendererContext.cgContext

        // Get the inverse of the current transform matrix (CTM).
        let inverse = cgContext.ctm.inverted()

        // Restore the Y-Axis by multiplying the CTM by its inverse to reset
        // the context's transform matrix to the identity.
        cgContext.concatenate(inverse)

        // Draw the camera image first as the background.
        let imageRectangle = CGRect(origin: .zero, size: frameSize)
        cgContext.draw(frame, in: imageRectangle)

        // Create a transform that converts the poses' normalized point
        // coordinates `[0.0, 1.0]` to properly fit the frame's size.
        let pointTransform = CGAffineTransform(scaleX: frameSize.width,
                                               y: frameSize.height)

        guard let poses = poses else { return }

        // Draw all the poses Vision found in the frame.
        for pose in poses {
            // Draw each pose as a wireframe at the scale of the image.
            pose.drawWireframeToContext(cgContext, applying: pointTransform)
        }
    }

    // Update the UI's full-screen image view on the main thread.
    print("??????????????????????????")
    // self.uiimages.append(frameWithPosesRendering)
    DispatchQueue.main.async { self.imageView!.image = frameWithPosesRendering }
}
The problem is that after drawing, the video plays back slower than normal; see the result here: https://youtu.be/vYj6f4bKrCU. Below is all of my code:
import AVFoundation
import UIKit
import Vision
class VideoViewController: UIViewController {
    private var imageView: UIImageView?
    var videoUrl: URL? // use your own url
    var frames: [CGImage]?
    var uiimages: [UIImage] = []
    private var generator: AVAssetImageGenerator?
    private let humanBodyPoseRequest = VNDetectHumanBodyPoseRequest()
    private func drawPoses(_ poses: [Pose]?, onto frame: CGImage) {
        print("+++++++++++++++++++++")
        // Create a default render format at a scale of 1:1.
        let renderFormat = UIGraphicsImageRendererFormat()
        renderFormat.scale = 1.0

        // Create a renderer with the same size as the frame.
        let frameSize = CGSize(width: frame.width, height: frame.height)
        let poseRenderer = UIGraphicsImageRenderer(size: frameSize,
                                                   format: renderFormat)

        // Draw the frame first and then draw pose wireframes on top of it.
        let frameWithPosesRendering = poseRenderer.image { rendererContext in
            // The `UIGraphicsImageRenderer` instance flips the Y-Axis presuming
            // we're drawing with UIKit's coordinate system and orientation.
            let cgContext = rendererContext.cgContext

            // Get the inverse of the current transform matrix (CTM).
            let inverse = cgContext.ctm.inverted()

            // Restore the Y-Axis by multiplying the CTM by its inverse to reset
            // the context's transform matrix to the identity.
            cgContext.concatenate(inverse)

            // Draw the camera image first as the background.
            let imageRectangle = CGRect(origin: .zero, size: frameSize)
            cgContext.draw(frame, in: imageRectangle)

            // Create a transform that converts the poses' normalized point
            // coordinates `[0.0, 1.0]` to properly fit the frame's size.
            let pointTransform = CGAffineTransform(scaleX: frameSize.width,
                                                   y: frameSize.height)

            guard let poses = poses else { return }

            // Draw all the poses Vision found in the frame.
            for pose in poses {
                // Draw each pose as a wireframe at the scale of the image.
                pose.drawWireframeToContext(cgContext, applying: pointTransform)
            }
        }

        // Update the UI's full-screen image view on the main thread.
        print("??????????????????????????")
        // self.uiimages.append(frameWithPosesRendering)
        DispatchQueue.main.async { self.imageView!.image = frameWithPosesRendering }
    }
    private func imageFromFrame(_ buffer: Frame) -> CGImage? {
        guard let imageBuffer = buffer.imageBuffer else {
            print("The frame doesn't have an underlying image buffer.")
            return nil
        }

        // Create a Core Image context.
        let ciContext = CIContext(options: nil)

        // Create a Core Image image from the sample buffer.
        let ciImage = CIImage(cvPixelBuffer: imageBuffer)

        // Generate a Core Graphics image from the Core Image image.
        guard let cgImage = ciContext.createCGImage(ciImage,
                                                    from: ciImage.extent) else {
            print("Unable to create an image from a frame.")
            return nil
        }
        return cgImage
    }
    override func viewDidLoad() {
        super.viewDidLoad()
        // Do any additional setup after loading the view.
        view.backgroundColor = .white
        imageView = UIImageView(frame: safeAreaFrame(self))
        imageView!.contentMode = .scaleAspectFit
        view.addSubview(imageView!)

        DispatchQueue.global().async {
            // Get the local video's URL.
            guard let videoPath = Bundle.main.path(forResource: "jumpjack", ofType: "mp4") else {
                print("get video path failed")
                return
            }
            self.videoUrl = URL(fileURLWithPath: videoPath)

            let asset = AVAsset(url: self.videoUrl!)
            let reader = try! AVAssetReader(asset: asset)
            let videoTrack = asset.tracks(withMediaType: AVMediaType.video)[0]

            // Read the video frames as BGRA.
            let trackReaderOutput = AVAssetReaderTrackOutput(track: videoTrack,
                                                             outputSettings: [String(kCVPixelBufferPixelFormatTypeKey): NSNumber(value: kCVPixelFormatType_32BGRA)])
            trackReaderOutput.supportsRandomAccess = true
            reader.add(trackReaderOutput)
            reader.startReading()

            while let sampleBuffer = trackReaderOutput.copyNextSampleBuffer() {
                // CMSampleBuffer
                print("sample at time \(CMSampleBufferGetPresentationTimeStamp(sampleBuffer))")

                // CVImageBuffer
                if let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
                    // Process each CVPixelBufferRef here.
                    // See CVPixelBufferGetWidth, CVPixelBufferLockBaseAddress, CVPixelBufferGetBaseAddress, etc.
                    guard let cgImage = self.imageFromFrame(sampleBuffer) else { return }

                    let imageRequestHandler = VNImageRequestHandler(cgImage: cgImage)

                    // Use Vision to find human body poses in the frame.
                    do {
                        try imageRequestHandler.perform([self.humanBodyPoseRequest])
                    } catch {
                        assertionFailure("Human Pose Request failed: \(error)")
                    }

                    let poses = Pose.fromObservations(self.humanBodyPoseRequest.results)

                    // Send the frame and poses, if any, to the delegate on the main queue.
                    self.drawPoses(poses, onto: cgImage)
                }
            }
        }
    }
}
It seems that after each frame comes back from trackReaderOutput.copyNextSampleBuffer(), the work that follows is time-consuming, which is why the video slows down. Maybe my whole approach is wrong. Can anyone tell me how to solve this, or point me to another sample that achieves the same goal?
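To pin down which stage is actually slow, I suppose each step could be timed per frame, for example with a small helper like this (just a measurement sketch; measure is a hypothetical helper, not part of my code):

import Foundation

// Measurement sketch: time a block of work and print how long it took, to see
// whether the CIContext conversion, the Vision request, or the rendering dominates.
@discardableResult
func measure<T>(_ label: String, _ work: () throws -> T) rethrows -> T {
    let start = CFAbsoluteTimeGetCurrent()
    let result = try work()
    let milliseconds = (CFAbsoluteTimeGetCurrent() - start) * 1000
    print("\(label): \(milliseconds) ms")
    return result
}

// Hypothetical use inside the while loop:
// let cgImage = measure("convert") { self.imageFromFrame(sampleBuffer) }
// measure("vision") { try? imageRequestHandler.perform([self.humanBodyPoseRequest]) }
// let poses = measure("poses") { Pose.fromObservations(self.humanBodyPoseRequest.results) }
// measure("draw") { self.drawPoses(poses, onto: cgImage!) }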