My idea: grab every frame of the local video, then convert each frame to a CGImage:

    guard let cgImage = self.imageFromFrame(sampleBuffer) else { return }

Then use humanBodyPoseRequest to detect the body in the CGImage:

    let imageRequestHandler = VNImageRequestHandler(cgImage: cgImage)

    // Use Vision to find human body poses in the frame.
    do { try imageRequestHandler.perform([self.humanBodyPoseRequest]) } catch {
        assertionFailure("Human Pose Request failed: \(error)")
    }

    let poses = Pose.fromObservations(self.humanBodyPoseRequest.results)
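Pose is the wrapper type from Apple's "Detecting Human Actions in a Live Video Feed" sample; roughly, fromObservations collects the joints Vision reports for each observation. A minimal sketch of that extraction (the helper name is illustrative, not part of the sample):

    import Vision

    // Sketch of what the Pose wrapper reads from each observation.
    func jointLocations(from observation: VNHumanBodyPoseObservation) -> [CGPoint] {
        guard let joints = try? observation.recognizedPoints(.all) else { return [] }
        return joints.values
            .filter { $0.confidence > 0.1 } // keep reasonably confident joints
            .map { $0.location }            // normalized [0, 1], origin at bottom-left
    }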

Then draw the detected poses onto the CGImage, get a UIImage from the result, and set it on a UIImageView:

    private func drawPoses(_ poses: [Pose]?, onto frame: CGImage) {
        print("+++++++++++++++++++++")
        // Create a default render format at a scale of 1:1.
        let renderFormat = UIGraphicsImageRendererFormat()
        renderFormat.scale = 1.0

        // Create a renderer with the same size as the frame.
        let frameSize = CGSize(width: frame.width, height: frame.height)
        let poseRenderer = UIGraphicsImageRenderer(size: frameSize,
                                                   format: renderFormat)

        // Draw the frame first and then draw pose wireframes on top of it.
        let frameWithPosesRendering = poseRenderer.image { rendererContext in
            // The `UIGraphicsImageRenderer` instance flips the Y-Axis, presuming
            // we're drawing with UIKit's coordinate system and orientation.
            let cgContext = rendererContext.cgContext

            // Get the inverse of the current transform matrix (CTM).
            let inverse = cgContext.ctm.inverted()

            // Restore the Y-Axis by multiplying the CTM by its inverse to reset
            // the context's transform matrix to the identity.
            cgContext.concatenate(inverse)

            // Draw the camera image first as the background.
            let imageRectangle = CGRect(origin: .zero, size: frameSize)
            cgContext.draw(frame, in: imageRectangle)

            // Create a transform that converts the poses' normalized point
            // coordinates `[0.0, 1.0]` to properly fit the frame's size.
            let pointTransform = CGAffineTransform(scaleX: frameSize.width,
                                                   y: frameSize.height)

            guard let poses = poses else { return }

            // Draw all the poses Vision found in the frame.
            for pose in poses {
                // Draw each pose as a wireframe at the scale of the image.
                pose.drawWireframeToContext(cgContext, applying: pointTransform)
            }
        }

        // Update the UI's full-screen image view on the main thread.
        print("??????????????????????????")
//        self.uiimages.append(frameWithPosesRendering)
        DispatchQueue.main.async { self.imageView!.image = frameWithPosesRendering }
    }
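Note that this re-renders the entire frame into a new UIImage on every call, which is part of what makes the per-frame work heavy (see the problem below). A lighter alternative might be to leave the frame in the image view and draw only the wireframe into a CAShapeLayer on top. A rough, untested sketch, assuming the joint points have already been converted to view coordinates:

    import UIKit

    // Sketch: update one CAShapeLayer's path per frame instead of
    // re-rendering the whole bitmap.
    let wireframeLayer = CAShapeLayer()

    func setUpWireframeLayer(on imageView: UIImageView) {
        wireframeLayer.strokeColor = UIColor.systemGreen.cgColor
        wireframeLayer.fillColor = UIColor.clear.cgColor
        wireframeLayer.lineWidth = 2
        imageView.layer.addSublayer(wireframeLayer)
    }

    // Call on the main thread. Vision's normalized points have a bottom-left
    // origin, so flip Y when mapping them into the view first.
    func updateWireframe(with points: [CGPoint]) {
        let path = UIBezierPath()
        for point in points {
            path.move(to: point)
            path.addArc(withCenter: point, radius: 3,
                        startAngle: 0, endAngle: .pi * 2, clockwise: true)
        }
        wireframeLayer.path = path.cgPath
    }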

The problem is that after the drawing step the video plays back slowly. You can see the effect here: https://youtu.be/vYj6f4bKrCU. Below is all my code:

import AVFoundation
import UIKit
import Vision

// `Pose` is from Apple's "Detecting Human Actions in a Live Video Feed"
// sample code; `Frame` is that sample's typealias for CMSampleBuffer,
// redeclared here so the file stands alone.
typealias Frame = CMSampleBuffer

class VideoViewController: UIViewController {
    private var imageView: UIImageView?

    var videoUrl: URL? // use your own url
    var frames: [CGImage]?
    var uiimages: [UIImage] = []

    private var generator: AVAssetImageGenerator?
    private let humanBodyPoseRequest = VNDetectHumanBodyPoseRequest()

    private func drawPoses(_ poses: [Pose]?, onto frame: CGImage) {
        print("+++++++++++++++++++++")
        // Create a default render format at a scale of 1:1.
        let renderFormat = UIGraphicsImageRendererFormat()
        renderFormat.scale = 1.0

        // Create a renderer with the same size as the frame.
        let frameSize = CGSize(width: frame.width, height: frame.height)
        let poseRenderer = UIGraphicsImageRenderer(size: frameSize,
                                                   format: renderFormat)

        // Draw the frame first and then draw pose wireframes on top of it.
        let frameWithPosesRendering = poseRenderer.image { rendererContext in
            // The `UIGraphicsImageRenderer` instance flips the Y-Axis, presuming
            // we're drawing with UIKit's coordinate system and orientation.
            let cgContext = rendererContext.cgContext

            // Get the inverse of the current transform matrix (CTM).
            let inverse = cgContext.ctm.inverted()

            // Restore the Y-Axis by multiplying the CTM by its inverse to reset
            // the context's transform matrix to the identity.
            cgContext.concatenate(inverse)

            // Draw the camera image first as the background.
            let imageRectangle = CGRect(origin: .zero, size: frameSize)
            cgContext.draw(frame, in: imageRectangle)

            // Create a transform that converts the poses' normalized point
            // coordinates `[0.0, 1.0]` to properly fit the frame's size.
            let pointTransform = CGAffineTransform(scaleX: frameSize.width,
                                                   y: frameSize.height)

            guard let poses = poses else { return }

            // Draw all the poses Vision found in the frame.
            for pose in poses {
                // Draw each pose as a wireframe at the scale of the image.
                pose.drawWireframeToContext(cgContext, applying: pointTransform)
            }
        }

        // Update the UI's full-screen image view on the main thread.
        print("??????????????????????????")
//        self.uiimages.append(frameWithPosesRendering)
        DispatchQueue.main.async { self.imageView!.image = frameWithPosesRendering }
    }

    private func imageFromFrame(_ buffer: Frame) -> CGImage? {
        guard let imageBuffer = buffer.imageBuffer else {
            print("The frame doesn't have an underlying image buffer.")
            return nil
        }

        // Create a Core Image context.
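        // NOTE: creating a CIContext is expensive; creating one context once
        // and reusing it for every frame is generally much cheaper.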
        let ciContext = CIContext(options: nil)

        // Create a Core Image image from the sample buffer.
        let ciImage = CIImage(cvPixelBuffer: imageBuffer)

        // Generate a Core Graphics image from the Core Image image.
        guard let cgImage = ciContext.createCGImage(ciImage,
                                                    from: ciImage.extent) else {
            print("Unable to create an image from a frame.")
            return nil
        }

        return cgImage
    }

    override func viewDidLoad() {
        super.viewDidLoad()

        // Do any additional setup after loading the view.
        view.backgroundColor = .white

        imageView = UIImageView(frame: safeAreaFrame(self))
        imageView!.contentMode = .scaleAspectFit
        view.addSubview(imageView!)
        
        DispatchQueue.global().async {
            // Get the local video's URL.
            guard let videoPath = Bundle.main.path(forResource: "jumpjack", ofType: "mp4") else {
                print("get video path failed")
                return
            }
            self.videoUrl = URL(fileURLWithPath: videoPath)
            let asset = AVAsset(url: self.videoUrl!)
            let reader = try! AVAssetReader(asset: asset)
            
            let videoTrack = asset.tracks(withMediaType: AVMediaType.video)[0]

            // read video frames as BGRA
            let trackReaderOutput = AVAssetReaderTrackOutput(track: videoTrack, outputSettings: [String(kCVPixelBufferPixelFormatTypeKey): NSNumber(value: kCVPixelFormatType_32BGRA)])
            trackReaderOutput.supportsRandomAccess = true

            reader.add(trackReaderOutput)
            reader.startReading()

            while let sampleBuffer = trackReaderOutput.copyNextSampleBuffer() {
                print("sample at time \(CMSampleBufferGetPresentationTimeStamp(sampleBuffer))")

                // Process each frame's CVImageBuffer; see CVPixelBufferGetWidth,
                // CVPixelBufferLockBaseAddress, CVPixelBufferGetBaseAddress, etc.
                if CMSampleBufferGetImageBuffer(sampleBuffer) != nil {
                    guard let cgImage = self.imageFromFrame(sampleBuffer) else { return }

                    let imageRequestHandler = VNImageRequestHandler(cgImage: cgImage)

                    // Use Vision to find human body poses in the frame.
                    do { try imageRequestHandler.perform([self.humanBodyPoseRequest]) } catch {
                        assertionFailure("Human Pose Request failed: \(error)")
                    }

                    let poses = Pose.fromObservations(self.humanBodyPoseRequest.results)

                    // Draw the frame with the poses, if any, and show the result.
                    self.drawPoses(poses, onto: cgImage)
                }
            }
        }
    }
}

It seems that after getting each frame of the local video with

trackReaderOutput.copyNextSampleBuffer()

there is some time-consuming work, which is why the video slows down. Maybe my whole approach is wrong. Can anyone tell me how to fix this, or point me to another sample that achieves the same goal?
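For what it's worth, one direction that might fix the pacing is to drive the read loop off each frame's presentation timestamp: sleep when the loop runs ahead of the wall clock, and drop frames when it falls behind, so the on-screen speed no longer depends on how long Vision and drawing take. A minimal, untested sketch (the helper name and the 100 ms drop threshold are illustrative):

    import AVFoundation
    import QuartzCore

    // Sketch: pace frame delivery against the presentation timestamps.
    func readFramesPaced(from output: AVAssetReaderTrackOutput,
                         process: (CMSampleBuffer) -> Void) {
        let startTime = CACurrentMediaTime()
        while let sampleBuffer = output.copyNextSampleBuffer() {
            let pts = CMTimeGetSeconds(CMSampleBufferGetPresentationTimeStamp(sampleBuffer))
            let elapsed = CACurrentMediaTime() - startTime
            if elapsed < pts {
                // Ahead of schedule: wait until this frame is due on screen.
                Thread.sleep(forTimeInterval: pts - elapsed)
            } else if elapsed - pts > 0.1 {
                // More than 100 ms late: skip this frame instead of drawing it.
                continue
            }
            process(sampleBuffer) // convert, run Vision, and draw as before
        }
    }

Creating the CIContext once and reusing it across frames (rather than making a new one inside imageFromFrame on every call) should also cut the per-frame cost, since context creation is expensive.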
