0

我遇到了一个奇怪的问题。我正在从 .mov/.wav/.aiff 文件中读取音频样本以便播放。我使用下面的代码:如果源音频本身就是 PCM,就原样读取样本;否则将其转换为 32 位浮点 PCM。

// Reads every sample buffer of the asset's first audio track and appends one
// MovieSample per buffer to self.audioStore. Linear PCM sources are requested
// with only the format ID set; any other source format is decoded to packed,
// interleaved, native-endian 32-bit float PCM. `audioDescription` is updated to
// describe the format that was requested from the reader.
NSError *error = nil;
AVAssetReader *assetReader = [[[AVAssetReader alloc] initWithAsset:self.movieAsset error:&error] autorelease];
if (assetReader == nil) {
    // Messaging nil below is harmless (no output gets added, nothing is read),
    // but without this log the creation failure would go unreported.
    DLog(@"could not create asset reader: %@", error);
}

NSArray *audioTracks = [self.movieAsset tracksWithMediaType:AVMediaTypeAudio];
AVAssetReaderTrackOutput *audioReaderOutput = nil;
AVAssetTrack *mainAudioTrack = nil;
CMTimeRange audioRange = kCMTimeRangeZero;

if ([audioTracks count]) {
    mainAudioTrack = [audioTracks objectAtIndex:0];
    audioRange = mainAudioTrack.timeRange;

    // The reader's timeRange is expressed on the asset's timeline, and so is the
    // track's timeRange. Use it unchanged: forcing the start to kCMTimeZero
    // would cut off the tail of a track that does not start at asset time zero.
    assetReader.timeRange = audioRange;

    NSArray *formatDesc = mainAudioTrack.formatDescriptions;
    if ([formatDesc count]) {
        CMAudioFormatDescriptionRef item = (CMAudioFormatDescriptionRef)[formatDesc objectAtIndex:0];
        const AudioStreamBasicDescription *pcmAudioDescription = CMAudioFormatDescriptionGetStreamBasicDescription(item);

        if (pcmAudioDescription == NULL) {
            // No stream description available, so there is nothing to base the
            // reader settings on.
            DLog(@"audio track has no stream basic description.");
        } else {
            NSDictionary *outputSettings = nil;

            // Start from the source description; the branches below overwrite
            // only the fields that differ in the requested output.
            audioDescription = *pcmAudioDescription;

            if (pcmAudioDescription->mFormatID != kAudioFormatLinearPCM) {
                // Compressed source: decode to 32-bit float PCM at the source
                // sample rate and channel count. Endianness and interleaving are
                // requested explicitly so that the delivered buffers match the
                // description filled in below.
                outputSettings = [NSDictionary dictionaryWithObjectsAndKeys:
                                  [NSNumber numberWithInt:kAudioFormatLinearPCM], AVFormatIDKey,
                                  [NSNumber numberWithDouble:pcmAudioDescription->mSampleRate], AVSampleRateKey,
                                  [NSNumber numberWithInt:pcmAudioDescription->mChannelsPerFrame], AVNumberOfChannelsKey,
                                  [NSNumber numberWithInt:32], AVLinearPCMBitDepthKey,
                                  [NSNumber numberWithBool:YES], AVLinearPCMIsFloatKey,
                                  [NSNumber numberWithBool:NO], AVLinearPCMIsBigEndianKey,
                                  [NSNumber numberWithBool:NO], AVLinearPCMIsNonInterleaved,
                                  nil];

                audioDescription.mFormatID = kAudioFormatLinearPCM;
                audioDescription.mBitsPerChannel = 32;
                audioDescription.mFramesPerPacket = 1;
                audioDescription.mChannelsPerFrame = pcmAudioDescription->mChannelsPerFrame;
                // Interleaved layout: one frame holds one sample per channel.
                audioDescription.mBytesPerFrame = audioDescription.mBitsPerChannel / 8 * audioDescription.mChannelsPerFrame;
                audioDescription.mBytesPerPacket = audioDescription.mFramesPerPacket * audioDescription.mBytesPerFrame;
                // 32 valid bits in a 32-bit container, hence packed.
                audioDescription.mFormatFlags = kAudioFormatFlagIsFloat | kAudioFormatFlagIsPacked;
            } else {
                // Already linear PCM: request LPCM without further constraints.
                // NOTE(review): this presumes the reader then delivers the
                // source layout unchanged, so that audioDescription (copied from
                // the source above) stays accurate - confirm.
                outputSettings = [NSDictionary dictionaryWithObjectsAndKeys:
                                  [NSNumber numberWithInt:kAudioFormatLinearPCM], AVFormatIDKey,
                                  nil];

                audioDescription.mFormatID = kAudioFormatLinearPCM;
            }

            audioReaderOutput = [[[AVAssetReaderTrackOutput alloc]
                                  initWithTrack:mainAudioTrack
                                  outputSettings:outputSettings] autorelease];
            if ([assetReader canAddOutput:audioReaderOutput]) {
                [assetReader addOutput:audioReaderOutput];
            } else {
                audioReaderOutput = nil;
            }
        }
    }
}

if (audioReaderOutput) {
    if ([assetReader startReading]) {
        NSTimeInterval duration = 0.0;

        // copyNextSampleBuffer returns NULL once the reader has left the Reading
        // state (completed, failed or cancelled), which ends the loop.
        while ([assetReader status] == AVAssetReaderStatusReading) {
            CMSampleBufferRef buffer = [audioReaderOutput copyNextSampleBuffer];
            if (buffer == NULL) {
                continue;
            }

            CMTime sampleDuration = CMSampleBufferGetDuration(buffer);
            CMTime currentSampleTime = CMSampleBufferGetOutputPresentationTimeStamp(buffer);

            MovieSample *sample = [[[MovieSample alloc] init] autorelease];
            sample.sampleTime = currentSampleTime;
            // NOTE(review): `buffer` is a +1 reference. If MovieSample retains
            // sampleBuffer, a CFRelease(buffer) is needed after this assignment;
            // if it merely takes over the reference, it must release it itself.
            // MovieSample is not visible here - confirm.
            sample.sampleBuffer = buffer;
            sample.sampleDuration = sampleDuration;
            [self.audioStore addObject:sample];

            NSLog(@"Received PCM buffer with [TIMESTAMP:%.1fms]", CMTimeGetSeconds(currentSampleTime) * 1000);
            NSLog(@"Buffer contains [SAMPLES:%ld]", (long)CMSampleBufferGetNumSamples(buffer));
            NSLog(@"Buffer contains [DURATION:%.1fms] worth of audio", CMTimeGetSeconds(sampleDuration) * 1000);
            duration += CMTimeGetSeconds(sampleDuration);
        }

        if ([assetReader status] == AVAssetReaderStatusFailed) {
            // A mid-stream failure would otherwise be indistinguishable from a
            // file that is simply shorter than expected.
            DLog(@"audio reading failed: %@", [assetReader error]);
        }

        NSLog(@"Total samples duration: %f", duration);
        NSLog(@"Total track reported duration: %f", CMTimeGetSeconds(audioRange.duration));
    } else {
        DLog(@"could not start Audio reading asset.");
        DLog(@"reader status: %ld", (long)[assetReader status]);
        DLog(@"reader error: %@", [assetReader error]);
    }
}

现在奇怪的是,无论我打开什么文件,读出来的样本总是比文件里实际的少一些。以这个循环素材 http://www.vvertex.com/loop.wav 为例:Audacity 显示的时长和音轨时长(mainAudioTrack.timeRange 的 duration)都是 3.75 秒,而我的代码把各缓冲区的时长累加后打印出来的总时长只有 3.657120 秒。

这是转储:

2014-03-06 10:48:15.721 FrameDecoder[665:303] 收到 PCM 缓冲区 [TIMESTAMP:0.0ms] 2014-03-06 10:48:15.721 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014 -03-06 10:48:15.721 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.721 FrameDecoder[665:303] 接收到的带有 [TIMESTAMP:185.8] 的 PCM 缓冲区ms] 2014-03-06 10:48:15.721 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.721 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 值音频 2014-03-06 10:48:15.722 FrameDecoder[665:303] 接收到 PCM 缓冲区 [TIMESTAMP:371.5ms] 2014-03-06 10:48:15.722 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192 ] 2014-03-06 10:48:15.722 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.722 FrameDecoder[665:303] 接收到的 PCM 缓冲区带有 [TIMESTAMP:557.3ms] 2014-03-06 10:48:15.722 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.722 FrameDecoder[665:303 ] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.723 FrameDecoder[665:303] 接收到的 PCM 缓冲区与 [TIMESTAMP:743.0ms] 2014-03-06 10:48:15.723 FrameDecoder[ 665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.723 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.723 FrameDecoder[ 665:303] 使用 [TIMESTAMP:928.8ms] 接收 PCM 缓冲区 2014-03-06 10:48:15.726 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.726 FrameDecoder[665 :303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.727 FrameDecoder[665:303] 接收到的 PCM 缓冲区与 [TIMESTAMP:1114.6ms] 2014-03-06 10:48:15.727 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.727 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48: 15.727 FrameDecoder[665:303] 使用 [TIMESTAMP:1300.3ms] 接收 PCM 缓冲区 2014-03-06 10:48:15.727 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.727 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.727 FrameDecoder[665:303] 接收到的带有 [TIMESTAMP:1486.1ms] 的 PCM 缓冲区 2014-03-06 10: 48:15.728 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.728 FrameDecoder[665:303] 
缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10: 48:15.728 FrameDecoder[665:303] 接收到的 PCM 缓冲区 [TIMESTAMP:1671.8ms] 2014-03-06 10:48:15.728 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.728 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.728 FrameDecoder[665:303] 接收到的带有 [ TIMESTAMP:1857.6ms] 2014-03-06 10:48:15.729 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.729 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8 ms] 价值音频 2014-03-06 10:48:15.729 FrameDecoder[665:303] 接收到的 PCM 缓冲区与 [TIMESTAMP:2043.4ms] 2014-03-06 10:48:15.729 FrameDecoder[665:303] 缓冲区包含 [ SAMPLES:8192] 2014-03-06 10:48:15.729 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.729 FrameDecoder[665:303] 接收的 PCM 缓冲区与 [TIMESTAMP:2229.1ms] 2014-03-06 10:48:15.730 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.730 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.730 FrameDecoder[665:303] 接收到的 PCM 缓冲区与 [TIMESTAMP:2414.9ms] 2014-03-06 10:48:15.730 FrameDecoder [665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.730 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.731 FrameDecoder [665:303] 使用 [TIMESTAMP:2600.6ms] 接收 PCM 缓冲区 2014-03-06 10:48:15.731 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.731 FrameDecoder[ 665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.731 FrameDecoder[665:303] 接收到的 PCM 缓冲区与 [TIMESTAMP:2786.4ms] 2014-03-06 10:48: 15.731 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.731 FrameDecoder[665:303] 缓冲区包含 [DURATION:185。8ms] 音频价值 2014-03-06 10:48:15.732 FrameDecoder[665:303] 接收到的 PCM 缓冲区 [TIMESTAMP:2972.2ms] 2014-03-06 10:48:15.732 FrameDecoder[665:303] 缓冲区包含 [ SAMPLES:8192] 2014-03-06 10:48:15.732 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.732 FrameDecoder[665:303] 接收到的 PCM 缓冲区与 [TIMESTAMP:3157.9ms] 2014-03-06 10:48:15.732 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.733 
FrameDecoder[665:303] 缓冲区包含 [DURATION :185.8ms] 音频价值 2014-03-06 10:48:15.733 FrameDecoder[665:303] 接收 PCM 缓冲区 [TIMESTAMP:3343.7ms] 2014-03-06 10:48:15.733 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:8192] 2014-03-06 10:48:15.733 FrameDecoder[665:303] 缓冲区包含 [DURATION:185.8ms] 价值的音频 2014-03-06 10:48:15.733 FrameDecoder[665:303] 接收到的 PCM 缓冲区带有 [TIMESTAMP:3529.4ms] 2014-03-06 10:48:15.734 FrameDecoder[665:303] 缓冲区包含 [SAMPLES:5631] 2014-03-06 10:48:15.734 FrameDecoder[665:303 ] 缓冲区包含 [DURATION:127.7ms] 价值的音频 2014-03-06 10:48:15.734 FrameDecoder[665:303] 总样本持续时间:3.657120 2014-03-06 10:48:15.734 FrameDecoder[665:303] 总计跟踪报告持续时间:3.750000

有人遇到过这样的奇怪问题吗?我已经试过好几个音频文件,以及采用不同压缩格式的 .mov 文件,结果都一样!我完全卡在这个问题上了!

此外,当我通过 AudioUnits 播放这些样本时,发现实际缺失的是音频开头的那一部分……

谢谢 !

4

1 回答 1

0

我的解决方法...

// Workaround, step 1: start the read range two buffers before time zero
// (-16384 frames at `samplerate`).
// NOTE(review): the duration argument here is kCMTimeNegativeInfinity; confirm
// that this is intended rather than kCMTimePositiveInfinity.
[_reader setTimeRange:CMTimeRangeMake(CMTimeMake(-16384, samplerate), kCMTimeNegativeInfinity)];

// Workaround, step 2: while draining the reader, skip every buffer whose output
// presentation timestamp is negative, i.e. lies before time zero.
while ([_reader status] == AVAssetReaderStatusReading) {
    CMSampleBufferRef sampleBufferRef = [_ringBufferReaderTrackOutput copyNextSampleBuffer];
    if (sampleBufferRef){
        CMTime t = CMSampleBufferGetOutputPresentationTimeStamp(sampleBufferRef);
        // The statement under this `if` is pseudo-code from the original post:
        // release the buffer, then continue with the next loop iteration.
        if( t.value < 0 )
            release, continue...
    }

但这样一来,末尾又少了 4096 个样本 :-(

于 2014-03-28T14:53:43.390 回答