4

使用 Swift 在 iOS 中将音频 mp3 文件转换为具有以下特征的音频类型 raw 的最有效方法是什么?

single-channel (monaural)
little-endian
unheadered
16-bit signed
PCM
sampled at 16000 Hz
4

1 回答 1

7

我不知道这会有所帮助,或者这就是你所追求的。

有一个与此问题相关的解决方案,与您在此处之后的内容有关

我修改了你之后的答案,我不是这方面的专家,但我会尽力而为

import AVFoundation

public final class AVAudioFileConverter {

  var rwAudioSerializationQueue: DispatchQueue!
  var asset:AVAsset!
  var assetReader:AVAssetReader!
  var assetReaderAudioOutput:AVAssetReaderTrackOutput!
  var assetWriter:AVAssetWriter!
  var assetWriterAudioInput:AVAssetWriterInput!
  var outputURL:URL
  var inputURL:URL

    public init?(inputFileURL: URL, outputFileURL: URL) {
    inputURL = inputFileURL
    outputURL = outputFileURL

    if (FileManager.default.fileExists(atPath: inputURL.absoluteString)) {
      print("Input file does not exist at file path \(inputURL.absoluteString)")
      return nil
    }
  }

  public func convert() {
    let rwAudioSerializationQueueDescription = " rw audio serialization queue"
    // Create the serialization queue to use for reading and writing the audio data.
    rwAudioSerializationQueue = DispatchQueue(label: rwAudioSerializationQueueDescription)
    assert(rwAudioSerializationQueue != nil, "Failed to initialize Dispatch Queue")

    asset = AVAsset(url: inputURL)
    assert(asset != nil, "Error creating AVAsset from input URL")
    print("Output file path -> ", outputURL.absoluteString)

    asset.loadValuesAsynchronously(forKeys: ["tracks"], completionHandler: {
      var success = true
      var localError:NSError?
      success = (self.asset.statusOfValue(forKey: "tracks", error: &localError) == AVKeyValueStatus.loaded)
      // Check for success of loading the assets tracks.
      if (success) {
        // If the tracks loaded successfully, make sure that no file exists at the output path for the asset writer.
        let fm = FileManager.default
        let localOutputPath = self.outputURL.path
        if (fm.fileExists(atPath: localOutputPath)) {
          do {
            try fm.removeItem(atPath: localOutputPath)
            success = true
          } catch {
            print("Error trying to remove output file at path -> \(localOutputPath)")
          }
        }
      }

      if (success) {
        success = self.setupAssetReaderAndAssetWriter()
      } else {
        print("Failed setting up Asset Reader and Writer")
      }
      if (success) {
        success = self.startAssetReaderAndWriter()
        return
      } else {
        print("Failed to start Asset Reader and Writer")
      }

    })
  }

  func setupAssetReaderAndAssetWriter() -> Bool {
    do {
      assetReader = try AVAssetReader(asset: asset)
    } catch {
      print("Error Creating AVAssetReader")
    }

    do {
      assetWriter = try AVAssetWriter(outputURL: outputURL, fileType: AVFileType.wav)
    } catch {
      print("Error Creating AVAssetWriter")
    }

    var assetAudioTrack:AVAssetTrack? = nil
    let audioTracks = asset.tracks(withMediaType: AVMediaType.audio)

    if (audioTracks.count > 0) {
      assetAudioTrack = audioTracks[0]
    }

    if (assetAudioTrack != nil) {

      let decompressionAudioSettings:[String : Any] = [
        AVFormatIDKey:Int(kAudioFormatLinearPCM)
      ]

      assetReaderAudioOutput = AVAssetReaderTrackOutput(track: assetAudioTrack!, outputSettings: decompressionAudioSettings)
      assert(assetReaderAudioOutput != nil, "Failed to initialize AVAssetReaderTrackOutout")
      assetReader.add(assetReaderAudioOutput)

      var channelLayout = AudioChannelLayout()
      memset(&channelLayout, 0, MemoryLayout<AudioChannelLayout>.size);
      channelLayout.mChannelLayoutTag = kAudioChannelLayoutTag_Stereo;

        let outputSettings:[String : Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 44100,
            AVNumberOfChannelsKey: 2,
            AVChannelLayoutKey: NSData(bytes:&channelLayout, length:  MemoryLayout.size(ofValue: AudioChannelLayout.self)),
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsNonInterleaved: false,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false,

        ]

      assetWriterAudioInput = AVAssetWriterInput(mediaType: AVMediaType.audio, outputSettings: outputSettings)
      assert(rwAudioSerializationQueue != nil, "Failed to initialize AVAssetWriterInput")
      assetWriter.add(assetWriterAudioInput)

    }
    print("Finsihed Setup of AVAssetReader and AVAssetWriter")
    return true
  }

  func startAssetReaderAndWriter() -> Bool {
    print("STARTING ASSET WRITER")
    assetWriter.startWriting()
    assetReader.startReading()
    assetWriter.startSession(atSourceTime: CMTime.zero)

    assetWriterAudioInput.requestMediaDataWhenReady(on: rwAudioSerializationQueue, using: {

      while(self.assetWriterAudioInput.isReadyForMoreMediaData ) {
        var sampleBuffer = self.assetReaderAudioOutput.copyNextSampleBuffer()
        if(sampleBuffer != nil) {
          self.assetWriterAudioInput.append(sampleBuffer!)
          sampleBuffer = nil
        } else {
          self.assetWriterAudioInput.markAsFinished()
          self.assetReader.cancelReading()
          self.assetWriter.finishWriting {
            print("Asset Writer Finished Writing")
          }
          break
        }
      }
    })
    return true
  }
}

用法

let inputFilePath = URL(fileURLWithPath: "/path/to/file.mp3")
let outputFileURL = URL(fileURLWithPath: "/path/to/output/output.wav")

if let audioConverter = AVAudioFileConverter(inputFileURL: inputFilePath, 
    outputFileURL: outputFileURL) {
    audioConverter.convert()
}

输入文件信息

File:           /path/12.mp3
File type ID:   MPG3
Num Tracks:     1
----
Data format:     2 ch,  44100 Hz, '.mp3' (0x00000000) 0 bits/channel, 0 bytes/packet, 1152 frames/packet, 0 bytes/frame
                no channel layout.
estimated duration: 1.149375 sec
audio bytes: 18390
audio packets: 44
bit rate: 128000 bits per second
packet size upper bound: 1052
maximum packet size: 418
audio data file offset: 417
optimized
audio 48510 valid frames + 576 priming + 1602 remainder = 50688

输出文件信息

File type ID:   WAVE
Num Tracks:     1
----
Data format:     2 ch,  44100 Hz, 'lpcm' (0x0000000C) 16-bit little-endian signed integer
                no channel layout.
estimated duration: 1.136327 sec
audio bytes: 200448
audio packets: 50112
bit rate: 1411200 bits per second
packet size upper bound: 4
maximum packet size: 4
audio data file offset: 4096
optimized
source bit depth: I16

这是来自metadata2go输出文件的信息

File Name: output.wav
File Size: 200 kB
File Type: WAV
File Type Extension: wav
MimeType: audio/x-wav
Encoding: Microsoft PCM
Num Channels: 2
Sample Rate: 44100
Avg Bytes Per Sec: 176400
Bits Per Sample: 16
Duration: 1.16 s
Category: audio

如果要更改设置,只需更改 outputSettings 这部分

let outputSettings:[String : Any] = [
    AVFormatIDKey: Int(kAudioFormatLinearPCM),
    AVSampleRateKey: 44100,
    AVNumberOfChannelsKey: 2,
    AVChannelLayoutKey: NSData(bytes:&channelLayout, length:  MemoryLayout.size(ofValue: AudioChannelLayout.self)),
    AVLinearPCMBitDepthKey: 16,
    AVLinearPCMIsNonInterleaved: false,
    AVLinearPCMIsFloatKey: false,
    AVLinearPCMIsBigEndianKey: false,
]

我希望这将有所帮助。

于 2020-02-14T16:58:21.213 回答