c# - 将 Byte[] 转换为 AudioClip 后统一音频中的一些噪音

Question

我正在尝试将 base64 转换为 byte[] 并在 audioClip 中，但我的声音中有一些噪音。

我正在使用此类将 byte[] 转换为 AudioClip。

using UnityEngine;
using System.Collections;
using System.IO;
using System;

namespace WWUtils.Audio{

  public class WAV  {
     // properties

     public float[] LeftChannel { get; internal set; }
     public float[] RightChannel { get; internal set; }
     public int ChannelCount { get; internal set; }
     public int SampleCount { get; internal set; }
     public int Frequency { get; internal set; }

     // convert two bytes to one float in the range -1 to 1

  static float bytesToFloat(byte firstByte, byte secondByte) {
      // convert two bytes to one short (little endian)
      short s = (short)((secondByte << 8) | firstByte);
      // convert to range from -1 to (just below) 1
      return s / 32768.0F;
  }

  static int bytesToInt(byte[] bytes,int offset=0){
      int value=0;
      for(int i=0;i<4;i++){
          value |= ((int)bytes[offset+i])<<(i*8);
      }
      return value;
  }

  private static byte[] GetBytes(string filename){
      return File.ReadAllBytes(filename);
  }

  // Returns left and right double arrays. 'right' will be null if sound is mono.
  public WAV(string filename):
      this(GetBytes(filename)) {}

  public WAV(byte[] wav){

      // Determine if mono or stereo
      ChannelCount = wav[22];     // Forget byte 23 as 99.999% of WAVs are 1 or 2 channels

      // Get the frequency
      Frequency = bytesToInt(wav,24);

      // Get past all the other sub chunks to get to the data subchunk:
      int pos = 12;   // First Subchunk ID from 12 to 16

      // Keep iterating until we find the data chunk (i.e. 64 61 74 61 ...... (i.e. 100 97 116 97 in decimal))
      while(!(wav[pos]==100 && wav[pos+1]==97 && wav[pos+2]==116 && wav[pos+3]==97)) {
          pos += 4;
          int chunkSize = wav[pos] + wav[pos + 1] * 256 + wav[pos + 2] * 65536 + wav[pos + 3] * 16777216;
          pos += 4 + chunkSize;
      }
      pos += 8;

      // Pos is now positioned to start of actual sound data.
      SampleCount = (wav.Length - pos)/2;     // 2 bytes per sample (16 bit sound mono)
      if (ChannelCount == 2) SampleCount /= 2;        // 4 bytes per sample (16 bit stereo)

      // Allocate memory (right will be null if only mono sound)
      LeftChannel = new float[SampleCount];
      if (ChannelCount == 2) RightChannel = new float[SampleCount];
      else RightChannel = null;

      // Write to double array/s:
      int i=0;
      while (pos < wav.Length) {
          LeftChannel[i] = bytesToFloat(wav[pos], wav[pos + 1]);
          pos += 2;
          if (ChannelCount == 2) {
              RightChannel[i] = bytesToFloat(wav[pos], wav[pos + 1]);
              pos += 2;
          }
          i++;
      }
  }
 public override string ToString (){
      return string.Format ("[WAV: LeftChannel={0}, RightChannel={1}, ChannelCount={2}, SampleCount={3}, Frequency={4}]", LeftChannel, RightChannel, ChannelCount, SampleCount, Frequency);
  }

}

而这个播放audioClip

  var recByte = System.Convert.FromBase64String(base64);
     WAV wav = new WAV(recByte);
     AudioClip audioClip = AudioClip.Create("soundLenny-Result", 
wav.SampleCount, 1, wav.Frequency, false, false);
     audioClip.SetData(wav.LeftChannel, 0);
//PlayClip...

我正在尝试转换以下类型之一。https://docs.microsoft.com/pt-br/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs

有了这个 riff-16khz-16bit-mono-pcm，我的音频会有些噪音。我正在尝试转换这种格式 riff-24khz-16bit-mono-pcm。

如果我使用 riff-24khz-16bit-mono-pcm，我会得到 IndexOutOfRangeException，如果使用 riff-16khz-16bit-mono-pcm 音频是噪音

这是错误。

IndexOutOfRangeException：索引超出了数组的范围。WWUtils.Audio.WAV..ctor (System.Byte[] wav) (在 Assets/Scripts/WAV.cs:68)

如果我使用麦克风录制的 wav 文件进行此操作，则脚本可以正常工作。我该如何纠正这个？

c# - 将 Byte[] 转换为 AudioClip 后统一音频中的一些噪音

0 回答 0

Related

Reference