好吧,出于某种原因,我想将一些选定的 mp3 文件拆分为块时间:~28 毫秒。
我有切片<1秒的质量问题。
from av import AudioFrame
from pydub import AudioSegment
import av
#open an mp3 file
sound1 = AudioSegment.from_file(r"ΑΓΙΑ ΣΚΕΠΗ.mp3")
codec = av.CodecContext.create('pcm_s16le', 'r')
codec.sample_rate = 44100
codec.channels = 2
#split the file each part 10 second
#slices = sound1[::10000]
#split the file each part 2 second
#slices = sound1[::2000]
#split the file each part 1 second
#slices = sound1[::1000] #ok quality 1 tick every 1 second
#split the file each part 10 millisecond
slices = sound1[::10] #bad quality
pieces = AudioSegment.silent()
'''
for slice in slices:
pieces = pieces+slice
pieces.export("remaked.mp3",format="mp3")
#remaked works well
'''
for slice in slices:
#qualty loss (why?)
packet = av.Packet(slice.raw_data)
frame = codec.decode(packet)[0]
#remake AudioSegment from Av.AudioFrame
for p in frame.planes:
data = p.to_bytes()
data_segment = AudioSegment(data, sample_width=2, channels=2, frame_rate=44100)
pieces = pieces+data_segment
pieces.export("remaked.mp3",format="mp3")
我该如何解决质量问题?
请注意,我使用 av.AudioFrame ( frame = codec.decode(packet)[0]
) 因为我想用 aiortc 发送一些实时音频数据
编辑:
from av import AudioFrame
from pydub import AudioSegment
import pyaudio
import av
import fractions
from aiortc.mediastreams import MediaStreamTrack
class RadioTelephoneTrack(MediaStreamTrack):
kind = "audio"
def __init__(self):
super().__init__() # don't forget this!
self.sample_rate = 8000
self.AUDIO_PTIME = 0.020 # 20ms audio packetization
self.samples = int(self.AUDIO_PTIME * self.sample_rate)
self.FORMAT = pyaudio.paInt16
self.CHANNELS = 2
self.RATE = self.sample_rate
#self.RATE = 44100
self.CHUNK = int(8000*0.020)
#self.CHUNK = 1024
self.p = pyaudio.PyAudio()
self.mic_stream = self.p.open(format=self.FORMAT, channels=1,rate=self.RATE, input=True,frames_per_buffer=self.CHUNK)
self.codec = av.CodecContext.create('pcm_s16le', 'r')
self.codec.sample_rate = self.RATE
#self.codec.sample_fmt = AV_SAMPLE_FMT_S16
self.codec.channels = 2
#self.codec.channel_layout = "mono";
self.sound1 = AudioSegment.from_file(r"ΑΓΙΑ ΣΚΕΠΗ.mp3").set_frame_rate(self.sample_rate)
print("Frame rate: "+str(self.sound1.frame_rate))
#self.sound1_channels = self.sound1.split_to_mono()
#self.sound1 = self.sound1_channels[0].overlay(self.sound1_channels[1])
self.audio_samples = 0
self.chunk_number = 0
#self.sound1 = self.sound1 - 30 # make sound1 quiter 30dB
async def recv(self):
mic_data = self.mic_stream.read(self.CHUNK)
mic_sound = AudioSegment(mic_data, sample_width=2, channels=1, frame_rate=self.RATE)
mic_sound = AudioSegment.from_mono_audiosegments(mic_sound, mic_sound)
mic_sound_duration = len(mic_sound)
#print("Mic sound duration: "+str(mic_sound_duration))
mp3_slice_duration = mic_sound_duration
if(len(self.sound1)>(self.chunk_number+1)*mp3_slice_duration):
sound1_part = self.sound1[self.chunk_number*mp3_slice_duration:(self.chunk_number+1)*mp3_slice_duration]
elif(len(self.sound1)>(self.chunk_number)*mp3_slice_duration):
sound1_part = self.sound1[self.chunk_number*mp3_slice_duration:]
else:
#replay
times_played_1 = int((self.chunk_number)*mp3_slice_duration/len(self.sound1))
times_played_2 = int((self.chunk_number+1)*mp3_slice_duration/len(self.sound1))
if(times_played_1==times_played_2):
time_start = ((self.chunk_number)*mp3_slice_duration)-(times_played_1*len(self.sound1))
time_end = ((self.chunk_number+1)*mp3_slice_duration)-(times_played_1*len(self.sound1))
sound1_part = self.sound1[time_start:time_end]
else:
time_start_1 = ((self.chunk_number)*mp3_slice_duration)-(times_played_1*len(self.sound1))
sound1_part1 = self.sound1[time_start_1:]
time_end_1 = ((self.chunk_number+1)*mp3_slice_duration)-(times_played_2*len(self.sound1))
sound1_part2 = self.sound1[0:time_end_1]
sound1_part = sound1_part1.append(sound1_part2, crossfade=5)
#sound1_part = AudioSegment.silent()
#self.mix_sound = sound1_part.overlay(mic_sound)
self.mix_sound = sound1_part
packet = av.Packet(self.mix_sound.raw_data)
frame = self.codec.decode(packet)[0]
frame.pts = self.audio_samples
self.audio_samples += frame.samples
self.chunk_number = self.chunk_number+1
return frame
上面的代码有效(更好)。现在的主要问题是:
- 声音听起来很有深度。
- 每次声音重新开始(从头开始)时都会发出咔哒声。