0

我的目标是把不同音频文件的片段拼接在一起,并对每个片段分别应用时间拉伸效果(pitch 元素的 tempo 属性)。每个片段在最终混音中应持续 12 秒,因此输出文件的总长度应为 36 秒。以下代码针对 3 个文件执行此操作(可以改用循环并把所有对象存入列表来扩展它)。[是的,代码有点长]

import gst, gobject
gobject.threads_init()
import numpy as np
import math

# Composition that holds the three file sources laid out on one timeline.
# NOTE: every `?` below is a placeholder for a value that is still unknown;
# the script does not run until they are replaced with real numbers.
comp  = gst.element_factory_make("gnlcomposition", "composition")

# First source: file1.mp3, placed at the beginning of the composition.
gsrc1 = gst.element_factory_make("gnlfilesource")
gsrc1.props.location = "file1.mp3"
gsrc1.props.start          = 0
gsrc1.props.duration       = ?
gsrc1.props.media_start    = 0
gsrc1.props.priority       = 3
comp.add(gsrc1)

# Second source: file2.mp3; its position and length are still to be found.
gsrc2 = gst.element_factory_make("gnlfilesource")
gsrc2.props.location = "file2.mp3"
gsrc2.props.start          = ?
gsrc2.props.duration       = ?
gsrc2.props.media_start    = 0
gsrc2.props.priority       = 4
comp.add(gsrc2)

# Third source: file3.mp3; its position and length are still to be found.
gsrc3 = gst.element_factory_make("gnlfilesource")
gsrc3.props.location = "file3.mp3"
gsrc3.props.start          = ?
gsrc3.props.duration       = ?
gsrc3.props.media_start    = 0
gsrc3.props.priority       = 5
comp.add(gsrc3)

def _make_tempo_bin(tempo):
    """Build a bin that chains audioconvert -> pitch with the given tempo.

    The bin exposes ghost pads named "sink" (on the converter) and "src"
    (on the pitch element). Returns ``(bin, converter, pitch)`` so the
    caller can keep a reference to each element.
    """
    effect_bin = gst.Bin()
    converter = gst.element_factory_make("audioconvert")
    stretcher = gst.element_factory_make("pitch")
    stretcher.set_property("tempo", tempo)
    effect_bin.add(converter, stretcher)
    converter.link(stretcher)
    effect_bin.add_pad(gst.GhostPad("sink", converter.get_pad("sink")))
    effect_bin.add_pad(gst.GhostPad("src", stretcher.get_pad("src")))
    return effect_bin, converter, stretcher


# One time-stretch bin per segment, each with its own tempo factor.
bin, audioconvertbin, pitch1 = _make_tempo_bin(1.05)
bin2, audioconvertbin2, pitch2 = _make_tempo_bin(0.95)
bin3, audioconvertbin3, pitch3 = _make_tempo_bin(1.1)

# Three gnloperation elements, one per time-stretch bin, all added to the
# composition.
op = gst.element_factory_make("gnloperation")
comp.add(op)

op2 = gst.element_factory_make("gnloperation", "op2")
comp.add(op2)

op3 = gst.element_factory_make("gnloperation", "op3")
comp.add(op3)

# Each operation wraps one effect bin. `?` marks a start/duration value that
# is still unknown and must be filled in before the script can run.
# NOTE(review): the operations use priority 1 while the sources use 3-5;
# presumably so each operation sits above the source it processes - confirm
# against the gnonlin documentation.
op.add(bin)
op.props.start          = 0 * gst.SECOND
op.props.duration       = ?
op.props.priority       = 1
op2.add(bin2)
op2.props.start          = ?
op2.props.duration       = ?
op2.props.priority       = 1
op3.add(bin3)
op3.props.start          = ?
op3.props.duration       = ?
op3.props.priority       = 1

# Output side of the pipeline: convert -> Vorbis encode -> Ogg mux -> file.
pipeline = gst.Pipeline()
audioconvert = gst.element_factory_make("audioconvert")
encoder = gst.element_factory_make("vorbisenc")
mux = gst.element_factory_make("oggmux")
filesink = gst.element_factory_make("filesink")
filesink.set_property("location", "output.ogg")

# The composition is added to the pipeline but not linked here: only the
# static output chain is linked at this point.
output_chain = (audioconvert, encoder, mux, filesink)
pipeline.add(comp, *output_chain)
gst.element_link_many(*output_chain)

def on_pad(comp, pad, elements):
    """Link a pad newly added on the composition to the downstream element.

    comp     -- the element that emitted "pad-added".
    pad      -- the pad that was just added.
    elements -- the downstream element to link to (a single element,
                despite the plural name).

    If the downstream element has no pad compatible with ``pad``, the pad
    is left unlinked instead of passing None to ``pad.link()``.
    """
    convpad = elements.get_compatible_pad(pad, pad.get_caps())
    if convpad is None:
        # No compatible pad available; nothing to link.
        return
    pad.link(convpad)
# Link the composition's pad to the converter as soon as the pad appears.
comp.connect("pad-added", on_pad, audioconvert)

loop = gobject.MainLoop(is_running=True)
bus = pipeline.get_bus()
bus.add_signal_watch()
def on_message(bus, message, loop):
    """Stop the main loop on end-of-stream or on a pipeline error.

    bus     -- the bus that delivered the message.
    message -- the message to inspect.
    loop    -- the main loop to quit (passed as user data on connect).
    """
    if message.type == gst.MESSAGE_EOS:
        loop.quit()
    elif message.type == gst.MESSAGE_ERROR:
        # Decode the error so the output shows the actual cause and the
        # debug string rather than the repr of the message object.
        error, debug = message.parse_error()
        print("Error: %s (%s)" % (error, debug))
        loop.quit()
bus.connect("message", on_message, loop)

# Run until on_message quits the loop, then shut the pipeline down.
pipeline.set_state(gst.STATE_PLAYING)
loop.run()
pipeline.set_state(gst.STATE_NULL)
4

1 回答 1

0

我发现“?”处的“最佳”取值是:

# Source settings for the three segments. GStreamer times are integer
# nanosecond counts, so every product that involves a float tempo factor is
# wrapped in int().

# Segment 1 (stretched with tempo 1.05): duration scaled by 1.05.
gsrc1 = gst.element_factory_make("gnlfilesource")
gsrc1.props.location = "file1.mp3"
gsrc1.props.start          = 0
gsrc1.props.duration       = int(12 * 1.05 * gst.SECOND)
gsrc1.props.media_start    = 0
gsrc1.props.priority       = 3
comp.add(gsrc1)

# Segment 2 (stretched with tempo 0.95): starts where segment 1 ends;
# duration is NOT scaled because the tempo is below 1.
gsrc2 = gst.element_factory_make("gnlfilesource")
gsrc2.props.location = "file2.mp3"
gsrc2.props.start          = int(12 * 1.05 * gst.SECOND)
gsrc2.props.duration       = int(12 * gst.SECOND)
gsrc2.props.media_start    = 36 * gst.SECOND
gsrc2.props.priority       = 4
comp.add(gsrc2)

# Segment 3 (stretched with tempo 1.1): starts where segment 2 ends;
# duration scaled by 1.1.
gsrc3 = gst.element_factory_make("gnlfilesource")
gsrc3.props.location = "file3.mp3"
gsrc3.props.start          = int(12 * 1.05 * gst.SECOND + 12 * gst.SECOND)
gsrc3.props.duration       = int(12 * 1.1 * gst.SECOND)
gsrc3.props.media_start    = 60 * gst.SECOND
gsrc3.props.priority       = 4
comp.add(gsrc3)

# (operation, effect bin, start, duration) for each segment; the start and
# duration of every operation mirror those of the source it processes.
operation_settings = (
    (op, bin,
     0 * gst.SECOND,
     int(12 * 1.05 * gst.SECOND)),
    (op2, bin2,
     int(12 * 1.05 * gst.SECOND),
     int(12 * gst.SECOND)),
    (op3, bin3,
     int(12 * 1.05 * gst.SECOND + 12 * gst.SECOND),
     int(12 * 1.1 * gst.SECOND)),
)

for operation, effect_bin, start_time, length in operation_settings:
    operation.add(effect_bin)
    operation.props.start = start_time
    operation.props.duration = length
    operation.props.priority = 1

简而言之:tempo 小于 1 时,不需要在时长计算中考虑它;而 tempo 大于 1 时,则必须把它乘进时长里(从用户的角度来看,这是一种奇怪的行为)。

我将结果与 Audacity 进行了比较,两者大致相同,只是两个片段之间存在一个约 0.026 秒的微小间隙。我不认为这是舍入误差造成的,也许是我的持续时间/开始时间设置得还不完全正确(请参见下图中的间隙)。

摘录之间的差距

希望它会帮助一些人。

于 2014-04-23T09:12:08.883 回答