c - 使用 PortAudio 让 flite 输出音频

Question

我正在尝试让flite 语音合成库在我的 Mac 上运行，但 flite 库不支持我的声音架构。为了解决这个问题，我使用PortAudio来播放合成的音频；所以我不得不在audio.c文件中做一些修改才能让 flite 使用那个库。在 GNU AutoTools 中闲逛了一段时间后，我设法让所有东西都编译得很好，但后来我运行程序并得到这个输出：

$ ./flite -t "test"
frameIndex: 0
maxFrameIndex: 0
numChannels: 1
numSamples: 7225
sampleRate: 8000
=== Now playing back. ===
Waiting for playback to finish.
frameIndex in callback: -2008986336
maxFrameIndex in callback: 32655
numChannels in callback: 152579008
numSamples in callback: 0
sampleRate in callback: 0
Segmentation fault: 11  

$ ./flite -t "test"
frameIndex: 0
maxFrameIndex: 0
numChannels: 1
numSamples: 7225
sampleRate: 8000
=== Now playing back. ===
Waiting for playback to finish.
frameIndex in callback: -71217888
maxFrameIndex in callback: 32712
numChannels in callback: 232979392
numSamples in callback: 0
sampleRate in callback: 0
Segmentation fault: 11

这是audio.c文件中的相关代码，当我提供命令行参数时会调用它-t。playCallback()经过一些调试后，我标记了函数中发生分段错误的感兴趣区域。

static int playCallback( const void *inputBuffer, void *outputBuffer,
                        unsigned long framesPerBuffer,
                        const PaStreamCallbackTimeInfo* timeInfo,
                        PaStreamCallbackFlags statusFlags,
                        void *userData )
{
    cst_wave *data = (cst_wave*)userData;
    short *rptr = &data->samples[data->frameIndex * data->num_channels];
    short *wptr = (short*)outputBuffer;
    unsigned int i;
    int finished;
    unsigned int framesLeft = cst_wave_maxFrameIndex(data) - cst_wave_frameIndex(data);

    (void) inputBuffer; /* Prevent unused variable warnings. */
    (void) timeInfo;
    (void) statusFlags;
    (void) userData;

    printf("frameIndex in callback: %d\n", cst_wave_frameIndex(data));
    printf("maxFrameIndex in callback: %d\n", cst_wave_maxFrameIndex(data));
    printf("numChannels in callback: %d\n", cst_wave_num_channels(data));
    printf("numSamples in callback: %d\n", cst_wave_num_samples(data));
    printf("sampleRate in callback: %d\n\n", cst_wave_sample_rate(data));

    if( framesLeft < framesPerBuffer )
    {
        /* final buffer... */
        for( i=0; i<framesLeft; i++ )
        {
            *wptr++ = *rptr++;  /* left */
            if( cst_wave_num_channels(data) == 2 ) *wptr++ = *rptr++;  /* right */
        }
        for( ; i<framesPerBuffer; i++ )
        {
            *wptr++ = 0;  /* left */
            if( cst_wave_num_channels(data) == 2) *wptr++ = 0;  /* right */
        }
        data->frameIndex += framesLeft;
        finished = paComplete;
    }
    else
    {
        for( i=0; i<framesPerBuffer; i++ )
        {
            *wptr++ = *rptr++;  /* left */
            if( cst_wave_num_channels(data) == 2 ) *wptr++ = *rptr++;  /* right */
        }
        cst_wave_set_frameIndex(data, framesPerBuffer);
        finished = paContinue;
    }
    return finished;
}

int play_wave(cst_wave *w)
{
    PaStream* stream;
    PaStreamParameters outputParameters;
    cst_wave_set_frameIndex(w, 0);
    cst_wave_set_maxFrameIndex(w, (cst_wave_num_samples(w) / cst_wave_sample_rate(w)) * cst_wave_num_channels(w) * sizeof(short));
    int err = 0;
    err = Pa_Initialize();
    outputParameters.device = Pa_GetDefaultOutputDevice();
    if (outputParameters.device == paNoDevice)
    {
        fprintf(stderr,"Error: No default output device.\n");
        return -5;
    }
    printf("frameIndex: %d\n", cst_wave_frameIndex(w));
    printf("maxFrameIndex: %d\n", cst_wave_maxFrameIndex(w));
    printf("numChannels: %d\n", cst_wave_num_channels(w));
    printf("numSamples: %d\n", cst_wave_num_samples(w));
    printf("sampleRate: %d\n", cst_wave_sample_rate(w));

    outputParameters.channelCount = cst_wave_num_channels(w);
    outputParameters.sampleFormat = paInt16;
    outputParameters.suggestedLatency = Pa_GetDeviceInfo( outputParameters.device )->defaultLowOutputLatency;
    outputParameters.hostApiSpecificStreamInfo = NULL;
    puts("=== Now playing back. ===");
    err = Pa_OpenStream(&stream,
                        NULL, /* no input */
                        &outputParameters,
                        cst_wave_sample_rate(w),
                        512,
                        paClipOff,
                        playCallback,
                        &w);
    if( stream )
    {
        err = Pa_StartStream( stream );
        if( err != paNoError ) goto done;

        puts("Waiting for playback to finish.");

        while((err = Pa_IsStreamActive(stream)) == 1) Pa_Sleep(100);
        if( err < 0 ) goto done;

        err = Pa_CloseStream( stream );
        if( err != paNoError ) goto done;

        puts("Done.");
    }
done:
    Pa_Terminate();
    free(cst_wave_samples(w));
}

因为它是相关的，所以我还稍微修改了cst_wave结构，cst_wave.h以便它包含我必须存储的数据，并#defines在已经存在的数据中添加一些：

typedef struct  cst_wave_struct {
    const char *type;
    int frameIndex;
    int maxFrameIndex;
    int sample_rate;
    int num_samples;
    int num_channels;
    short *samples;
} cst_wave;

#define cst_wave_num_samples(w) (w?w->num_samples:0)
#define cst_wave_num_channels(w) (w?w->num_channels:0)
#define cst_wave_sample_rate(w) (w?w->sample_rate:0)
#define cst_wave_samples(w) (w->samples)
#define cst_wave_frameIndex(w) (w->frameIndex)
#define cst_wave_maxFrameIndex(w) (w->maxFrameIndex)

#define cst_wave_set_num_samples(w,s) w->num_samples=s
#define cst_wave_set_num_channels(w,s) w->num_channels=s
#define cst_wave_set_sample_rate(w,s) w->sample_rate=s
#define cst_wave_set_frameIndex(w,s) w->frameIndex=s
#define cst_wave_set_maxFrameIndex(w,s) w->maxFrameIndex=s

更新 1：

遵循@Rohan 的建议现在给了我这个输出：

$ ./bin/flite -t "test"
frameIndex: 0
maxFrameIndex: 0
numChannels: 1
numSamples: 7225
sampleRate: 8000
=== Now playing back. ===
Waiting for playback to finish.
frameIndex in callback: 0
maxFrameIndex in callback: 0
numChannels in callback: 1
numSamples in callback: 7225
sampleRate in callback: 8000

Done.
flite(68929,0x7fff71c0d310) malloc: *** error for object 0x7fd6e2809800: pointer being freed was not allocated
*** set a breakpoint in malloc_error_break to debug
Abort trap: 6

为了解决这个问题，我删除了free(cst_wave_samples(w));. 现在程序正常执行，没有明显错误，但我的 Mac 上仍然没有音频输出。有什么建议么？

score 7 · Accepted Answer

在我看来，问题可能出在其他地方。

当所有的事情都说完了，你添加评论的例程真的很简单。它基本上只是将一个充满数据的缓冲区从一个地方复制到另一个地方，如果数据没有填满输入缓冲区，则零填充其余部分。如果我正在编写代码，我可能会按照以下一般思路做更多的事情：

const unsigned frame_size = sizeof(short) * data->num_channels;    

char *source = &data->samples[data->frameIndex * data->num_channels];
char *dest = outputBuffer;

unsigned framesLeft = data->maxFrameIndex - data->frameIndex;
unsigned framesEmpty = framesPerBuffer - framesLeft;

memcpy(source, dest, framesLeft * frame_size);
memset(dest+framesLeft * frame_size, 0, framesEmpty * frame_size);

data->frameIndex += framesPerBuffer;

尽管写得相当笨拙，但如果需要填充的大小为零，则问题中的if/else将完全跳过该部分。memset

因此，这会将一个充满数据的缓冲区从一个地方复制到另一个地方，并将任何剩余部分填充为零。如果您遇到段错误，那么分配目标缓冲区的任何内容显然都没有在那里分配足够的空间。如果不做一些观察，就不可能猜测分配是否发生在Pa_Initialize, Pa_OpenStream,Pa_StartStream或其他地方——而且很可能你不太关心实际执行分配的代码而不是计算要分配多少空间的代码（这可能处于上述之一，或完全在其他地方）。

score 5 · Accepted Answer

在您的play_wave函数中，您正在调用：

err = Pa_OpenStream(&stream,
                    NULL, /* no input */
                    &outputParameters,
                    cst_wave_sample_rate(w),
                    512,
                    paClipOff,
                    playCallback,
                    &w);

这里作为你传递的最后一个参数&w，所以你传递cst_wave **的w是定义为cst_wave *w。

但是在playCallback()你使用它作为

cst_wave *data = (cst_wave*)userData;

因此，在此功能中，您错误地访问cst_wave **为cst_wave *. 因此，在某些时候，您将在使用w.

frameIndex, maxFrameIndex此外，这也是您的输出显示的其他参数（例如等）输出不正确的原因。

解决方案只是传递w给Pa_OpenStream()函数而不是&w.

您的下一个问题是您的设置不maxFrameIndex正确。正如您在评论中所说，这不应该是0. 为了正确设置它，你应该有这样的东西：

cst_wave_set_maxFrameIndex(w, cst_wave_num_samples(w) * cst_wave_num_channels(w));

最后，看起来你的回调可能有点搞砸了。这是一种更好，更有效的编写方法：

static int playCallback( const void *inputBuffer, void *outputBuffer,
                        unsigned long framesPerBuffer,
                        const PaStreamCallbackTimeInfo* timeInfo,
                        PaStreamCallbackFlags statusFlags,
                        void *userData )
{
    cst_wave *data = (cst_wave*)userData;
    short *rptr = &data->samples[data->frameIndex * data->num_channels];
    short *wptr = (short*)outputBuffer;
    int finished;
    unsigned int framesLeft = data->maxFrameIndex - data->frameIndex;

    (void) inputBuffer; /* Prevent unused variable warnings. */
    (void) timeInfo;
    (void) statusFlags;
    (void) userData;

    if( framesLeft < framesPerBuffer )
    {
        /* final buffer... */
        memcpy(wptr, rptr, sizeof(*wptr) * data->num_channels * framesLeft);
        memset(wptr, sizeof(*wptr) * data->num_channels * framesPerBuffer, 0);
        data->frameIndex += framesLeft;
        finished = paComplete;
    }
    else
    {
        memcpy(wptr, rptr, sizeof(*wptr) * data->num_channels * framesPerBuffer);
        data->frameIndex += framesPerBuffer;
        finished = paContinue;
    }
    return finished;
}

score 3 · Accepted Answer

You're in luck. I was able to compile both PortAudio and flite on my own Mac, and solve your problem.

You have several issues other than those mentioned before, all of which I have addressed in the code dump below.

Minor: You don't use consistently your own API for cst_wave.
Minor: I prefer to enclose my while and if blocks with {} always. This has a habit of preventing mysterious bugs.
Max Frames was being set to zero. That's because in (cst_wave_num_samples(w) / cst_wave_sample_rate(w)) * cst_wave_num_channels(w) * sizeof(short), you were dividing by the sample rate, which was greater than your number of samples. Given that integer division is left-associative and truncating, yadda yadda yadda zero.
Max Frames is still wrong, as a frame includes all channel samples. The number of frames is thus agnostic to both number of channels and the size of the samples themselves. Allowing myself to guess that flite misuses sample to mean frame, your max frame index is just cst_wave_num_samples(w). Else it will be cst_wave_num_samples(w) / cst_wave_num_channels(w).
PortAudio's documentation states you should call Pa_StopStream(stream) after the stream becomes inactive, whether or not you were waiting until it became so.
I simplified the callback, and corrected it for
- Minor: Consistent use of your API
- MAJOR: Ehm... cst_wave_set_frameIndex(data, framesPerBuffer); is definitely wrong. You're pinning yourself at frame index 512 instead of incrementing! That's because you asked for 512 frames per buffer when opening the stream and you're not incrementing the frame index by framesPerBuffer, you're setting the frame index to framesPerBuffer. You hadn't made it that far because your maxFrameIndex was 0 anyways so you were exiting. I fixed it so that the frame index increments - with your API of course.

Here is the code, which I took the freedom of documenting and cleaning until it approached my standards of elegance. Enjoy!

#include <stdio.h>
#include <string.h>

/**
 * Audio play callback.
 * 
 * Follows the PaStreamCallback signature, wherein:
 * 
 * @param input   and
 * @param output  are either arrays of interleaved samples or; if
 *                non-interleaved samples were requested using the
 *                paNonInterleaved sample format flag, an array of buffer
 *                pointers, one non-interleaved buffer for each channel.
 * @param frameCount    The number of sample frames to be processed by the
 *                      stream callback.
 * @param timeInfo      Timestamps indicating the ADC capture time of the first
 *                      sample in the input buffer, the DAC output time of the
 *                      first sample in the output buffer and the time the
 *                      callback was invoked. See PaStreamCallbackTimeInfo and
 *                      Pa_GetStreamTime()
 * @param statusFlags   Flags indicating whether input and/or output buffers
 *                      have been inserted or will be dropped to overcome
 *                      underflow or overflow conditions.
 * @param userData      The value of a user supplied pointer passed to
 *                      Pa_OpenStream() intended for storing synthesis data
 *                      etc.
 */

static int  playCallback(const void*                     inputBuffer,
                         void*                           outputBuffer,
                         unsigned long                   framesPerBuffer,
                         const PaStreamCallbackTimeInfo* timeInfo,
                         PaStreamCallbackFlags           statusFlags,
                         void*                           userData){
    (void) inputBuffer; /* Prevent unused variable warnings. */
    (void) timeInfo;
    (void) statusFlags;
    (void) userData;


    /**
     * Compute current processing state.
     */

    cst_wave*    data;
    short*       rptr;
    short*       wptr;
    unsigned int framesLeft, /* Number of frames of data remaining within the stream ***as a whole*** */
                 frames,     /* Number of frames of data to be written for this buffer. */
                 framesPad,  /* Number of frames of padding required within the final buffer. */
                 samples,    /* Number of samples of data to be written for this buffer. */
                 samplesPad, /* Number of samples of padding required within the final buffer. */
                 numBytes,   /* Number of bytes of data to be written for this buffer. */
                 numBytesPad;/* Number of bytes of padding required within the final buffer. */
    int          finalBuffer;/* Stores whether or not this is the final buffer. */


    data         = (cst_wave*)userData;
    rptr         = &data->samples[cst_wave_frameIndex  (data) *
                                  cst_wave_num_channels(data)];
    wptr         = (short*)outputBuffer;
    framesLeft   = cst_wave_maxFrameIndex(data) - cst_wave_frameIndex(data);
    finalBuffer  = framesLeft      <= framesPerBuffer;
    frames       = finalBuffer     ?  framesLeft     : framesPerBuffer;
    framesPad    = framesPerBuffer -  frames;
    samples      = frames     * cst_wave_num_channels(data);
    samplesPad   = framesPad  * cst_wave_num_channels(data);
    numBytes     = samples    * sizeof(short);
    numBytesPad  = samplesPad * sizeof(short);


    /**
     * Debug code. Comment out in production.
     */

    printf("framesLeft in callback: %u\n", framesLeft);
    printf("framesPerBuffer in callback: %lu\n", framesPerBuffer);
    printf("frames in callback: %u\n", frames);

    printf("frameIndex in callback: %d\n", cst_wave_frameIndex(data));
    printf("maxFrameIndex in callback: %d\n", cst_wave_maxFrameIndex(data));
    printf("numChannels in callback: %d\n", cst_wave_num_channels(data));
    printf("numSamples in callback: %d\n", cst_wave_num_samples(data));
    printf("sampleRate in callback: %d\n\n", cst_wave_sample_rate(data));


    /**
     * Output data. We handle the final buffer specially, padding it with zeros.
     */

    memcpy(wptr, rptr, numBytes);
    wptr += samples;
    rptr += samples;
    cst_wave_set_frameIndex(data, cst_wave_frameIndex(data) + frames);
    memset(wptr, 0, numBytesPad);
    wptr += samplesPad;
    rptr += samplesPad;


    /**
     * Return a completion or continue code depending on whether this was the
     * final buffer or not respectively.
     */

    return finalBuffer ? paComplete : paContinue;
}

/**
 * Play wave function.
 * 
 * Plays the given cst_wave data as audio, blocking until this is done.
 */

int play_wave(cst_wave *w){
    PaStream*          stream;
    PaStreamParameters outputParameters;
    int                err;

    /**
     * Initialize custom fields in cst_wave struct.
     */

    cst_wave_set_frameIndex(w, 0);
    cst_wave_set_maxFrameIndex(w, (cst_wave_num_samples(w)));
    // / cst_wave_sample_rate(w)  * cst_wave_num_channels(w) * sizeof(short)


    /**
     * Initialize Port Audio device and stream parameters.
     */

    err = Pa_Initialize();
    outputParameters.device = Pa_GetDefaultOutputDevice();
    if (outputParameters.device == paNoDevice){
        fprintf(stderr,"Error: No default output device.\n");
        return -5;
    }

    printf("frameIndex: %d\n", cst_wave_frameIndex(w));
    printf("maxFrameIndex: %d\n", cst_wave_maxFrameIndex(w));
    printf("numChannels: %d\n", cst_wave_num_channels(w));
    printf("numSamples: %d\n", cst_wave_num_samples(w));
    printf("sampleRate: %d\n", cst_wave_sample_rate(w));

    outputParameters.channelCount = cst_wave_num_channels(w);
    outputParameters.sampleFormat = paInt16;
    outputParameters.suggestedLatency = Pa_GetDeviceInfo( outputParameters.device )->defaultLowOutputLatency;
    outputParameters.hostApiSpecificStreamInfo = NULL;


    /**
     * Open the stream for playback.
     */

    puts("=== Now playing back. ===");
    err = Pa_OpenStream(&stream,
                        NULL, /* no input */
                        &outputParameters,
                        cst_wave_sample_rate(w),
                        512,
                        paClipOff,
                        playCallback,
                        w);

    if(stream){
        /**
         * Start the stream.
         */

        err = Pa_StartStream(stream);
        if(err != paNoError){
            goto done;
        }

        /**
         * Block while it plays.
         */

        puts("Waiting for playback to finish.");
        while((err = Pa_IsStreamActive(stream)) == 1){
            Pa_Sleep(100);
        }
        if(err < 0){
            goto done;
        }


        /**
         * Stop and close the stream. Both are necessary.
         */

        Pa_StopStream(stream);
        err = Pa_CloseStream(stream);
        if(err != paNoError){
            goto done;
        }
        puts("Done.");
    }

    /**
     * Terminate and leave.
     */
done:
    Pa_Terminate();
    return 0;
}

c - 使用 PortAudio 让 flite 输出音频

更新 1：

3 回答 3

Related

Reference