I have been struggling with this problem for about 2 weeks now.
So, here is what I am trying to do:
- Read from two input sources (in this case, 2 udp url streams sent from ffmpeg command lines).
- Apply a filter, specifically this one:
[in_0]scale=800x600[src1];[src1][in_1]overlay[out]
(if you look at how I initialize my filters you will see why I use the names in_0 and in_1).
- Decode the packets into their respective frames.
- Use the av_buffersrc_add_frame() and av_buffersink_get_frame() functions to retrieve the final "filtered" frame for later use.
When I try to call av_buffersink_get_frame() to retrieve the "filtered" frame, I keep getting a return value of -11 (AVERROR(EAGAIN)). According to the documentation, when this happens, "more input frames must be added to the filtergraph to get more output". I took that to mean I have to repeat the process of decoding packets into frames, calling av_buffersrc_add_frame(), and then calling av_buffersink_get_frame() again. But when I do that, the result never changes; I keep getting -11.
The strange thing is that when I provide only one input source instead of two, along with an appropriate filter string, I have no problem. The "filtered" frame is valid and I can use it just fine...
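For reference, this is the feed/drain pattern I believe the API expects, based on my reading of the filtering_video.c example (a minimal sketch only; the srcCtx0/srcCtx1/sinkCtx names are hypothetical stand-ins for my real contexts, and overlay, as I understand it, synchronizes its two inputs by timestamp, so it can keep returning EAGAIN until both inputs have frames covering a common time range):
// Minimal sketch: feed one decoded frame into EACH buffersrc, then drain
// the buffersink until it reports AVERROR(EAGAIN) again.
bool FilterOnce(AVFilterContext* srcCtx0, AVFrame* frame0,
    AVFilterContext* srcCtx1, AVFrame* frame1,
    AVFilterContext* sinkCtx, AVFrame* outFrame)
{
    // Both inputs must receive data, or overlay will wait on the missing one
    if (av_buffersrc_add_frame_flags(srcCtx0, frame0,
        AV_BUFFERSRC_FLAG_KEEP_REF) < 0)
        return false;
    if (av_buffersrc_add_frame_flags(srcCtx1, frame1,
        AV_BUFFERSRC_FLAG_KEEP_REF) < 0)
        return false;
    // Drain: EAGAIN here just means "feed more frames", not a fatal error
    while (true)
    {
        int ret = av_buffersink_get_frame(sinkCtx, outFrame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            break;
        if (ret < 0)
            return false; // a real error
        // ... use outFrame here ...
        av_frame_unref(outFrame);
    }
    return true;
}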
Here is all of my code (using ffmpeg v4.3.2):
Functions
// Headers the snippets below need (my assumption of the minimal set);
// the FFmpeg C headers must be wrapped in extern "C" under C++
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavformat/avformat.h>
}
#include <array>
#include <cstdio>
#include <string>
#include <vector>
typedef struct VideoContext
{
AVFormatContext* FormatCtx;
int StreamIndex;
AVCodecContext* CodecCtx;
AVPacket Packet;
AVFrame* Frame;
AVFilterContext* BufferSrcCtx;
VideoContext() :
FormatCtx(nullptr),
StreamIndex(-1),
CodecCtx(nullptr),
Packet(),
Frame(nullptr),
BufferSrcCtx(nullptr)
{
}
} VideoContext;
typedef struct UrlInput
{
std::string UrlStr;
DataTypes::VideoContext VidCtx;
} UrlInput;
void CleanupVideoContext(DataTypes::VideoContext& vidCtx)
{
if (vidCtx.BufferSrcCtx)
{
avfilter_free(vidCtx.BufferSrcCtx);
vidCtx.BufferSrcCtx = NULL;
}
if (vidCtx.Frame)
{
av_frame_free(&vidCtx.Frame);
vidCtx.Frame = NULL;
}
if (vidCtx.CodecCtx)
{
avcodec_free_context(&vidCtx.CodecCtx);
vidCtx.CodecCtx = NULL;
}
if (vidCtx.FormatCtx)
{
avformat_close_input(&vidCtx.FormatCtx);
vidCtx.FormatCtx = NULL;
}
}
void Cleanup(AVFilterGraph*& filterGraph, AVFilterContext*& buffersinkCtx,
AVFrame*& filterFrame, std::vector<UrlInput>& inputs)
{
for (int i = 0; i < inputs.size(); ++i)
{
CleanupVideoContext(inputs[i].VidCtx);
}
if (buffersinkCtx)
{
avfilter_free(buffersinkCtx);
buffersinkCtx = NULL;
}
if (filterGraph)
{
avfilter_graph_free(&filterGraph);
filterGraph = NULL;
}
if (filterFrame)
{
av_frame_free(&filterFrame);
filterFrame = NULL;
}
}
bool OpenUrlVideo(UrlInput& input)
{
int attemptsLeft = 5;
do
{
int result = avformat_open_input(&input.VidCtx.FormatCtx,
input.UrlStr.c_str(), NULL, NULL);
if (result < 0)
{
Utils::LogAVMessage(result, AV_LOG_ERROR,
"Cannot open url...");
CleanupVideoContext(input.VidCtx);
continue;
}
result = avformat_find_stream_info(input.VidCtx.FormatCtx, NULL);
if (result < 0)
{
Utils::LogAVMessage(result, AV_LOG_ERROR,
"Cannot find stream information...");
CleanupVideoContext(input.VidCtx);
continue;
}
AVCodec* codec = nullptr;
int streamIdx = av_find_best_stream(input.VidCtx.FormatCtx,
AVMEDIA_TYPE_VIDEO, -1, -1, &codec, 0);
const char* errMsg = "";
if (!Utils::Ffmpeg::IsStreamIndexValid(streamIdx, errMsg))
{
Utils::LogAVMessage(errMsg, AV_LOG_ERROR);
CleanupVideoContext(input.VidCtx);
continue;
}
AVStream* stream = input.VidCtx.FormatCtx->streams[streamIdx];
input.VidCtx.StreamIndex = streamIdx;
if (!codec)
{
const AVCodec* tempCodec =
avcodec_find_decoder(stream->codecpar->codec_id);
codec = const_cast<AVCodec*>(tempCodec);
if (!codec)
{
Utils::LogAVMessage("Could not find decoder...",
AV_LOG_ERROR);
CleanupVideoContext(input.VidCtx);
continue;
}
}
input.VidCtx.CodecCtx = avcodec_alloc_context3(codec);
if (!input.VidCtx.CodecCtx)
{
Utils::LogAVMessage("Could not create decoding context...",
AV_LOG_ERROR);
CleanupVideoContext(input.VidCtx);
continue;
}
result = avcodec_parameters_to_context(input.VidCtx.CodecCtx,
stream->codecpar);
if (result < 0)
{
Utils::LogAVMessage(result, AV_LOG_ERROR,
"Could not assign codec parameters...");
CleanupVideoContext(input.VidCtx);
continue;
}
result = avcodec_open2(input.VidCtx.CodecCtx, codec, NULL);
if (result < 0)
{
Utils::LogAVMessage(result, AV_LOG_ERROR,
"Cannot open video decoder...");
CleanupVideoContext(input.VidCtx);
continue;
}
if (input.VidCtx.CodecCtx->width == 0 ||
input.VidCtx.CodecCtx->height == 0)
{
Utils::LogAVMessage("Codec Context's width and/or height
is 0...", AV_LOG_ERROR);
CleanupVideoContext(input.VidCtx);
continue;
}
break;
} while (--attemptsLeft > 0);
// Report failure if every attempt was exhausted
return attemptsLeft > 0;
}
inline void FreeFilterInOuts(AVFilterInOut** outs, AVFilterInOut** ins)
{
if (outs)
{
avfilter_inout_free(outs);
outs = NULL;
}
if (ins)
{
avfilter_inout_free(ins);
ins = NULL;
}
}
bool CreateFilterGraph(AVFilterGraph*& filterGraph,
AVFilterContext*& buffersinkCtx, std::vector<UrlInput>& inputs,
std::string filterStr)
{
const AVFilter* buffsrc = avfilter_get_by_name("buffer");
const AVFilter* buffsink = avfilter_get_by_name("buffersink");
// avfilter_graph_parse2() allocates these itself; pre-allocating here
// would leak the initial structs when the call overwrites the pointers
AVFilterInOut* filterOuts = nullptr;
AVFilterInOut* filterIns = nullptr;
int numOfInputs = static_cast<int>(inputs.size());
std::vector<char*> buffsrcNames;
char* buffsinkName = nullptr;
filterGraph = avfilter_graph_alloc();
if (!filterGraph)
{
Utils::LogAVMessage("Could not allocate filter graph...",
AV_LOG_ERROR);
FreeFilterInOuts(&filterOuts, &filterIns);
filterGraph = NULL;
return false;
}
std::string tempFilterStr = "";
// std::vector cannot hold raw C arrays, so use std::array elements
// (value-initialized to zeroed buffers by the vector constructor)
std::vector<std::array<char, 512>> argsList(numOfInputs);
for (int i = 0; i < numOfInputs; ++i)
{
AVStream* stream = inputs[i].VidCtx.FormatCtx->
streams[inputs[i].VidCtx.StreamIndex];
int width = inputs[i].VidCtx.CodecCtx->width;
int height = inputs[i].VidCtx.CodecCtx->height;
AVPixelFormat pixFmt = inputs[i].VidCtx.CodecCtx->pix_fmt;
AVRational timeBase = stream->time_base;
AVRational aspectRatio =
inputs[i].VidCtx.CodecCtx->sample_aspect_ratio;
// Define input properties
std::snprintf(argsList[i].data(), argsList[i].size(),
"buffer=video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d[in_%d]; ",
width, height, pixFmt, timeBase.num, timeBase.den,
aspectRatio.num, aspectRatio.den, i);
tempFilterStr += argsList[i].data();
}
filterStr = tempFilterStr + filterStr + "; [out] buffersink";
Utils::LogAVMessage((std::string("\nFull Filter String:\n\t") +
filterStr + "\n").c_str(), AV_LOG_WARNING);
int result = avfilter_graph_parse2(filterGraph, filterStr.c_str(),
&filterIns, &filterOuts);
if (result < 0)
{
Utils::LogAVMessage(result, AV_LOG_ERROR,
"Could not parse filter graph...");
FreeFilterInOuts(&filterOuts, &filterIns);
avfilter_graph_free(&filterGraph);
return false;
}
result = avfilter_graph_config(filterGraph, NULL);
if (result < 0)
{
Utils::LogAVMessage(result, AV_LOG_ERROR,
"Could not configure filter graph...");
FreeFilterInOuts(&filterOuts, &filterIns);
avfilter_graph_free(&filterGraph);
return false;
}
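// avfilter_graph_parse2() auto-names the filters it creates as
// "Parsed_<filter>_<index>" (e.g. "Parsed_buffer_0", "Parsed_buffersink_2"),
// which is what the substring checks below rely on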
for (uint32_t i = 0; i < filterGraph->nb_filters; ++i)
{
if (Utils::ContainsString(filterGraph->filters[i]->name,
"buffer_"))
{
buffsrcNames.push_back(filterGraph->filters[i]->name);
}
if (Utils::ContainsString(filterGraph->filters[i]->name,
"buffersink_"))
{
buffsinkName = filterGraph->filters[i]->name;
}
}
for (int i = 0; i < numOfInputs; ++i)
{
// Get the Parsed_buffer_x inputs(s)
inputs[i].VidCtx.BufferSrcCtx =
avfilter_graph_get_filter(filterGraph, buffsrcNames[i]);
if (!inputs[i].VidCtx.BufferSrcCtx)
{
Utils::LogAVMessage(
"avfilter_graph_get_filter() returned a NULL bufersrc context",
AV_LOG_ERROR);
FreeFilterInOuts(&filterOuts, &filterIns);
avfilter_graph_free(&filterGraph);
return false;
}
}
buffersinkCtx = avfilter_graph_get_filter(filterGraph, buffsinkName);
if (!buffersinkCtx)
{
Utils::LogAVMessage(
"avfilter_graph_get_filter() returned a NULL buffersink context",
AV_LOG_ERROR);
FreeFilterInOuts(&filterOuts, &filterIns);
avfilter_graph_free(&filterGraph);
return false;
}
FreeFilterInOuts(&filterOuts, &filterIns);
return true;
}
bool DecodeAndAddFrameToBuffersrc(DataTypes::VideoContext& vidCtx)
{
int result = 0;
while (1)
{
result = av_read_frame(vidCtx.FormatCtx, &vidCtx.Packet);
if (result < 0)
{
break;
}
if (vidCtx.Packet.stream_index == vidCtx.StreamIndex)
{
result = avcodec_send_packet(vidCtx.CodecCtx, &vidCtx.Packet);
if (result < 0)
{
Utils::LogAVMessage(result, AV_LOG_ERROR,
"Error while sending a packet to the decoder...");
break;
}
while (result >= 0)
{
result = avcodec_receive_frame(vidCtx.CodecCtx,
vidCtx.Frame);
if (result == AVERROR(EAGAIN) || result == AVERROR_EOF)
{
break;
}
else if (result < 0)
{
Utils::LogAVMessage(result, AV_LOG_ERROR,
"Error while receiving a frame from the decoder...");
return false;
}
vidCtx.Frame->pts = vidCtx.Frame->best_effort_timestamp;
// push the decoded frame into the filtergraph
if (av_buffersrc_add_frame_flags(vidCtx.BufferSrcCtx,
vidCtx.Frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0)
{
Utils::LogAVMessage(
"Error while feeding the filtergraph...",
AV_LOG_ERROR);
break;
}
// Release the packet before returning, otherwise it leaks
av_packet_unref(&vidCtx.Packet);
return true;
}
}
av_packet_unref(&vidCtx.Packet);
}
return false;
}
How the above code is used
int main(int argc, char** argv)
{
ConfigManager configManager("ConfigFile.xml");
if (!configManager.IsLoaded())
{
return -1;
}
std::vector<DataTypes::InputConfig> inputConfigs =
configManager.GetInputConfigs();
DataTypes::FilterConfig filterConfig = configManager.GetFilterConfig();
if (!filterConfig.IsEnabled)
{
Utils::LogAVMessage("Forgot to provide a filter...", AV_LOG_ERROR);
Logger::DestroyInstance();
return -1;
}
// Create list of VideoContexts for inputs
std::vector<UrlInput> urlInputs;
for (int i = 0; i < inputConfigs.size(); ++i)
{
UrlInput input;
input.UrlStr = inputConfigs[i].Option.Url.UrlString;
if (!OpenUrlVideo(input))
{
Utils::LogAVMessage("Failed to Open Url Video...",
AV_LOG_ERROR);
Logger::DestroyInstance();
return -1;
}
input.VidCtx.Frame = av_frame_alloc();
if (!input.VidCtx.Frame)
{
Utils::LogAVMessage("Failed to allocate frame...",
AV_LOG_ERROR);
Logger::DestroyInstance();
return -1;
}
// According to the ffmpeg doxygen 4.1 filtering_video.c example,
// the AVPacket doesn't get initialized
// ...
Utils::LogAVMessage("\nSuccessfully Opened Url Video\n",
AV_LOG_WARNING);
urlInputs.push_back(input);
}
AVFilterGraph* filterGraph = nullptr;
AVFilterContext* buffersinkCtx = nullptr;
AVFrame* filterFrame = nullptr;
// Create and populate filter graph and filter contexts
if (!CreateFilterGraph(filterGraph, buffersinkCtx, urlInputs,
filterConfig.FilterString))
{
Utils::LogAVMessage(
"Failed to create filter graph and filter contexts...",
AV_LOG_ERROR);
Cleanup(filterGraph, buffersinkCtx, filterFrame, urlInputs);
Logger::DestroyInstance();
return -1;
}
Utils::LogAVMessage(
"Successfully Created Filter Graph and Filter Contexts",
AV_LOG_WARNING);
// Initialize Filter Frame
filterFrame = av_frame_alloc();
if (!filterFrame)
{
Utils::LogAVMessage("Failed to allocate filter frame...",
AV_LOG_ERROR);
Cleanup(filterGraph, buffersinkCtx, filterFrame, urlInputs);
Logger::DestroyInstance();
return -1;
}
// Decode Packets to Frames, Add them to Buffersrc Contexts,
// then Retrieve Filter Frame from Buffersink Context
int attemptsLeft = 10;
bool succeeded = false;
int framesLeft = 100;
bool gotFrame = false;
do
{
for (int i = 0; i < urlInputs.size(); ++i)
{
if (!DecodeAndAddFrameToBuffersrc(urlInputs[i].VidCtx))
{
// May need to free some memory here...
--attemptsLeft;
succeeded = false;
continue;
}
}
int result = 0;
// Retrieve Filter Frame from Buffersink Context
while (framesLeft > 0)
{
result = av_buffersink_get_frame(buffersinkCtx, filterFrame);
if (result == AVERROR(EAGAIN) || result == AVERROR_EOF)
{
if (gotFrame)
{
--framesLeft;
// Prep for next iteration
gotFrame = false;
}
else
{
Utils::LogAVMessage(
"FAILED TO RETRIEVE FILTER FRAME...",
AV_LOG_ERROR);
succeeded = false;
}
if (framesLeft <= 0)
{
succeeded = true;
}
break;
}
if (result < 0)
{
succeeded = false;
// Make sure we exit out of both loops
attemptsLeft = 0;
break;
}
// Display frame info
std::string frameInfoStr = std::string("Frame width:") +
std::to_string(filterFrame->width) +
" Frame height:" + std::to_string(filterFrame->height) +
" Frame Pixel Format:" +
Utils::PixelFormatToString(
static_cast<AVPixelFormat>(filterFrame->format)) +
" Frame pts:" + std::to_string(filterFrame->pts) +
" Frame dts:" + std::to_string(filterFrame->pkt_dts);
Utils::LogAVMessage(frameInfoStr.c_str(), AV_LOG_WARNING);
av_frame_unref(filterFrame);
gotFrame = true;
}
for (int i = 0; i < urlInputs.size(); ++i)
{
av_frame_unref(urlInputs[i].VidCtx.Frame);
}
} while (attemptsLeft > 0 && !succeeded);
if (succeeded)
{
Utils::LogAVMessage("Successfully retrieved filtered frame(s)...",
AV_LOG_WARNING);
}
else
{
Utils::LogAVMessage("Failed to retrieve all filtered frames...",
AV_LOG_ERROR);
}
Cleanup(filterGraph, buffersinkCtx, filterFrame, urlInputs);
Logger::DestroyInstance();
return 0;
}
I found out how to use the avfilter_graph_parse2() function from this stackoverflow question: Implementing a multiple input filter graph with the Libavfilter library in Android NDK
I found out how to decode the "filtered" frames from this ffmpeg doxygen example: http://www.ffmpeg.org/doxygen/4.1/filtering_video_8c-example.html
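For what it is worth, the full filter string that CreateFilterGraph() assembles (and logs) looks roughly like this for two hypothetical 1920x1080 yuv420p inputs with a 1/90000 time base (pix_fmt is printed as the enum's integer value, so AV_PIX_FMT_YUV420P shows up as 0; the exact numbers depend on the streams):
buffer=video_size=1920x1080:pix_fmt=0:time_base=1/90000:pixel_aspect=1/1[in_0]; buffer=video_size=1920x1080:pix_fmt=0:time_base=1/90000:pixel_aspect=1/1[in_1]; [in_0]scale=800x600[src1];[src1][in_1]overlay[out]; [out] buffersink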
I apologize for the amount of code, but I wanted to be thorough about this problem. Again, whenever I call the av_buffersink_get_frame() function, I always get a result of -11 (AVERROR(EAGAIN)). One other thing I did not mention: in the CreateFilterGraph() function, after the call to avfilter_graph_config() completes, the filters' linked lists do not appear to be linked to one another for some reason... Please let me know what I am doing wrong that could be causing this problem. Thanks in advance for your help.
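One way to inspect that linking (a small sketch, reusing my LogAVMessage helper; avfilter_graph_dump() is part of libavfilter and its returned string must be released with av_free()):
// Sketch: dump the configured graph to verify the links actually exist
char* graphDesc = avfilter_graph_dump(filterGraph, NULL);
if (graphDesc)
{
    Utils::LogAVMessage(graphDesc, AV_LOG_WARNING);
    av_free(graphDesc);
}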
UPDATE:
I found a temporary "fix" for the code above. It turns out the line vidCtx.Frame->pts = vidCtx.Frame->best_effort_timestamp; was causing the problem. When I start the two ffmpeg command-line applications (sending the udp url streams) at roughly the same time, I have no issues. But when I start them a few minutes apart and then run my application, I always run into the EAGAIN issue from av_buffersink_get_frame(). I believe this means there is a problem with how the ffmpeg api tries to handle the differing timestamps. So, as a hack, I replaced vidCtx.Frame->pts = vidCtx.Frame->best_effort_timestamp; with vidCtx.Frame->pts = ++pts; where pts starts at 0. That lets the application run fine, but I have found one problem: when the incoming udp url comes from a "screen-grab" command, the portion of the video shown as part of the output plays back very slowly. At this point I am struggling to figure out why the "screen-grab" video ends up so slow... If anyone has seen this before or has any advice, please help. Thanks.
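In case it helps anyone suggest a cleaner fix, my current idea is to rebase each input's timestamps to start at zero instead of forcing pts = ++pts, so the frames keep their real spacing (a sketch only; FirstPts would be a new int64_t field on VideoContext, initialized to AV_NOPTS_VALUE, which is an assumption on top of the code above):
// Sketch: rebase timestamps so two streams started minutes apart still
// overlap, while preserving the original frame spacing (unlike ++pts,
// which throws away the real frame rate)
int64_t ts = vidCtx.Frame->best_effort_timestamp;
if (ts != AV_NOPTS_VALUE)
{
    if (vidCtx.FirstPts == AV_NOPTS_VALUE)
        vidCtx.FirstPts = ts; // remember the stream's first timestamp
    vidCtx.Frame->pts = ts - vidCtx.FirstPts; // rebase to zero
}
I suspect the slow screen-grab playback is related to the hack itself: ++pts pairs the two inputs frame-for-frame regardless of their actual capture rates, so a low-frame-rate screen grab gets stretched out against the other input's timeline.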