4

我从网络摄像头读取 H.264 帧,并从麦克风采集音频。我需要把实时视频流发送到 ffserver。调试时,我用下面的 ffmpeg 命令从 ffserver 读取视频:

ffmpeg -i http://127.0.0.1:12345/robot.avi -vcodec copy -acodec copy out.avi

输出文件中的视频播放速度略微偏快。如果再加入音频流,播放速度会加快好几倍。有时输出文件中还会完全没有音频。

这是我的音频编码代码:

#include "v_audio_encoder.h"

extern "C" {
#include <libavcodec/avcodec.h>
}
#include <cassert>

/// Internal state of VAudioEncoder, reached through the encoder's `d` pointer.
struct VAudioEncoder::Private
{
    /// Encoder looked up with avcodec_find_encoder() in the constructor.
    AVCodec* m_codec;

    /// Codec context allocated and opened in the constructor, closed and
    /// freed in the destructor.
    AVCodecContext* m_context;

    /// Scratch buffer that receives the encoder output before it is copied
    /// into the caller's vector by encode().
    std::vector<uint8_t> m_outBuffer;
};

/**
 * Creates and opens an MP3 encoder configured for two channels of signed
 * 16-bit samples.
 *
 * @param sampleRate  Value stored in AVCodecContext::sample_rate.
 * @param bitRate     Value stored in AVCodecContext::bit_rate.
 *
 * A missing encoder, a failed context allocation or a failed
 * avcodec_open2() is reported through assert().
 */
VAudioEncoder::VAudioEncoder( int sampleRate, int bitRate )
{
    d = new Private( );
    d->m_codec = avcodec_find_encoder( CODEC_ID_MP3 );
    assert( d->m_codec );
    d->m_context = avcodec_alloc_context3( d->m_codec );
    assert( d->m_context );

    // put sample parameters
    d->m_context->channels = 2;
    d->m_context->bit_rate = bitRate;
    d->m_context->sample_rate = sampleRate;
    d->m_context->sample_fmt = AV_SAMPLE_FMT_S16;
    strcpy( d->m_context->codec_name, "libmp3lame" );

    // open it
    const int res = avcodec_open2( d->m_context, d->m_codec, 0 );
    assert( res >= 0 );
    (void)res; // only read by the assert above

    // frame_size is a sample count, not a byte count, so it is not a safe
    // size for the encoded-output buffer. avcodec_encode_audio() asks for at
    // least FF_MIN_BUFFER_SIZE bytes; fall back to that macro's historical
    // value when the header in use does not define it.
#ifdef FF_MIN_BUFFER_SIZE
    const int minOutputBytes = FF_MIN_BUFFER_SIZE;
#else
    const int minOutputBytes = 16384;
#endif
    const int frameSize = d->m_context->frame_size;
    d->m_outBuffer.resize( frameSize > minOutputBytes ? frameSize : minOutputBytes );
}

/// Closes the codec, frees the codec context and destroys the private state.
VAudioEncoder::~VAudioEncoder( )
{
    AVCodecContext* context = d->m_context;

    // Close the codec before the memory holding its context is released.
    avcodec_close( context );
    av_free( context );

    delete d;
}

/**
 * Encodes one frame of audio.
 *
 * @param samples  Exactly getFrameSize() elements; the storage is handed to
 *                 the encoder reinterpreted as 16-bit samples (presumably one
 *                 interleaved left/right pair per element — confirm against
 *                 the capture code).
 * @param outbuf   Receives the encoded bytes. It is left empty when the
 *                 encoder produced no output for this call or reported an
 *                 error.
 */
void VAudioEncoder::encode( const std::vector<uint32_t>& samples, std::vector<uint8_t>& outbuf )
{
    assert( static_cast<int>( samples.size( ) ) == d->m_context->frame_size );

    const int outSize = avcodec_encode_audio( d->m_context, d->m_outBuffer.data( ),
                                              static_cast<int>( d->m_outBuffer.size( ) ),
                                              reinterpret_cast<const short*>( samples.data( ) ) );

    // A negative value is an error code, not a length: it must never reach
    // resize()/copy, where it would be converted to an enormous size.
    if( outSize <= 0 ) {
        outbuf.clear( );
        return;
    }

    outbuf.assign( d->m_outBuffer.begin( ), d->m_outBuffer.begin( ) + outSize );
}

int VAudioEncoder::getFrameSize( ) const
{
    return d->m_context->frame_size;
}

这是我的流式视频代码:

#include "v_out_video_stream.h"

extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/avstring.h>
#include <libavformat/avio.h>
}

#include <stdexcept>
#include <cassert>

/// Helper whose construction performs the one-off libavformat set-up calls.
struct VStatticRegistrar
{
    VStatticRegistrar( );
};

/// Runs the global libavformat registration, then the network initialisation.
inline VStatticRegistrar::VStatticRegistrar( )
{
    av_register_all( );
    avformat_network_init( );
}

// File-local instance: constructing it during static initialisation runs the
// libavformat set-up before any stream object in this file is used.
// Identifiers containing a double underscore are reserved for the
// implementation, so the object carries a plain name and internal linkage.
static VStatticRegistrar s_registrar;

/// Internal state of VOutVideoStream, reached through the stream's `d` pointer.
struct VOutVideoStream::Private
{
    /// Muxer context; null while no connection is open.
    AVFormatContext* m_context;
    /// Index of the H.264 stream inside m_context, -1 before connecting.
    int m_videoStreamIndex;
    /// Index of the MP3 stream inside m_context, -1 before connecting.
    int m_audioStreamIndex;

    /// Not written by any code visible in this file.
    int m_videoBitrate;
    /// Frame width copied into the video codec parameters.
    int m_width;
    /// Frame height copied into the video codec parameters.
    int m_height;
    /// Denominator of the video codec time base (time base is 1 / m_fps).
    int m_fps;
    /// Bit rate copied into the video codec parameters.
    int m_bitrate;

    /// True until the first key frame has been seen; earlier frames are dropped.
    bool m_waitKeyFrame;
};

/**
 * Stores the video parameters; no connection is opened here.
 *
 * @param width    Frame width later written to the video codec parameters.
 * @param height   Frame height later written to the video codec parameters.
 * @param fps      Denominator of the video time base.
 * @param bitrate  Bit rate later written to the video codec parameters.
 */
VOutVideoStream::VOutVideoStream( int width, int height, int fps, int bitrate )
{
    d = new Private( );
    Private& state = *d;

    // Disconnected state: no muxer context and no stream indices yet.
    state.m_context = 0;
    state.m_videoStreamIndex = -1;
    state.m_audioStreamIndex = -1;

    // Frames are discarded until the first key frame arrives.
    state.m_waitKeyFrame = true;

    // Video parameters consumed by connectToServer().
    state.m_width = width;
    state.m_height = height;
    state.m_fps = fps;
    state.m_bitrate = bitrate;
}

bool VOutVideoStream::connectToServer( const std::string& uri )
{
    assert( ! d->m_context );

    // initalize the AV context
    d->m_context = avformat_alloc_context();
    if( !d->m_context )
        return false;
    // get the output format
    d->m_context->oformat = av_guess_format( "ffm", NULL, NULL );
    if( ! d->m_context->oformat )
        return false;

    strcpy( d->m_context->filename, uri.c_str( ) );

    // add an H.264 stream
    AVStream *stream = avformat_new_stream( d->m_context, NULL );
    if ( ! stream )
        return false;
    // initalize codec
    AVCodecContext* codec = stream->codec;
    if( d->m_context->oformat->flags & AVFMT_GLOBALHEADER )
        codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
    codec->codec_id = CODEC_ID_H264;
    codec->codec_type = AVMEDIA_TYPE_VIDEO;
    strcpy( codec->codec_name, "libx264" );
//    codec->codec_tag = ( unsigned('4') << 24 ) + (unsigned('6') << 16 ) + ( unsigned('2') << 8 ) + 'H';
    codec->width = d->m_width;
    codec->height = d->m_height;
    codec->time_base.den = d->m_fps;
    codec->time_base.num = 1;
    codec->bit_rate = d->m_bitrate;
    d->m_videoStreamIndex = stream->index;

    // add an MP3 stream
    stream = avformat_new_stream( d->m_context, NULL );
    if ( ! stream )
        return false;
    // initalize codec
    codec = stream->codec;
    if( d->m_context->oformat->flags & AVFMT_GLOBALHEADER )
        codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
    codec->codec_id = CODEC_ID_MP3;
    codec->codec_type = AVMEDIA_TYPE_AUDIO;
    strcpy( codec->codec_name, "libmp3lame" );
    codec->sample_fmt = AV_SAMPLE_FMT_S16;
    codec->channels = 2;
    codec->bit_rate = 64000;
    codec->sample_rate = 44100;
    d->m_audioStreamIndex = stream->index;

    // try to open the stream
    if( avio_open( &d->m_context->pb, d->m_context->filename, AVIO_FLAG_WRITE ) < 0 )
         return false;

    // write the header
    return avformat_write_header( d->m_context, NULL ) == 0;
}

/**
 * Closes the output and frees the muxer context, returning the object to the
 * disconnected state. Must only be called while a context exists.
 */
void VOutVideoStream::disconnect( )
{
    assert( d->m_context );

    // pb is still null when the context was created but the output was never
    // opened; only close an I/O handle that actually exists.
    if( d->m_context->pb ) {
        avio_close( d->m_context->pb );
        d->m_context->pb = 0;
    }

    // NOTE(review): no av_write_trailer() is issued before closing — confirm
    // whether the muxer needs one to flush its last data.
    avformat_free_context( d->m_context );
    d->m_context = 0;
}

/// Disconnects if a muxer context is still held, then destroys the private state.
VOutVideoStream::~VOutVideoStream( )
{
    const bool holdsContext = ( d->m_context != 0 );
    if( holdsContext )
        disconnect( );

    delete d;
}

/**
 * Classifies an encoded frame by inspecting the byte that follows its first
 * start code.
 *
 * @param image  Encoded frame, expected to begin with a 00 00 01 start code,
 *               optionally preceded by one extra byte.
 * @return 0 for an H.264 IDR slice, 1 for a non-IDR slice that is used as a
 *         reference, 2 for a non-IDR slice that is not; for a 0xb6 marker
 *         (MPEG-4 VOP) the two-bit coding type taken from the next byte;
 *         -1 when the buffer is shorter than six bytes, has no start code,
 *         or starts with any other unit type.
 */
int VOutVideoStream::getVopType( const std::vector<uint8_t>& image )
{
    if( image.size( ) < 6 )
        return -1;
    const uint8_t *b = image.data( );

    // Verify NAL marker: 00 00 01 at offset 0, or at offset 1
    if( b[ 0 ] || b[ 1 ] || 0x01 != b[ 2 ] ) {
        ++b;
        if ( b[ 0 ] || b[ 1 ] || 0x01 != b[ 2 ] )
            return -1;
    }

    b += 3;

    // Verify VOP id
    if( 0xb6 == *b ) {
        ++b;
        return ( *b & 0xc0 ) >> 6;
    }

    // H.264 NAL header layout: forbidden_zero_bit (1 bit), nal_ref_idc
    // (2 bits), nal_unit_type (5 bits). Decode the fields instead of matching
    // whole header bytes, so slices with any non-zero nal_ref_idc are
    // classified rather than only those with nal_ref_idc == 3.
    const unsigned header = *b;
    if( header & 0x80 )
        return -1;
    const unsigned refIdc = ( header >> 5 ) & 0x03;
    const unsigned unitType = header & 0x1f;

    if( 5 == unitType )             // IDR slice; always carries a non-zero nal_ref_idc
        return refIdc ? 0 : -1;
    if( 1 == unitType )             // non-IDR slice
        return refIdc ? 1 : 2;

    return -1;
}

bool VOutVideoStream::sendVideoFrame( std::vector<uint8_t>& image )
{
    // Init packet
    AVPacket pkt;
    av_init_packet( &pkt );
    pkt.flags |= ( 0 >= getVopType( image ) ) ? AV_PKT_FLAG_KEY : 0;

    // Wait for key frame
    if ( d->m_waitKeyFrame ) {
        if( pkt.flags & AV_PKT_FLAG_KEY )
            d->m_waitKeyFrame = false;
        else
            return true;
    }

    pkt.stream_index = d->m_videoStreamIndex;
    pkt.data = image.data( );
    pkt.size = image.size( );
    pkt.pts = pkt.dts = AV_NOPTS_VALUE;

    return av_write_frame( d->m_context, &pkt ) >= 0;
}

/**
 * Writes one encoded audio frame to the muxer.
 *
 * @param audio  Encoded bytes; the packet points at this buffer. An empty
 *               vector (nothing was encoded) is skipped and reported as
 *               success, mirroring how sendVideoFrame() reports frames it
 *               drops.
 * @return true when the frame was written or skipped, false when
 *         av_write_frame() reported an error.
 */
bool VOutVideoStream::sendAudioFrame( std::vector<uint8_t>& audio )
{
    // Do not push zero-length packets into the muxer.
    if( audio.empty( ) )
        return true;

    // Init packet
    AVPacket pkt;
    av_init_packet( &pkt );
    pkt.stream_index = d->m_audioStreamIndex;
    pkt.data = audio.data( );
    pkt.size = audio.size( );

    // NOTE(review): no timestamps are supplied; the muxer documentation asks
    // for valid pts/dts in the stream time base — confirm how timing is
    // expected to be derived.
    pkt.pts = pkt.dts = AV_NOPTS_VALUE;

    return av_write_frame( d->m_context, &pkt ) >= 0;
}

这是我的使用方法:

// Grabs frames from the camera and the microphone and pushes them to the
// server, pacing the loop so that one iteration lasts one frame period.
BOOST_AUTO_TEST_CASE(testSendingVideo)
{
    const int framesToGrab = 90000;

    VOutVideoStream stream( VIDEO_WIDTH, VIDEO_HEIGHT, FPS, VIDEO_BITRATE );
    if( stream.connectToServer( URI ) ) {
        VAudioEncoder audioEncoder( AUDIO_SAMPLE_RATE, AUDIO_BIT_RATE );
        VAudioCapture microphone( MICROPHONE_NAME, AUDIO_SAMPLE_RATE, audioEncoder.getFrameSize( ) );

        VLogitecCamera camera( VIDEO_WIDTH, VIDEO_HEIGHT );
        BOOST_REQUIRE( camera.open( CAMERA_PORT ) );
        BOOST_REQUIRE( camera.startCapturing( ) );

        std::vector<uint8_t> image, encodedAudio;
        std::vector<uint32_t> voice;
        const boost::posix_time::time_duration framePeriod = boost::posix_time::milliseconds( 1000 / FPS );
        for( int i = 0; i < framesToGrab; ++i ) {
            const boost::system_time startTime = boost::posix_time::microsec_clock::universal_time( );

            // NOTE(review): exactly one encoder frame of audio is sent per
            // video frame; whether that keeps audio in step with real time
            // depends on getFrameSize(), AUDIO_SAMPLE_RATE and FPS — confirm.
            BOOST_REQUIRE( camera.read( image ) );
            BOOST_REQUIRE( microphone.read( voice ) );
            audioEncoder.encode( voice, encodedAudio );

            BOOST_REQUIRE( stream.sendVideoFrame( image ) );
            BOOST_REQUIRE( stream.sendAudioFrame( encodedAudio ) );

            // boost::thread::sleep() takes an absolute wake-up time. The
            // iteration has to end one full frame period after it started;
            // adding only the remaining time to startTime would place the
            // deadline too early and let the loop run faster than FPS.
            const boost::system_time deadline = startTime + framePeriod;
            if( boost::posix_time::microsec_clock::universal_time( ) < deadline )
                boost::thread::sleep( deadline );
        }

        BOOST_REQUIRE( camera.stopCapturing( ) );
        BOOST_REQUIRE( camera.close( ) );
    }
    else
        std::cout << "failed to connect to server" << std::endl;
}

我认为我的问题在于 PTS 和 DTS。谁能帮我?

4

0 回答 0