我从网络摄像头读取 h.264 帧并从麦克风捕获音频。我需要将实时视频流式传输到 ffserver。在调试期间,我使用 ffmpeg 使用以下命令从 ffserver 读取视频:
ffmpeg -i http://127.0.0.1:12345/robot.avi -vcodec copy -acodec copy out.avi
我的输出文件中的视频略微加速。如果我添加一个音频流,它会加速几次。有时输出文件中没有音频。
这是我的音频编码代码:
#include "v_audio_encoder.h"
extern "C" {
#include <libavcodec/avcodec.h>
}
#include <cassert>
struct VAudioEncoder::Private
{
AVCodec *m_codec;
AVCodecContext *m_context;
std::vector<uint8_t> m_outBuffer;
};
VAudioEncoder::VAudioEncoder( int sampleRate, int bitRate )
{
d = new Private( );
d->m_codec = avcodec_find_encoder( CODEC_ID_MP3 );
assert( d->m_codec );
d->m_context = avcodec_alloc_context3( d->m_codec );
// put sample parameters
d->m_context->channels = 2;
d->m_context->bit_rate = bitRate;
d->m_context->sample_rate = sampleRate;
d->m_context->sample_fmt = AV_SAMPLE_FMT_S16;
strcpy( d->m_context->codec_name, "libmp3lame" );
// open it
int res = avcodec_open2( d->m_context, d->m_codec, 0 );
assert( res >= 0 );
d->m_outBuffer.resize( d->m_context->frame_size );
}
VAudioEncoder::~VAudioEncoder( )
{
avcodec_close( d->m_context );
av_free( d->m_context );
delete d;
}
void VAudioEncoder::encode( const std::vector<uint32_t>& samples, std::vector<uint8_t>& outbuf )
{
assert( (int)samples.size( ) == d->m_context->frame_size );
int outSize = avcodec_encode_audio( d->m_context, d->m_outBuffer.data( ),
d->m_outBuffer.size( ), reinterpret_cast<const short*>( samples.data( ) ) );
if( outSize ) {
outbuf.resize( outSize );
memcpy( outbuf.data( ), d->m_outBuffer.data( ), outSize );
}
else
outbuf.clear( );
}
int VAudioEncoder::getFrameSize( ) const
{
return d->m_context->frame_size;
}
这是我的流式视频代码:
#include "v_out_video_stream.h"
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/avstring.h>
#include <libavformat/avio.h>
}
#include <stdexcept>
#include <cassert>
struct VStatticRegistrar
{
VStatticRegistrar( )
{
av_register_all( );
avformat_network_init( );
}
};
VStatticRegistrar __registrar;
struct VOutVideoStream::Private
{
AVFormatContext * m_context;
int m_videoStreamIndex;
int m_audioStreamIndex;
int m_videoBitrate;
int m_width;
int m_height;
int m_fps;
int m_bitrate;
bool m_waitKeyFrame;
};
VOutVideoStream::VOutVideoStream( int width, int height, int fps, int bitrate )
{
d = new Private( );
d->m_width = width;
d->m_height = height;
d->m_fps = fps;
d->m_context = 0;
d->m_videoStreamIndex = -1;
d->m_audioStreamIndex = -1;
d->m_bitrate = bitrate;
d->m_waitKeyFrame = true;
}
bool VOutVideoStream::connectToServer( const std::string& uri )
{
assert( ! d->m_context );
// initalize the AV context
d->m_context = avformat_alloc_context();
if( !d->m_context )
return false;
// get the output format
d->m_context->oformat = av_guess_format( "ffm", NULL, NULL );
if( ! d->m_context->oformat )
return false;
strcpy( d->m_context->filename, uri.c_str( ) );
// add an H.264 stream
AVStream *stream = avformat_new_stream( d->m_context, NULL );
if ( ! stream )
return false;
// initalize codec
AVCodecContext* codec = stream->codec;
if( d->m_context->oformat->flags & AVFMT_GLOBALHEADER )
codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
codec->codec_id = CODEC_ID_H264;
codec->codec_type = AVMEDIA_TYPE_VIDEO;
strcpy( codec->codec_name, "libx264" );
// codec->codec_tag = ( unsigned('4') << 24 ) + (unsigned('6') << 16 ) + ( unsigned('2') << 8 ) + 'H';
codec->width = d->m_width;
codec->height = d->m_height;
codec->time_base.den = d->m_fps;
codec->time_base.num = 1;
codec->bit_rate = d->m_bitrate;
d->m_videoStreamIndex = stream->index;
// add an MP3 stream
stream = avformat_new_stream( d->m_context, NULL );
if ( ! stream )
return false;
// initalize codec
codec = stream->codec;
if( d->m_context->oformat->flags & AVFMT_GLOBALHEADER )
codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
codec->codec_id = CODEC_ID_MP3;
codec->codec_type = AVMEDIA_TYPE_AUDIO;
strcpy( codec->codec_name, "libmp3lame" );
codec->sample_fmt = AV_SAMPLE_FMT_S16;
codec->channels = 2;
codec->bit_rate = 64000;
codec->sample_rate = 44100;
d->m_audioStreamIndex = stream->index;
// try to open the stream
if( avio_open( &d->m_context->pb, d->m_context->filename, AVIO_FLAG_WRITE ) < 0 )
return false;
// write the header
return avformat_write_header( d->m_context, NULL ) == 0;
}
void VOutVideoStream::disconnect( )
{
assert( d->m_context );
avio_close( d->m_context->pb );
avformat_free_context( d->m_context );
d->m_context = 0;
}
VOutVideoStream::~VOutVideoStream( )
{
if( d->m_context )
disconnect( );
delete d;
}
int VOutVideoStream::getVopType( const std::vector<uint8_t>& image )
{
if( image.size( ) < 6 )
return -1;
unsigned char *b = (unsigned char*)image.data( );
// Verify NAL marker
if( b[ 0 ] || b[ 1 ] || 0x01 != b[ 2 ] ) {
++b;
if ( b[ 0 ] || b[ 1 ] || 0x01 != b[ 2 ] )
return -1;
}
b += 3;
// Verify VOP id
if( 0xb6 == *b ) {
++b;
return ( *b & 0xc0 ) >> 6;
}
switch( *b ) {
case 0x65: return 0;
case 0x61: return 1;
case 0x01: return 2;
}
return -1;
}
bool VOutVideoStream::sendVideoFrame( std::vector<uint8_t>& image )
{
// Init packet
AVPacket pkt;
av_init_packet( &pkt );
pkt.flags |= ( 0 >= getVopType( image ) ) ? AV_PKT_FLAG_KEY : 0;
// Wait for key frame
if ( d->m_waitKeyFrame ) {
if( pkt.flags & AV_PKT_FLAG_KEY )
d->m_waitKeyFrame = false;
else
return true;
}
pkt.stream_index = d->m_videoStreamIndex;
pkt.data = image.data( );
pkt.size = image.size( );
pkt.pts = pkt.dts = AV_NOPTS_VALUE;
return av_write_frame( d->m_context, &pkt ) >= 0;
}
bool VOutVideoStream::sendAudioFrame( std::vector<uint8_t>& audio )
{
// Init packet
AVPacket pkt;
av_init_packet( &pkt );
pkt.stream_index = d->m_audioStreamIndex;
pkt.data = audio.data( );
pkt.size = audio.size( );
pkt.pts = pkt.dts = AV_NOPTS_VALUE;
return av_write_frame( d->m_context, &pkt ) >= 0;
}
这是我的使用方法:
BOOST_AUTO_TEST_CASE(testSendingVideo)
{
const int framesToGrab = 90000;
VOutVideoStream stream( VIDEO_WIDTH, VIDEO_HEIGHT, FPS, VIDEO_BITRATE );
if( stream.connectToServer( URI ) ) {
VAudioEncoder audioEncoder( AUDIO_SAMPLE_RATE, AUDIO_BIT_RATE );
VAudioCapture microphone( MICROPHONE_NAME, AUDIO_SAMPLE_RATE, audioEncoder.getFrameSize( ) );
VLogitecCamera camera( VIDEO_WIDTH, VIDEO_HEIGHT );
BOOST_REQUIRE( camera.open( CAMERA_PORT ) );
BOOST_REQUIRE( camera.startCapturing( ) );
std::vector<uint8_t> image, encodedAudio;
std::vector<uint32_t> voice;
boost::system_time startTime;
int delta;
for( int i = 0; i < framesToGrab; ++i ) {
startTime = boost::posix_time::microsec_clock::universal_time( );
BOOST_REQUIRE( camera.read( image ) );
BOOST_REQUIRE( microphone.read( voice ) );
audioEncoder.encode( voice, encodedAudio );
BOOST_REQUIRE( stream.sendVideoFrame( image ) );
BOOST_REQUIRE( stream.sendAudioFrame( encodedAudio ) );
delta = ( boost::posix_time::microsec_clock::universal_time( ) - startTime ).total_milliseconds( );
if( delta < 1000 / FPS )
boost::thread::sleep( startTime + boost::posix_time::milliseconds( 1000 / FPS - delta ) );
}
BOOST_REQUIRE( camera.stopCapturing( ) );
BOOST_REQUIRE( camera.close( ) );
}
else
std::cout << "failed to connect to server" << std::endl;
}
我认为我的问题在于 PTS 和 DTS。谁能帮我?