我正在学习如何从这个示例中创建 MP4 视频。问题是该示例演示了从动态生成的一些虚拟源数据进行音频编码。我需要对文件中的音频进行编码。我检查了许多示例,其中大多数显示相同或只是单独的音频编码。在我的试错过程中,我对音频和视频帧使用相同的 AVFormatContext。我不确定这样做是否正确,还是应该有 2 个单独的上下文?到目前为止,我的视频编码正常,但音频流失败因为 AVPacket 无法找到正确的音频流索引。这是我设置音频流的方法:
void open_audio(AVFormatContext *oc, AVCodec **codec, AVStream **st ,enum AVCodecID codec_id){
// AVCodecContext *c;
int ret;
// c = st->codec;
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",avcodec_get_name(codec_id));
}
/* open it */
if(avformat_open_input(&oc,_audioInName.c_str(),NULL,NULL) !=0){
Msg::PrintErrorMsg("Error opening audio file");
}
AVStream* audioStream = NULL;
// Find the audio stream (some container files can have multiple streams in them)
for (uint32_t i = 0; i < oc->nb_streams; ++i)
{
if (oc->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
{
audioStream = oc->streams[i];
break;
}
}
if (audioStream == NULL)
{
Msg::PrintErrorMsg("Could not find any audio stream in the file");
}
*st =audioStream;
AVCodecContext *c = audioStream->codec;
c->codec = *codec;//avcodec_find_decoder(c->codec_id);
audioStream->id = 1;
c->sample_fmt = AV_SAMPLE_FMT_S16;
c->bit_rate = 64000;
c->sample_rate = 44100;
c->channels = 1;
if (oc->oformat->flags & AVFMT_GLOBALHEADER){
c->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
if (c->codec == NULL)
{
Msg::PrintErrorMsg("Couldn't find a proper decoder");
}
ret = avcodec_open2(c, *codec, NULL);
if (ret < 0) {
Msg::PrintErrorMsg("Could not open audio codec\n");
}
}
这里的“oc”也是用于初始化视频流的相同上下文。
然后我试图写这样的音频帧:
void write_audio_frame(AVFormatContext *oc, AVStream *st){
AVCodecContext *c;
AVPacket pkt = { 0 }; // data and size must be 0;
AVFrame *frame = avcodec_alloc_frame();
int got_packet, ret;
av_init_packet(&pkt);
c = st->codec;
/////
// get_audio_frame(samples, audio_input_frame_size, c->channels);
////Read the packet:
while(av_read_frame(oc,&pkt) == 0 ){
if(pkt.stream_index ==st->index){
// Try to decode the packet into a frame
int frameFinished = 0;
avcodec_decode_audio4(c, frame, &frameFinished, &pkt);
// Some frames rely on multiple packets, so we have to make sure the frame is finished before
// we can use it
if (frameFinished){
assert(frameFinished);
ret = avcodec_encode_audio2(c, &pkt, frame, &got_packet);
if (ret < 0) {
Msg::PrintErrorMsg("Error encoding audio frame\n");
}
if (!got_packet){
printf("failed to aquire packet");
}
pkt.stream_index = st->index;
/* Write the compressed frame to the media file. */
ret = av_interleaved_write_frame(oc, &pkt);
if (ret != 0) {
Msg::PrintErrorMsg("Error while writing audio frame.");
}
}
}
}
}
av_free_packet(&pkt);
avcodec_free_frame(&frame);
}
问题是我从来没有通过这个语句:“if(pkt.stream_index ==st->index)”。数据包流索引永远不会等于音频流索引。任何人都可以指出我错在哪里?
更新:
我确实设法打开输入音频流进行编码,但我无法将音频和视频流编码为单个输出。从我看到的 PTS 和 DTS 可能是问题的根源。目前我根据 muxing.c 示例计算 pts 但是它根本不适用于音频。
这是我如何使用它:
while(frame_count < _streamDurationNBFrames-1){
uint8_t *frameToWrite =_frames.front();
// Compute current audio and video time. ///
if (audio_st){
audio_pts = (double)audioIn_st->pts.val * audioIn_st->time_base.num / audioIn_st->time_base.den;
}
else{
audio_pts = 0.0;
}
if (video_st){
video_pts = (double)video_st->pts.val * video_st->time_base.num / video_st->time_base.den;
}else{
video_pts = 0.0;
}
if ((!audio_st || audio_pts >= _streamDuration) && (!video_st || video_pts >= _streamDuration)){
break;
}
if (audio_st && audio_pts < video_pts) {
av_read_frame(informat, &pkt);//read audio from input stream
Msg::PrintMsg("Encode audio here...");
//================== AUDIO ENCODE HERE
outpkt.data = pkt.data;
outpkt.size = pkt.size;
outpkt.stream_index = pkt.stream_index;
outpkt.flags |= AV_PKT_FLAG_KEY;
outpkt.pts = pkt.pts;
outpkt.dts =pkt.dts;
if(av_interleaved_write_frame(oc, &outpkt) < 0)
{
Msg::PrintErrorMsg("Fail Audio Write ");
}
else
{
audio_st->codec->frame_number++;
}
av_free_packet(&outpkt);
av_free_packet(&pkt);
}else{
//================== VIDEO ENCODE HERE
write_video_frame(oc, video_st,frameToWrite);
frame->pts += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);
}
///at last delete this frame:
_frames.pop();
delete frameToWrite; ///deallocate the written frame!
}
不知何故,一旦我进入音频编码循环,audio_pts 就永远不会到达 video_pts 并且始终为零:
audio_pts = (double)audio_st->pts.val * audio_st->time_base.num / audio_st->time_base.den; is always zero because (double)audio_st->pts.val returns zero.
所以基本上我又问了同样的问题:当音频来自外部文件时如何进行混音?
顺便说一句,下面的答案没有帮助,因为它假定音频和视频流都来自同一个文件,而在我的情况下,只有音频来自外部源。