ffmpeg h264和aac封装为mp4文件

最新推荐文章于 2025-03-20 15:49:32 发布

zhou jiabo

最新推荐文章于 2025-03-20 15:49:32 发布

阅读量4k

点赞数 6

分类专栏：音视频开发文章标签： muxer mp4 h.264 aac

本文链接：https://blog.csdn.net/weixin_43796767/article/details/117093781

版权

音视频开发专栏收录该内容

45 篇文章

订阅专栏

本文详细介绍了视频编码中的I帧、P帧和B帧概念，以及它们在编解码过程中的作用。同时，阐述了DTS和PTS在视频时间戳中的差异，特别是在存在B帧时的处理。接着，通过FFmpeg展示了如何将H264和AAC码流复用为MP4文件的过程，并分析了可能存在的时间基转换问题及其优化方法。最后，讨论了优化后的时间基转换对文件时间和比特率的影响。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

1、I帧/P帧/B帧

I帧：帧内编码图像帧，也叫关键帧，包含一幅完整的图像信息，不含运动矢量，在解码时不需要参考其它帧图像。在闭合式GOP（画面组）中，每个GOP的开始是IDR帧，且当前GOP的数据不会参考前后GOP的数据。

在编解码中，为了方便，将首个I帧称为IDR帧（Instantaneous Decoding Refresh，即时解码刷新），以便和其它I帧区别开来，这样就能方便控制编码和解码流程。

IDR帧的作用是立刻刷新，使错误不至于传播，从IDR帧开始重新算一个新的序列开始编码。IDR会导致DPB（Decoded Picture Buffer，解码图像缓冲区，用于存放参考帧）清空，在IDR帧之后的所有帧都不能引用IDR帧之前的帧的内容。

P帧：预测编码图像帧，是帧间编码帧的一种，利用之前的I帧或P帧进行预测编码。

B帧：双向预测编码图像帧，是帧间编码帧的一种，利用之前和之后的I帧或P帧，进行双向预测编码。

B帧通常不作为参考帧（H.264等标准也允许B帧被其它帧参考，例如B-pyramid）。B帧具有更高的压缩率，但需要更多的缓冲时间以及更高的CPU占用率。

因此B帧更适合本地存储以及视频点播，不适用于对实时性要求高的直播系统。

2、DTS和PTS

DTS（decoding time stamp，解码时间戳），PTS（presentation time stamp，显示时间戳）；音频中DTS和PTS是相同的；视频中如果存在B帧（双向预测编码帧），由于B帧需要依赖其前面和后面的帧，因此含有B帧的视频，其解码顺序和显示顺序是不同的。

More：ffmpeg时间戳详解

H264码流与AAC码流复用合并为MP4文件

1、原料准备：

从mp4中抽取音频码流：ffmpeg -i 001.mp4 -acodec copy -vn 001.aac

从mp4中抽取视频码流：ffmpeg -i 001.mp4 -codec copy -bsf:v h264_mp4toannexb -f h264 001.h264

2、封装代码：

#include <stdio.h>
#include <stdlib.h>
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"

/* Bookkeeping for one opened input: its demuxer context and the one stream selected from it. */
struct AVState {
    AVFormatContext *fmt_ctx;   // format context of the opened input
    AVStream *stream;   // the selected stream inside fmt_ctx
    int type;           // media type requested when the stream was selected
    int stream_index;   // index of `stream` within fmt_ctx->streams
};


/**
 * Open `file` as an input and probe its stream information.
 *
 * @param fmt_ctx  Out parameter. Receives the opened context on success and is
 *                 set to NULL on every failure path, so the caller never sees
 *                 an indeterminate pointer.
 * @param file     Path/URL handed to avformat_open_input().
 * @return 0 on success, -1 on failure. On success the caller owns the context
 *         and must release it with avformat_close_input().
 */
int init_fmt_ctx(AVFormatContext **fmt_ctx, const char *file) {
    AVFormatContext *me = NULL;

    *fmt_ctx = NULL;

    me = avformat_alloc_context();
    if (me == NULL) {
        printf("avformat_alloc failed.\n");
        return -1;
    }
    // On failure avformat_open_input() frees the user-supplied context itself,
    // so there is nothing left to release here.
    if (avformat_open_input(&me, file, NULL, NULL) != 0) {
        printf("Couldn't open input stream.\n");
        return -1;
    }
    if (avformat_find_stream_info(me, NULL) < 0) {
        printf("Couldn't find stream information.\n");
        // The input is open at this point; close it so it does not leak.
        avformat_close_input(&me);
        return -1;
    }

    *fmt_ctx = me;
    return 0;
}


/**
 * Allocate an AVState, open `file` and select its best stream of `type`.
 *
 * @param state  Out parameter. Receives the new state on success and is set to
 *               NULL on failure.
 * @param type   Media type passed to av_find_best_stream()
 *               (the callers in this file pass AVMEDIA_TYPE_AUDIO / AVMEDIA_TYPE_VIDEO).
 * @param file   Input path.
 * @return 0 on success, -1 on failure. On success the caller releases the
 *         state with AVState_Destroy(); on failure nothing is left allocated
 *         by this function.
 */
int AVState_Init(struct AVState **state, int type, const char *file) {
    *state = NULL;

    // calloc so that fmt_ctx/stream start out NULL
    struct AVState *me = calloc(1, sizeof(*me));
    if (me == NULL) {
        printf("failed to alloc AVState\n");
        return -1;
    }

    if (init_fmt_ctx(&me->fmt_ctx, file) < 0) {
        // This function opens both audio and video inputs, so the message is
        // kept type-neutral.
        printf("failed to init input fmt_ctx\n");
        free(me);
        return -1;
    }

    int index = av_find_best_stream(me->fmt_ctx, type, -1, -1, NULL, 0);
    if (index < 0) {
        printf("failed to find stream_index\n");
        avformat_close_input(&me->fmt_ctx);
        free(me);
        return -1;
    }

    me->stream_index = index;
    me->type = type;
    me->stream = me->fmt_ctx->streams[index];

    *state = me;
    return 0;
}

/**
 * Release an AVState: close its input context and free the struct.
 *
 * @param state  State to destroy; NULL is accepted and ignored.
 */
void AVState_Destroy(struct AVState *state) {
    if (state == NULL) {
        return;
    }
    if (state->fmt_ctx) {
        // avformat_close_input() frees the context and resets the pointer to
        // NULL, so no separate avformat_free_context() call is needed.
        avformat_close_input(&state->fmt_ctx);
    }
    free(state);
}


void muxer(const char *mp4file, const char *h264file, const char *aacfile) {
    // 初始输入码流状态结构体
    struct AVState *audio, *video;
    if (AVState_Init(&audio, AVMEDIA_TYPE_AUDIO, aacfile) < 0) {
        goto _Error;
    }
    if (AVState_Init(&video, AVMEDIA_TYPE_VIDEO, h264file) < 0) {
        goto _Error;
    }

    // 初始化mp4的格式上下文
    AVFormatContext *fmt_ctx;
    avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, mp4file);
    if (fmt_ctx == NULL) {
        printf("failed to alloc output fmt_ctx\n");
        goto _Error;
    }

    // 设置输出流
    AVStream *out_stream_audio = avformat_new_stream(fmt_ctx, NULL);
    AVStream *out_stream_video = avformat_new_stream(fmt_ctx, NULL);
    avcodec_parameters_copy(out_stream_audio->codecpar, audio->stream->codecpar);
    avcodec_parameters_copy(out_stream_video->codecpar, video->stream->codecpar);
    int out_audio_index = out_stream_audio->index;
    int out_video_index = out_stream_video->index;
    printf("out_audio_index:%d out_video_index:%d\n", out_audio_index, out_video_index);
    av_dump_format(fmt_ctx, 0, mp4file, 1);

    
    // 打开输出文件io
    if (! (fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        if (avio_open(&fmt_ctx->pb, mp4file, AVIO_FLAG_WRITE) < 0) {
            printf("failed to open output file\n");
            goto _Error;
        }
    }

    // 写文件头
    if (avformat_write_header(fmt_ctx, NULL) < 0) {
        printf("failed to write header\n");
        goto _Error;
    }
    AVPacket *packet = av_packet_alloc();

    // 音频编码数据和视频编码数据合并到MP4文件
    int frame_index = 0;
    int64_t cur_video_pts = 0, cur_audio_pts = 0;
    while (1) {
        struct AVState *inputstate = NULL;  
        AVStream *out_stream = NULL;
        int out_index = -1;
        
        //比较时间戳，判断当前应该写什么帧
        if (av_compare_ts(cur_video_pts, video->stream->time_base, \
                    cur_audio_pts, audio->stream->time_base) < 0) {
            inputstate = video;
            out_index = out_video_index;
        } else {
            inputstate = audio;
            out_index = out_audio_index;
        }
        out_stream = fmt_ctx->streams[out_index];
        
        // 从输入流读取编码数据
        if (av_read_frame(inputstate->fmt_ctx, packet) < 0) {
            break;
        }
       
        // 如果该帧没有pts，需要补上
        if (packet->pts == AV_NOPTS_VALUE) {
            AVRational timebase = inputstate->stream->time_base;
            // 两帧之间的持续时间
            int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(inputstate->stream->r_frame_rate);
            
            packet->pts = (double)(frame_index * calc_duration) / (double)(av_q2d(timebase)*AV_TIME_BASE);
            packet->dts = packet->pts;
            packet->duration = (double)calc_duration / (double)(av_q2d(timebase)*AV_TIME_BASE);
            frame_index++;
        } 

        // 记录pts
        if (out_index == out_video_index) cur_video_pts = packet->pts;
        else cur_audio_pts = packet->pts;

        // 更新PTS/DTS
        packet->pts = av_rescale_q_rnd(packet->pts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        packet->dts = av_rescale_q_rnd(packet->dts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        packet->duration = av_rescale_q(packet->dts, inputstate->stream->time_base, out_stream->time_base);
        packet->pos = -1;
        packet->stream_index = out_index;

        // 写文件
        if (av_interleaved_write_frame(fmt_ctx, packet) < 0) {
            printf("failed to muxing packet\n");
            break;
        } 

        av_packet_unref(packet);
    }

    // 写文件尾
    av_write_trailer(fmt_ctx);

_Error:
    if (audio) AVState_Destroy(audio);
    if (video) AVState_Destroy(video);
    if (fmt_ctx->pb) avio_close(fmt_ctx->pb);
    if (fmt_ctx) avformat_free_context(fmt_ctx);
    if (packet)  av_packet_free(&packet);
}


/*
 * Entry point. Expects three arguments: output file, video input, audio input
 * (in that order, as passed to muxer()).
 * Returns 1 when arguments are missing, 0 otherwise.
 */
int main(int argc, char const* argv[])
{
    if (argc < 4) {
        // Without this check missing arguments would reach muxer() as NULL paths
        fprintf(stderr, "usage: %s <out.mp4> <in.h264> <in.aac>\n",
                argc > 0 ? argv[0] : "muxer");
        return 1;
    }
    muxer(argv[1], argv[2], argv[3]);
    return 0;
}

3、测试结果：

问题：文件大小相同，但在时间和比特率上有些许差异。

原来的testvideo/test.mp4文件：

得到的out.mp4文件

4、优化时间基转换过程

packet->pts = av_rescale_q_rnd(packet->pts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
packet->dts = av_rescale_q_rnd(packet->dts, inputstate->stream->time_base, \
                out_stream->time_base, (AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
packet->duration = av_rescale_q(packet->dts, inputstate->stream->time_base, out_stream->time_base);

替换为

// 将 packet 中的各时间值从输入流封装格式时间基转换到输出流封装格式时间基
av_packet_rescale_ts(packet, inputstate->stream->time_base, out_stream->time_base);

优化效果：