004 ffmpeg_VideoEncodingMuxing

最新推荐文章于 2023-03-17 20:17:47 发布

yi巴

最新推荐文章于 2023-03-17 20:17:47 发布

阅读量350

点赞数

分类专栏： ffmpeg 文章标签： ffmpeg

本文链接：https://blog.csdn.net/jiandanjiuhao_88/article/details/79387929

版权

ffmpeg 专栏收录该内容

21 篇文章 2 订阅

订阅专栏

这个demo的功能是：构造音频数据，且和一个yuv文件合成一个flv文件。不一定是flv文件，根据后缀名来判断容器格式。这份代码很多是从ffmpeg的例子中拷贝过来的。

还是先上两个结构体：

/* Input/output parameters of the demo, filled from the command line. */
typedef struct _IOParam
{
    const char *input_file_name;    // name of the input raw pixel (YUV) file
    const char *output_file_name;   // name of the output muxed video file
    int frame_width;                // video frame width
    int frame_height;               // video frame height
} IOParam;

/* Per-stream muxing state; one instance is kept for video and one for audio. */
typedef struct OutputStream 
{
    AVStream *st;                       // stream created by avformat_new_stream() in add_stream()
    int64_t next_pts;                   // pts given to the next generated frame (get_video_frame() post-increments it)
    AVFrame *frame;                     // reusable frame; for video it is allocated in Open_video()
    AVFrame *tmp_frame;                 // not used in the visible code; presumably a conversion scratch frame -- TODO confirm

    int samples_count;                  // not used in the visible code; presumably audio samples emitted so far -- TODO confirm
    float t, tincr, tincr2;             // audio related
    struct SwrContext *swr_ctx;         // not used in the visible code; presumably the audio resampler context -- TODO confirm
} OutputStream;

这两个是自己定义的结构体：IOParam 接收命令行参数，音视频流各自对应一个 OutputStream。下面从 main 函数入手，把流程搞清楚。main 函数还是比较简单的。

int main(int argc, char **argv)
{
    AVDictionary *opt = NULL;
    IOParam io = {NULL};  
    if (!hello(argc, argv, opt, io))
    {
        return 1;
    }

接受命令参数，opt可以不用考虑。

    AVOutputFormat *fmt;
    AVFormatContext *oc;
    Open_coder_muxer(&fmt, &oc, io.output_file_name);

注册复用器和编解码器，初始化复用（muxing）上下文。Open_coder_muxer相当于初始化的第一步。

    int ret;
    OutputStream video_st = { 0 }, audio_st = { 0 };
    AVCodec *audio_codec = NULL, *video_codec = NULL;
    ret = Add_audio_video_streams(&video_st, &audio_st, oc, fmt, audio_codec, video_codec, io);

初始化编码器，并和audio、video streams关联起来。

    Open_video(oc, video_codec, &video_st, opt, io);
    Open_audio(oc, audio_codec, &audio_st, opt);

打开音视频流。Open_video和Open_audio重点分析。

    if (!(fmt->flags & AVFMT_NOFILE))
    {
        ret = avio_open(&oc->pb, io.output_file_name, AVIO_FLAG_WRITE);
        if (ret < 0)
        {
            fprintf(stderr, "Could not open '%s': %d\n", io.output_file_name, ret);
            return 1;
        }
    }

    /* Write the stream header, if any. */
    ret = avformat_write_header(oc, &opt);
    if (ret < 0)
    {
        fprintf(stderr, "Error occurred when opening output file: %d\n",ret);
        return 1;
    }

打开输出文件，并写入header。

    int videoFrameIdx = 0, audioFrameIdx = 0;
    encode_video = 1;
    encode_audio = 1;
    while (encode_video || encode_audio) 
    {
        /* select the stream to encode */
        if (encode_video &&
                (!encode_audio || av_compare_ts(video_st.next_pts, video_st.st->codec->time_base, audio_st.next_pts, audio_st.st->codec->time_base) <= 0))
        {
            encode_video = !Write_video_frame(oc, &video_st);
            if (encode_video)
            {
                printf("Write %d video frame.\n", videoFrameIdx++);
            }
            else
            {
                printf("Video ended, exit.\n");
            }
        }
        else 
        {
            encode_audio = !Write_audio_frame(oc, &audio_st);
            if (encode_audio)
            {
                printf("Write %d audio frame.\n", audioFrameIdx++);
            }
            else
            {
                printf("Audio ended, exit.\n");
            }
        }
    }

输出音视频frame。调用了av_compare_ts来保证了音视频交叉输出。Write_video_frame和Write_audio_frame得重点分析。

    //写入文件尾数据
    av_write_trailer(oc);

    /* Close each codec. */
    Close_stream(oc, &video_st);
    Close_stream(oc, &audio_st);


    if (!(fmt->flags & AVFMT_NOFILE))
    {
        //关闭输出文件
        avio_closep(&oc->pb);
    }

    //关闭输出文件的上下文句柄
    avformat_free_context(oc);

    printf("Procssing succeeded.\n");
    return 0;
}

结束动作。

下面分析具体逻辑，初始化就是两个函数：Open_coder_muxer和 Add_audio_video_streams。

/*
 * Registers all codecs/formats and allocates the output (muxer) context.
 * The container format is guessed from the extension of `filename`.
 *
 * fmt      [out] receives the output format of the new context, NULL on failure.
 * oc       [out] receives the newly allocated format context.
 * filename name of the output file.
 *
 * Returns 0 on success, a negative value if the context could not be
 * allocated (e.g. no muxer matches the file extension).
 */
int Open_coder_muxer(AVOutputFormat **fmt, AVFormatContext **oc, const char *filename)
{
    /* Initialize libavcodec, and register all codecs and formats. */
    av_register_all();

    /* allocate the output media context */
    int ret = avformat_alloc_output_context2(oc, NULL, NULL, filename);
    if (ret < 0 || !*oc)
    {
        /* Never dereference *oc after a failed allocation. */
        fprintf(stderr, "Could not allocate output context for '%s': %d\n", filename, ret);
        *fmt = NULL;
        return ret < 0 ? ret : -1;
    }

    *fmt = (*oc)->oformat;
    return 0;
}

av_register_all 是使用ffmpeg的第一步。第二步就是初始化复用的context。avformat_alloc_output_context2 根据输出文件的后缀名对复用的context进行了初始化。只要在结构体名中含有format，就说明ffmpeg在复用和解复用，正如上面的 AVOutputFormat 和 AVFormatContext，*fmt = (*oc)->oformat;代表着两个的关系。(*fmt)->video_codec和(*fmt)->audio_codec是音视频的编解码器id，在avformat_alloc_output_context2函数中已经初始化，flv文件会有默认的编码器。avi文件应该也会有默认值吧。

进入初始化第二步：复用功能需要和stream结合，包含video和audio，stream又得和编解码器结合起来。（stream叫什么呢？叫“流”好像不太合适，直接写英文吧。ffmpeg中有一些专有的名词。）

/*
 * Creates the video and/or audio stream for every codec id that the output
 * format declares, and overrides the video resolution with the values from io.
 *
 * Returns a bit mask built from HAVE_VIDEO / ENCODE_VIDEO / HAVE_AUDIO /
 * ENCODE_AUDIO describing which streams were added.
 *
 * NOTE(review): audio_codec and video_codec are pointer copies, so the encoder
 * that add_stream() stores through &audio_codec / &video_codec is not visible
 * to the caller after this function returns.
 */
int Add_audio_video_streams(OutputStream *video_st, OutputStream *audio_st, 
    AVFormatContext *oc, AVOutputFormat *fmt, 
    AVCodec *audio_codec, AVCodec *video_codec, 
    IOParam &io)
{
    int stream_flags = 0;

    /* Video goes first so that it is created before the audio stream. */
    if (AV_CODEC_ID_NONE != fmt->video_codec)
    {
        add_stream(video_st, oc, &video_codec, fmt->video_codec);

        /* Replace the default resolution set by add_stream() with the user's. */
        AVCodecContext *video_enc = video_st->st->codec;
        video_enc->width = io.frame_width;
        video_enc->height = io.frame_height;

        stream_flags |= (HAVE_VIDEO | ENCODE_VIDEO);
    }

    if (AV_CODEC_ID_NONE != fmt->audio_codec)
    {
        add_stream(audio_st, oc, &audio_codec, fmt->audio_codec);

        stream_flags |= (HAVE_AUDIO | ENCODE_AUDIO);
    }

    return stream_flags;
}

fmt->video_codec 和 fmt->audio_codec 在第一步的初始化中已经完成，在这一步中就需要找到编码器啦。具体逻辑在add_stream函数中，对编码器进行初始化。

/*
 * Finds the encoder for codec_id, creates a new stream for it in oc and fills
 * in default encoding parameters on the stream's codec context.
 *
 * ost      receives the new stream in ost->st.
 * oc       output format context the stream is added to.
 * codec    [out] receives the encoder found for codec_id.
 * codec_id id of the encoder to look up.
 *
 * Terminates the process with exit(1) if the encoder cannot be found or the
 * stream cannot be allocated.
 */
static void add_stream(OutputStream *ost, AVFormatContext *oc,  AVCodec **codec, enum AVCodecID codec_id)
{
    AVCodecContext *c;
    int i;

    /* find the encoder */
    *codec = avcodec_find_encoder(codec_id);
    if (!(*codec))
    {
        /* (*codec)->type is read below, so a missing encoder must stop here. */
        fprintf(stderr, "Could not find encoder for codec id %d\n", (int)codec_id);
        exit(1);
    }

    ost->st = avformat_new_stream(oc, *codec);
    if (!ost->st)
    {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    /* The stream was just appended, so its id is the last stream index. */
    ost->st->id = oc->nb_streams - 1;
    c = ost->st->codec;

    switch ((*codec)->type)
    {
    case AVMEDIA_TYPE_AUDIO:
        ...
        break;

    case AVMEDIA_TYPE_VIDEO:
        c->codec_id = codec_id;

        c->bit_rate = 400000;
        /* Resolution must be a multiple of two. */
        c->width = 480;
        c->height = 272;
        /* timebase: This is the fundamental unit of time (in seconds) in terms
        * of which frame timestamps are represented. For fixed-fps content,
        * timebase should be 1/framerate and timestamp increments should be
        * identical to 1. */
        {
            AVRational r = { 1, STREAM_FRAME_RATE };
            ost->st->time_base = r;
        }
        c->time_base = ost->st->time_base;

        c->gop_size = 250; /* emit one intra frame every 250 frames at most */
        c->pix_fmt = AV_PIX_FMT_YUV420P;
        ...
        break;

    default:
        break;
    }

    /* Some formats want stream headers to be separate. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}

avcodec_find_encoder根据codec_id找到了encoder，之后根据encoder和AVFormatContext创建stream，在stream中有一个AVCodecContext。得记住这几个的关系。后面就是设置编码器AVCodecContext的一些参数。至于这些参数代表啥意思？我也只能大概了解啦。

初始化后，还有open。三个open：Open_video，Open_audio和avio_open。avio_open是打开输出文件句柄，ffmpeg的函数。我们先关注video，音频在后面。

/*
 * Opens the video encoder of ost, allocates the reusable picture that frames
 * are read into, and opens the raw YUV input file named by io.
 *
 * oc       output format context (not used here).
 * codec    encoder to open; passed through to avcodec_open2().
 * ost      video stream state; ost->frame is set on return.
 * opt_arg  encoder options; a private copy is handed to the encoder so the
 *          caller's dictionary is left untouched.
 * io       supplies the input file name.
 *
 * Terminates the process with exit(1) if any step fails.
 */
void Open_video(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg, IOParam &io)
{
    AVCodecContext *c = ost->st->codec;
    AVDictionary *opt = NULL;

    /* avcodec_open2() rewrites the dictionary it is given, so work on a copy. */
    av_dict_copy(&opt, opt_arg, 0);
    int ret = avcodec_open2(c, codec, &opt);
    av_dict_free(&opt);
    if (ret < 0)
    {
        fprintf(stderr, "Could not open video codec: %d\n", ret);
        exit(1);
    }

    /* allocate and init a re-usable frame */
    ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
    if (!ost->frame)
    {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }

    /* The input is only read, so do not demand write permission on it. */
    g_inputYUVFile = fopen(io.input_file_name, "rb");
    if (!g_inputYUVFile)
    {
        fprintf(stderr, "Could not open input file '%s'\n", io.input_file_name);
        exit(1);
    }
}

编码器初始化后需要open，正是avcodec_open2这个函数，参数是编码器和编码器context。ost->frame用来保存从文件中读取的yuv数据，它的分配和c->pix_fmt、c->width、c->height相关。之后打开输入文件。初始化完成后，开始一帧一帧地读取数据，编码后复用到输出文件中，也就是Write_video_frame这个函数。

/*
 * Encodes one video frame and, if the encoder produced a packet, writes it to
 * the output file.
 *
 * When get_video_frame() returns NULL the encoder is called with a NULL frame.
 *
 * Returns 0 while there is still work to do (a frame was submitted or a packet
 * was produced) and 1 once there was neither a frame nor a packet.
 * Terminates the process with exit(1) if encoding or writing fails.
 */
int Write_video_frame(AVFormatContext *oc, OutputStream *ost)
{   
    AVFrame *frame = get_video_frame(ost);

    int got_packet = 0;
    AVPacket pkt = { 0 };
    av_init_packet(&pkt);
    AVCodecContext *c = ost->st->codec;

    int ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
    if (ret < 0)
    {
        fprintf(stderr, "Error encoding video frame: %d\n", ret);
        exit(1);
    }

    if (got_packet)
    {
        ret = write_frame(oc, &c->time_base, ost->st, &pkt);
        if (ret < 0)
        {
            fprintf(stderr, "Error while writing video frame: %d\n", ret);
            exit(1);
        }
    }

    return (frame || got_packet) ? 0 : 1;
}

get_video_frame从yuv文件获取一帧数据，通过avcodec_encode_video2进行编码，编码后的数据保存在pkt中，然后由write_frame把pkt写入到文件中。那么数据是如何获取的呢？

/*
 * Produces the next picture to encode.
 *
 * Returns ost->frame filled with the next image and stamped with the current
 * next_pts (which is then advanced by one), or NULL when next_pts has reached
 * STREAM_DURATION (expressed in a 1/1 time base) or fill_yuv_image() reports
 * a negative result.
 */
static AVFrame *get_video_frame(OutputStream *ost)
{
    AVCodecContext *enc = ost->st->codec;
    AVRational unit_tb = { 1, 1 };

    /* Enough frames generated already? */
    if (av_compare_ts(ost->next_pts, enc->time_base, STREAM_DURATION, unit_tb) >= 0)
        return NULL;

    /* Load the next image into the reusable frame. */
    if (fill_yuv_image(ost->frame, ost->next_pts, enc->width, enc->height) < 0)
        return NULL;

    AVFrame *picture = ost->frame;
    picture->pts = ost->next_pts;
    ost->next_pts += 1;

    return picture;
}

ost->next_pts是读取文件yuv帧的个数。av_compare_ts是一个时间刻度的大小比对。

av_compare_ts(ost->next_pts, ost->st->codec->time_base, STREAM_DURATION, r) >= 0

ost->next_pts*ost->st->codec->time_base >= STREAM_DURATION*r
ost->st->codec->time_base = 1/25
STREAM_DURATION  = 10
r = 1/1

则表示：ost->next_pts 达到 250（= 10 × 25）时停止生成，即最多编码 250 帧（pts 为 0～249）。

读取yuv帧在fill_yuv_image函数中实现。来看一看yuv文件的格式。

/*
 * Reads `rows` rows of `row_bytes` bytes each from fp into an image plane
 * whose consecutive rows are `stride` bytes apart.
 * Returns 0 on success, -1 if any row could not be read completely.
 */
static int read_yuv_plane(uint8_t *dst, int stride, int row_bytes, int rows, FILE *fp)
{
    for (int y = 0; y < rows; y++)
    {
        size_t got = fread(dst + y * stride, 1, (size_t)row_bytes, fp);
        if (got != (size_t)row_bytes)
        {
            return -1;
        }
    }
    return 0;
}

/*
 * Fills pict with the next picture from g_inputYUVFile: `height` rows of
 * `width` bytes for plane 0, then `height / 2` rows of `width / 2` bytes for
 * plane 1 and again for plane 2.
 *
 * pict         destination frame; made writable before it is overwritten.
 * frame_index  not used.
 * width/height picture dimensions used to size the reads.
 *
 * Returns 0 on success and -1 when the file does not hold another complete
 * picture (end of file or read error), so the caller can end the stream
 * instead of the process being killed with the output file unfinished.
 * Terminates the process with exit(1) if the frame cannot be made writable.
 */
static int fill_yuv_image(AVFrame *pict, int frame_index, int width, int height)
{
    (void)frame_index;

    /* when we pass a frame to the encoder, it may keep a reference to it
    * internally;
    * make sure we do not overwrite it here
    */
    int ret = av_frame_make_writable(pict);
    if (ret < 0)
    {
        exit(1);
    }

    /* Y */
    if (read_yuv_plane(pict->data[0], pict->linesize[0], width, height, g_inputYUVFile) < 0)
    {
        return -1;
    }

    /* U */
    if (read_yuv_plane(pict->data[1], pict->linesize[1], width / 2, height / 2, g_inputYUVFile) < 0)
    {
        return -1;
    }

    /* V */
    if (read_yuv_plane(pict->data[2], pict->linesize[2], width / 2, height / 2, g_inputYUVFile) < 0)
    {
        return -1;
    }

    return 0;
}

pict 在之前已经通过alloc_picture初始化了。av_frame_make_writable用来确保该帧可写。读YUV看起来很简单，但是我还没有完全理解，那就先记住吧。好，读取了一个yuv帧，该编码啦。调用的是avcodec_encode_video2，这个函数以前就遇到过。如果编码成功并输出了数据，则got_packet为1，数据存在pkt中，然后调用write_frame写文件。

/*
 * Stamps a packet for stream st and hands it to the muxer.
 *
 * The packet timestamps are rescaled from *time_base to st->time_base and the
 * packet is tagged with the index of st before it is written.
 *
 * Returns the result of av_interleaved_write_frame().
 */
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
    const AVRational src_tb = *time_base;
    const AVRational dst_tb = st->time_base;

    /* source time base -> stream time base */
    av_packet_rescale_ts(pkt, src_tb, dst_tb);

    pkt->stream_index = st->index;

    /* Write the compressed frame to the media file. */
    int status = av_interleaved_write_frame(fmt_ctx, pkt);
    return status;
}

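av_packet_rescale_ts把pkt的时间戳刻度从*time_base换算成st->time_base。在add_stream中这两个time_base被设成相同的值，但avformat_write_header可能会修改st->time_base（例如flv通常会改成1/1000），所以这一步换算不能省略。通过av_interleaved_write_frame写入数据，第一个参数是之前打开并写入文件头的AVFormatContext，第二个参数是要写入文件的packet。视频这一块就完成了。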

音频的不同主要在于open_audio和Write_audio_frame。音频这一块还比较复杂，涉及的比较少。我先放一放吧。