FFmpeg零基础学习(四)——CPU视频解码重新编码

CoderForge

已于 2024-01-28 17:51:34 修改

阅读量1.3k

点赞数 25

分类专栏： FFmpeg 文章标签： FFmpeg

于 2023-12-16 10:09:12 首次发布

本文链接：https://blog.csdn.net/qq_43211060/article/details/135029300

版权

FFmpeg 专栏收录该内容

8 篇文章 2 订阅

订阅专栏

前言

本篇文章的功能在于对视频进行解码，然后重新编码。只不过，本篇文章并没有对获得的解码数据进行任何的操作，后续会有文章，针对这方面的内容，进行完善。

正文

一、核心代码

code

	int ret = 0;
    int err;
    //打开输入文件
    char filename[] = "juren-30s.mp4";
    //动态分配和初始化一个空的AVFormatContext对象，为后续的音视频封装和解封装操作做准备。
    AVFormatContext *fmt_ctx = avformat_alloc_context();

    if (!fmt_ctx)
    {
        qDebug() << "fmt_ctx error code:"<<AVERROR(ENOMEM);
        return;
    }

    //打开文件
    if((err = avformat_open_input(&fmt_ctx, filename,NULL,NULL)) < 0)
    {
        printf("can not open file %d \n",err);
        return;
    }


    //分配并初始化一个 AVCodecContext 结构体，该结构体用于编解码器的相关信息和参数设置。
    AVCodecContext *avctx = avcodec_alloc_context3(NULL);

    //用于将AVCodecParameters结构体的值赋值给AVCodecContext结构体的对应字段
    //参数值的赋值涉及到了编解码器的基本配置，例如编码类型、码率、帧率等等。这样可以方便地将参数信息传递给编解码器上下文，以供后续的编解码操作使用。
    ret = avcodec_parameters_to_context(avctx, fmt_ctx->streams[0]->codecpar);

    if (ret < 0)
    {
        qDebug() << "avcodec_parameters_to_context error code:"<<ret;
        return;
    }

    //查找解码器
    AVCodec *codec = avcodec_find_decoder(avctx->codec_id);

    //打开解码器
    if ((ret = avcodec_open2(avctx, codec, NULL)) < 0)
    {
        qDebug() << "avcodec_open2 error code:"<<ret;
        return;
    }

    //打开输出文件容器
    char filename_out[] = "juren-30s-5.mp4";
    AVFormatContext *fmt_ctx_out = NULL;
    //该函数会尝试通过指定的输出格式来分配一个输出格式上下文
    err = avformat_alloc_output_context2(&fmt_ctx_out, NULL, NULL, filename_out);

    if (!fmt_ctx_out)
    {
        qDebug() << "error code:"<<AVERROR(ENOMEM);
        return;
    }

    //添加一路流到容器上下文
    AVStream *st = avformat_new_stream(fmt_ctx_out, NULL);
    st->time_base = fmt_ctx->streams[0]->time_base;

    //分配帧和包资源
    AVCodecContext *enc_ctx = NULL;
    AVPacket *pkt = av_packet_alloc();
    AVFrame *frame = av_frame_alloc();
    AVPacket *pkt_out = av_packet_alloc();

    int frame_num = 0;
    int read_end = 0;

    while(true)
    {
        if(1 == read_end )
        {
            break;
        }

        ret = av_read_frame(fmt_ctx, pkt);

        //跳过不处理音频包
        if(1 == pkt->stream_index)
        {
            av_packet_unref(pkt);
            continue;
        }

        //读取到文件的结尾了
        if (AVERROR_EOF == ret)
        {
            //读取完文件，这时候 pkt 的 data 跟 size 应该是 null
            avcodec_send_packet(avctx, NULL);
        }
        else
        {
            if( 0 != ret)
            {
                qDebug() << "av_read_frame error code:"<<ret;
                return;
            }
            else
            {
                retry:
                if (avcodec_send_packet(avctx, pkt) == AVERROR(EAGAIN))
                {
                    qDebug() << "Receive_frame and send_packet both returned EAGAIN, which is an API violation";
                    //这里可以考虑休眠 0.1 秒，返回 EAGAIN 通常是 ffmpeg 的内部 api 有bug
                    goto retry;
                }

                //释放 pkt 里面的编码数据
                av_packet_unref(pkt);
            }
        }

        //循环不断从解码器读数据，直到没有数据可读。
        while(true)
        {
            //读取 AVFrame
            ret = avcodec_receive_frame(avctx, frame);
            /* 释放 frame 里面的YUV数据，
             * 由于 avcodec_receive_frame 函数里面会调用 av_frame_unref，所以下面的代码可以注释。
             * 所以我们不需要 手动 unref 这个 AVFrame
             * */
            //av_frame_unref(frame);

            if(AVERROR(EAGAIN) == ret)
            {
                //提示 EAGAIN 代表 解码器 需要 更多的 AVPacket
                //跳出 第一层 for，让 解码器拿到更多的 AVPacket
                break;
            }
            else if( AVERROR_EOF == ret )
            {
                /* 提示 AVERROR_EOF 代表之前已经往 解码器发送了一个 data 跟 size 都是 NULL 的 AVPacket
                 * 发送 NULL 的 AVPacket 是提示解码器把所有的缓存帧全都刷出来。
                 * 通常只有在 读完输入文件才会发送 NULL 的 AVPacket，或者需要用现有的解码器解码另一个的视频流才会这么干。
                 *
                 * */

                /* 往编码器发送 null 的 AVFrame，让编码器把剩下的数据刷出来。*/
                ret = avcodec_send_frame(enc_ctx, NULL);
                for(;;)
                {
                    ret = avcodec_receive_packet(enc_ctx, pkt_out);
                    //这里不可能返回 EAGAIN，如果有直接退出。
                    if (ret == AVERROR(EAGAIN)){
                        printf("avcodec_receive_packet error code %d \n",ret);
                        return;
                    }
                    if ( AVERROR_EOF == ret ){
                        break;
                    }
                    //编码出 AVPacket ，先打印一些信息，然后把它写入文件。
                    printf("pkt_out size : %d \n",pkt_out->size);
                    //设置 AVPacket 的 stream_index ，这样才知道是哪个流的。
                    pkt_out->stream_index = st->index;
                    //转换 AVPacket 的时间基为 输出流的时间基。
                    pkt_out->pts = av_rescale_q_rnd(pkt_out->pts, fmt_ctx->streams[0]->time_base, st->time_base, static_cast<AVRounding>(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
                    pkt_out->dts = av_rescale_q_rnd(pkt_out->dts, fmt_ctx->streams[0]->time_base, st->time_base, static_cast<AVRounding>(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
                    pkt_out->duration = av_rescale_q_rnd(pkt_out->duration, fmt_ctx->streams[0]->time_base, st->time_base, static_cast<AVRounding>(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));


                    ret = av_interleaved_write_frame(fmt_ctx_out, pkt_out);
                    if (ret < 0) {
                        printf("av_interleaved_write_frame faile %d \n",ret);
                        return;
                    }
                    av_packet_unref(pkt_out);
                }
                av_write_trailer(fmt_ctx_out);
                //跳出 第二层 for，文件已经解码完毕。
                read_end = 1;
                break;
            }
            else if(ret >= 0)
            {
                //只有解码出来一个帧，才可以开始初始化编码器。

                if(NULL == enc_ctx)
                {
                    //打开编码器，并且设置 编码信息。
                    AVCodec *encode = avcodec_find_encoder(AV_CODEC_ID_H264);
                    enc_ctx = avcodec_alloc_context3(encode);
                    enc_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
                    enc_ctx->bit_rate = 400000;
                    enc_ctx->framerate = avctx->framerate;
                    enc_ctx->gop_size = 30;
                    enc_ctx->max_b_frames = 10;
                    enc_ctx->profile = FF_PROFILE_H264_MAIN;
                    /*
                     * 其实下面这些信息在容器那里也有，也可以一开始直接在容器那里打开编码器
                     * 我从 AVFrame 里拿这些编码器参数是因为，容器的信息不一样就是最终的信息。
                     * 因为你解码出来的 AVFrame 可能会经过 filter 滤镜，经过滤镜之后信息就会变化，但是本文没有使用滤镜。
                     */
                    //编码器的时间基要取 AVFrame 的时间基，因为 AVFrame 是输入。
                    enc_ctx->time_base = fmt_ctx->streams[0]->time_base;
                    enc_ctx->width = fmt_ctx->streams[0]->codecpar->width;
                    enc_ctx->height = fmt_ctx->streams[0]->codecpar->height;
                    enc_ctx->sample_aspect_ratio = st->sample_aspect_ratio = frame->sample_aspect_ratio;
                    enc_ctx->pix_fmt = static_cast<AVPixelFormat>(frame->format);
                    enc_ctx->color_range            = frame->color_range;
                    enc_ctx->color_primaries        = frame->color_primaries;
                    enc_ctx->color_trc              = frame->color_trc;
                    enc_ctx->colorspace             = frame->colorspace;
                    enc_ctx->chroma_sample_location = frame->chroma_location;

                    /* 注意，这个 field_order 不同的视频的值是不一样的，这里我写死了。
                     * 因为 本文的视频就是 AV_FIELD_PROGRESSIVE
                     * 生产环境要对不同的视频做处理的
                     */
                    enc_ctx->field_order = AV_FIELD_PROGRESSIVE;

                    /* 现在我们需要把 编码器参数复制给流，解码的时候是 从流赋值参数给解码器。
                     * 现在要反着来。
                     * */
                    ret = avcodec_parameters_from_context(st->codecpar,enc_ctx);
                    if (ret < 0)
                    {
                        qDebug() << "avcodec_parameters_from_context codec faile:" << ret;
                        return;
                    }

                    //它用于初始化和打开音频或视频编解码器的上下文。
                    if ((ret = avcodec_open2(enc_ctx, encode, NULL)) < 0) {
                        qDebug() << "avcodec_open2 codec faile:" << ret;
                        return ;
                    }

                    //正式打开输出文件
                    if ((ret = avio_open2(&fmt_ctx_out->pb, filename_out, AVIO_FLAG_WRITE,&fmt_ctx_out->interrupt_callback,NULL)) < 0)
                    {
                        qDebug() << "avio_open2 codec faile:" << ret;
                        return;
                    }

                    //要先写入文件头部。
                    ret = avformat_write_header(fmt_ctx_out,NULL);
                    if (ret < 0)
                    {
                        qDebug() << "avformat_write_header codec faile:" << ret;
                        return;
                    }
                }

                //往编码器发送 AVFrame，然后不断读取 AVPacket
                ret = avcodec_send_frame(enc_ctx, frame);
                if (ret < 0)
                {
                    qDebug() <<  "avcodec_send_frame fail:"<<ret;
                    return;
                }

                for(;;)
                {
                    ret = avcodec_receive_packet(enc_ctx, pkt_out);
                    if (ret == AVERROR(EAGAIN))
                    {
                        break;
                    }
                    if (ret < 0)
                    {
                        qDebug() <<  "avcodec_receive_packet fail:"<<ret;
                        return;
                    }
                    //编码出 AVPacket ，先打印一些信息，然后把它写入文件。
                    qDebug() << "pkt_out size:"<<pkt_out->size;

                    //设置 AVPacket 的 stream_index ，这样才知道是哪个流的。
                    pkt_out->stream_index = st->index;
                    //转换 AVPacket 的时间基为 输出流的时间基。
                    pkt_out->pts = av_rescale_q_rnd(pkt_out->pts, fmt_ctx->streams[0]->time_base, st->time_base, static_cast<AVRounding>(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
                    pkt_out->dts = av_rescale_q_rnd(pkt_out->dts, fmt_ctx->streams[0]->time_base, st->time_base, static_cast<AVRounding>(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
                    pkt_out->duration = av_rescale_q_rnd(pkt_out->duration, fmt_ctx->streams[0]->time_base, st->time_base, static_cast<AVRounding>(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));

                    ret = av_interleaved_write_frame(fmt_ctx_out, pkt_out);
                    if (ret < 0) {
                        qDebug() <<  "av_interleaved_write_frame fail:"<<ret;
                        return;
                    }
                    av_packet_unref(pkt_out);
                }
            }
            else
            {
                qDebug() <<"other fail \n";
                return;
            }
        }
    }
    av_frame_free(&frame);
    av_packet_free(&pkt);
    av_packet_free(&pkt_out);

    //关闭编码器，解码器。
    avcodec_close(avctx);
    avcodec_close(enc_ctx);

    //释放容器内存。
    avformat_free_context(fmt_ctx);

    //必须调 avio_closep ，要不可能会没把数据写进去，会是 0kb
    avio_closep(&fmt_ctx_out->pb);
    avformat_free_context(fmt_ctx_out);
    qDebug() <<"done \n";

二、重要的API

1、avformat_alloc_context

avformat_alloc_context是FFmpeg库中的一个函数，用于动态分配并初始化一个AVFormatContext结构体。它的函数原型如下：

/**
 * Allocate an AVFormatContext.
 * avformat_free_context() can be used to free the context and everything
 * allocated by the framework within it.
 */
AVFormatContext *avformat_alloc_context(void);

该函数会分配一块内存，并将其初始化为一个空的AVFormatContext结构体，然后返回指向该结构体的指针。

使用avformat_alloc_context函数可以创建一个空的AVFormatContext对象，然后可以通过设置不同的字段和参数来配置它，以便进行音视频封装或解封装操作。
总结来说，avformat_alloc_context函数用于动态分配和初始化一个空的AVFormatContext对象，为后续的音视频封装和解封装操作做准备。
所以，分配后，可以对AVFormatContext 对象进行判空，防止初始化失败。

2、avformat_open_input

avformat_open_input是FFmpeg库中的一个函数，用于打开音视频输入文件并初始化相关的输入上下文（AVFormatContext）。它的函数原型如下：

/**
 * Open an input stream and read the header. The codecs are not opened.
 * The stream must be closed with avformat_close_input().
 *
 * @param ps       Pointer to user-supplied AVFormatContext (allocated by
 *                 avformat_alloc_context). May be a pointer to NULL, in
 *                 which case an AVFormatContext is allocated by this
 *                 function and written into ps.
 *                 Note that a user-supplied AVFormatContext will be freed
 *                 on failure.
 *> 传入值为avformat_alloc_context 分配的对象
 * @param url      URL of the stream to open.
 *> 要打开的流的地址
 * @param fmt      If non-NULL, this parameter forces a specific input format.
 *                 Otherwise the format is autodetected.
 * >输入的文件的格式，若为NULL,则自动检测
 * @param options  A dictionary filled with AVFormatContext and demuxer-private
 *                 options.
 *                 On return this parameter will be destroyed and replaced with
 *                 a dict containing options that were not found. May be NULL.
 *
 * @return 0 on success, a negative AVERROR on failure.
 *> 返回值0，为正确，其他值为失败
 * @note If you want to use custom IO, preallocate the format context and set its pb field.
 */
int avformat_open_input(AVFormatContext **ps, const char *url,
                        const AVInputFormat *fmt, AVDictionary **options);

该函数的参数说明如下：
ps：指向指针的指针，用于存储分配的AVFormatContext对象。
url：输入文件的URL或文件名。
fmt：指定输入格式，如果为NULL，则由FFmpeg自动检测输入文件的格式。
options：指向包含附加选项的字典。可以在打开输入文件时提供一些特定的选项，比如设置超时时间、设置输入缓冲大小等。

1、需要强调的是：通过 avformat_open_input 打开的输入流，必须使用 avformat_close_input 来关闭。
2、函数返回一个整数值，表示操作的结果。如果返回值小于0，则表示打开输入文件失败，否则返回0表示操作成功。使用avformat_open_input函数可以打开一个音视频输入文件，并将其与一个AVFormatContext对象关联起来，以便后续的音视频解封装操作。
3、avformat_open_input函数用于打开音视频输入文件，并初始化相关的输入上下文。它是进行音视频解封装操作的起点之一。

源码如下：

/*
 * Open an input and read its header into an AVFormatContext.
 *
 * If *ps is NULL a context is allocated here; otherwise the caller-supplied
 * one is used. On success the context is stored in *ps and 0 is returned.
 * Every failure that reaches the `fail` label frees the context and sets
 * *ps to NULL; the two early returns at the top leave *ps untouched.
 */
int avformat_open_input(AVFormatContext **ps, const char *filename,
                        ff_const59 AVInputFormat *fmt, AVDictionary **options)
{
    AVFormatContext *s = *ps;
    int i, ret = 0;
    AVDictionary *tmp = NULL;
    ID3v2ExtraMeta *id3v2_extra_meta = NULL;

    /* Allocate a context only when the caller did not supply one. */
    if (!s && !(s = avformat_alloc_context()))
        return AVERROR(ENOMEM);
    /* A non-NULL context without av_class is rejected as not properly allocated. */
    if (!s->av_class) {
        av_log(NULL, AV_LOG_ERROR, "Input context has not been properly allocated by avformat_alloc_context() and is not NULL either\n");
        return AVERROR(EINVAL);
    }
    /* A caller-specified format overrides whatever the context holds. */
    if (fmt)
        s->iformat = fmt;

    /* Work on a copy; the caller's dictionary is only replaced on success. */
    if (options)
        av_dict_copy(&tmp, *options, 0);

    /* A pre-set pb is flagged as custom I/O; the fail path will not close it. */
    if (s->pb) // must be before any goto fail
        s->flags |= AVFMT_FLAG_CUSTOM_IO;

    /* Apply the options to the format context itself. */
    if ((ret = av_opt_set_dict(s, &tmp)) < 0)
        goto fail;

    /* Keep a private copy of the URL (empty string when filename is NULL). */
    if (!(s->url = av_strdup(filename ? filename : ""))) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

#if FF_API_FORMAT_FILENAME
FF_DISABLE_DEPRECATION_WARNINGS
    av_strlcpy(s->filename, filename ? filename : "", sizeof(s->filename));
FF_ENABLE_DEPRECATION_WARNINGS
#endif
    /* init_input()'s non-negative return value is stored as the probe score. */
    if ((ret = init_input(s, filename, &tmp)) < 0)
        goto fail;
    s->probe_score = ret;

    /* Inherit the protocol white/black lists from the I/O context if unset. */
    if (!s->protocol_whitelist && s->pb && s->pb->protocol_whitelist) {
        s->protocol_whitelist = av_strdup(s->pb->protocol_whitelist);
        if (!s->protocol_whitelist) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }

    if (!s->protocol_blacklist && s->pb && s->pb->protocol_blacklist) {
        s->protocol_blacklist = av_strdup(s->pb->protocol_blacklist);
        if (!s->protocol_blacklist) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }

    /* Refuse input formats that are not on the format whitelist, when one is set. */
    if (s->format_whitelist && av_match_list(s->iformat->name, s->format_whitelist, ',') <= 0) {
        av_log(s, AV_LOG_ERROR, "Format not on whitelist \'%s\'\n", s->format_whitelist);
        ret = AVERROR(EINVAL);
        goto fail;
    }

    /* Skip the configured number of leading bytes of the input. */
    avio_skip(s->pb, s->skip_initial_bytes);

    /* Check filename in case an image number is expected. */
    if (s->iformat->flags & AVFMT_NEEDNUMBER) {
        if (!av_filename_number_test(filename)) {
            ret = AVERROR(EINVAL);
            goto fail;
        }
    }

    /* Duration and start time are marked as unknown. */
    s->duration = s->start_time = AV_NOPTS_VALUE;

    /* Allocate private data. */
    if (s->iformat->priv_data_size > 0) {
        if (!(s->priv_data = av_mallocz(s->iformat->priv_data_size))) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
        /* With a private class: install it, set defaults, then apply the remaining options. */
        if (s->iformat->priv_class) {
            *(const AVClass **) s->priv_data = s->iformat->priv_class;
            av_opt_set_defaults(s->priv_data);
            if ((ret = av_opt_set_dict(s->priv_data, &tmp)) < 0)
                goto fail;
        }
    }

    /* e.g. AVFMT_NOFILE formats will not have a AVIOContext */
    if (s->pb)
        ff_id3v2_read_dict(s->pb, &s->internal->id3v2_meta, ID3v2_DEFAULT_MAGIC, &id3v2_extra_meta);

    /* Let the demuxer read the header; both preprocessor variants guard the same call. */
#if FF_API_DEMUXER_OPEN
    if (!(s->flags&AVFMT_FLAG_PRIV_OPT) && s->iformat->read_header)
#else
    if (s->iformat->read_header)
#endif
        if ((ret = s->iformat->read_header(s)) < 0)
            goto fail;

    /* Use the ID3v2 tags as metadata only when read_header produced none. */
    if (!s->metadata) {
        s->metadata = s->internal->id3v2_meta;
        s->internal->id3v2_meta = NULL;
    } else if (s->internal->id3v2_meta) {
        av_log(s, AV_LOG_WARNING, "Discarding ID3 tags because more suitable tags were found.\n");
        av_dict_free(&s->internal->id3v2_meta);
    }

    /* Extra ID3v2 data is parsed only for the mp3, aac, tta and wav formats. */
    if (id3v2_extra_meta) {
        if (!strcmp(s->iformat->name, "mp3") || !strcmp(s->iformat->name, "aac") ||
            !strcmp(s->iformat->name, "tta") || !strcmp(s->iformat->name, "wav")) {
            if ((ret = ff_id3v2_parse_apic(s, id3v2_extra_meta)) < 0)
                goto close;
            if ((ret = ff_id3v2_parse_chapters(s, id3v2_extra_meta)) < 0)
                goto close;
            if ((ret = ff_id3v2_parse_priv(s, id3v2_extra_meta)) < 0)
                goto close;
        } else
            av_log(s, AV_LOG_DEBUG, "demuxer does not support additional id3 data, skipping\n");
    }
    ff_id3v2_free_extra_meta(&id3v2_extra_meta);

    if ((ret = avformat_queue_attached_pictures(s)) < 0)
        goto close;

    /* Record the current I/O position as the data offset if it is not set yet. */
#if FF_API_DEMUXER_OPEN
    if (!(s->flags&AVFMT_FLAG_PRIV_OPT) && s->pb && !s->internal->data_offset)
#else
    if (s->pb && !s->internal->data_offset)
#endif
        s->internal->data_offset = avio_tell(s->pb);

    s->internal->raw_packet_buffer_remaining_size = RAW_PACKET_BUFFER_SIZE;

    update_stream_avctx(s);

    /* Remember the codec id each stream had right after opening. */
    for (i = 0; i < s->nb_streams; i++)
        s->streams[i]->internal->orig_codec_id = s->streams[i]->codecpar->codec_id;

    /* Success: replace the caller's dictionary with tmp (the working copy after the option-setting calls above). */
    if (options) {
        av_dict_free(options);
        *options = tmp;
    }
    *ps = s;
    return 0;

    /* Failure after the header was read: give the demuxer a chance to clean up, then fall through to fail. */
close:
    if (s->iformat->read_close)
        s->iformat->read_close(s);
    /* Common failure path: free temporaries, close pb unless it is custom I/O, free the context and NULL the caller's pointer. */
fail:
    ff_id3v2_free_extra_meta(&id3v2_extra_meta);
    av_dict_free(&tmp);
    if (s->pb && !(s->flags & AVFMT_FLAG_CUSTOM_IO))
        avio_closep(&s->pb);
    avformat_free_context(s);
    *ps = NULL;
    return ret;
}

分析可以查看雷神的这篇文章:
FFmpeg源代码简单分析：avformat_open_input()
可以看到，只要执行到 fail 分支（例如打开文件失败、分配资源失败等），函数就会释放传入的 AVFormatContext，并将调用者的指针（*ps）置为 NULL；因此调用失败后，不需要也不能再对该对象调用释放函数。

3、avcodec_alloc_context3

avcodec_alloc_context3 是 FFmpeg 库中的一个函数，用于分配并初始化一个 AVCodecContext 结构体，该结构体用于编解码器的相关信息和参数设置。
AVCodecContext 结构体是 FFmpeg 库中最为重要的结构之一，它包含了编解码器的相关信息，例如编码器类型、编码器参数、图像尺寸、音频采样率等。通过 AVCodecContext，你可以对媒体数据进行编解码操作，并设置编解码器的各种参数。
使用 avcodec_alloc_context3 函数可以创建一个空的 AVCodecContext 结构体，并将其内部字段初始化为默认值。
需要注意的是，分配的 AVCodecContext 结构体在使用完毕后，需要通过调用 avcodec_free_context 函数进行释放，以避免内存泄漏。
总而言之，avcodec_alloc_context3 函数用于分配并初始化 AVCodecContext 结构体，为后续的编解码操作提供基础。你可以使用该函数创建一个编解码器上下文，并设置相应的参数，以便进行媒体数据的编解码处理。

4、avcodec_parameters_to_context

1、用于将AVCodecParameters结构体的值赋值给AVCodecContext结构体的对应字段。AVCodecParameters结构体包含了编解码器相关的参数信息，而AVCodecContext结构体则是编解码器的上下文环境。
2、该函数会根据AVCodecParameters结构体中的值，更新AVCodecContext结构体中对应的字段。参数值的赋值涉及到了编解码器的基本配置，例如编码类型、码率、帧率等等。这样可以方便地将参数信息传递给编解码器上下文，以供后续的编解码操作使用。

5、avformat_alloc_output_context2

参数解析：

AVFormatContext **ctx: 输出参数，指向分配的输出格式上下文的指针。
AVOutputFormat *oformat: 输入参数，指定要使用的输出格式。如果为NULL，则根据format_name和filename来推断输出格式。
const char *format_name: 可选参数，用于指定输出格式的短名称。如果为NULL，则根据输出文件的扩展名自动推断。
const char *filename: 输出文件的路径或URL。

1、首先，该函数会尝试通过指定的输出格式来分配一个输出格式上下文（AVFormatContext）。如果format_name参数为NULL，函数会尝试根据输出文件的扩展名猜测输出格式。然后，分配的上下文将被赋值给ctx指针。
2、在成功分配输出格式上下文后，我们可以继续设置该上下文的各种参数，如音视频编码器、封装格式的选项等。