【FFMPEG】encoder深入分析

从前面一篇文章ffmpeg内部组件总结可以知道encoder的具体使用方法:

char *filename = "/data/record/test.mp4"
AVFormatContext *format_ctx = NULL;
//通过输入url找到对应的muxer即format_ctx->oformat并malloc AVFormatContext
format_ctx = avformat_alloc_output_context2(&format_ctx, NULL, NULL, filename);
//通过url找到并初始化IO模块
avio_open2(&format_ctx->pb, filename, AVIO_FLAG_WRITE, NULL, NULL);
//通过codec找到对应的encoder
AVCodec *enc = avcodec_find_encoder(format_ctx->oformat->video_codec);
AVCodecContext *enc_ctx = avcodec_alloc_context3(enc);
//打开encoder
avcodec_open2(enc_ctx, enc, NULL);
AVStream *stream = avformat_new_stream(format_ctx, enc);
avcodec_parameters_from_context(stream->codecpar, enc_ctx);

while(!exit) {
    //将输入源的数据送到encoder中编码
    avcodec_send_frame(enc_ctx, frame);
    AVPacket *pkt = av_packet_alloc();
    //从encoder中获取编码后的数据
    avcodec_receive_packet(enc_ctx, pkt);
    //将编码后的数据送到muxer中
    av_write_frame(format_ctx, pkt);
}

本篇文章将以struct FFCodec ff_mpeg4_encoder深入分析下,

  1. encoder需要外部传入什么参数?
  2. 输入/输出buffer的处理
  3. encoder内部是否有数据缓存?
接下来重点分析下这四个接口: avcodec_open2(…)、avcodec_parameters_from_context(…)、avcodec_send_frame(…)、avcodec_receive_packet(…) 分别干了什么。

avcodec_open2

/**
 * Excerpt (error paths and unrelated parts elided with "....").
 * For an encoder this does four things: allocate the codec-private context,
 * allocate AVCodecInternal, run ff_encode_preinit(), then call the concrete
 * encoder's init callback.
 */
int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options)
{
    int ret = 0;
    AVCodecInternal *avci;
    const FFCodec *codec2;
    ........................;
    if (!codec)
        codec = avctx->codec;
    //Convert the struct AVCodec* into the struct FFCodec* that wraps it
    codec2 = ffcodec(codec);
    avctx->codec_type = codec->type;
    avctx->codec_id   = codec->id;
    avctx->codec      = codec;
    ........................;
    //Allocate an AVCodecInternal and store it in AVCodecContext.internal
    avci = av_mallocz(sizeof(*avci));
    avctx->internal = avci;
    //buffer_frame and buffer_pkt are worth watching: they are the internal
    //staging buffers used by avcodec_send_frame()/avcodec_receive_packet() later on
    avci->buffer_frame = av_frame_alloc();
    avci->buffer_pkt = av_packet_alloc();
    ........................;
    avci->skip_samples_multiplier = 1;

    if (codec2->priv_data_size > 0) {
        if (!avctx->priv_data) {
            //Taking FFCodec ff_mpeg4_encoder as the example, the
            //codec2->priv_data_size malloc'ed here is actually sizeof(struct MpegEncContext):
            //const FFCodec ff_mpeg4_encoder = {
            //    .p.name         = "mpeg4",
            //    .p.long_name    = NULL_IF_CONFIG_SMALL("MPEG-4 part 2"),
            //    .p.type         = AVMEDIA_TYPE_VIDEO,
            //    .p.id           = AV_CODEC_ID_MPEG4,
            //    .priv_data_size = sizeof(MpegEncContext),
            //    .init           = encode_init,
            //    FF_CODEC_ENCODE_CB(ff_mpv_encode_picture),
            //    .close          = ff_mpv_encode_end,
            //    .p.pix_fmts     = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
            //    .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS,
            //    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
            //    .p.priv_class   = &mpeg4enc_class,
            //    };
            avctx->priv_data = av_mallocz(codec2->priv_data_size);
            if (!avctx->priv_data) {
                ret = AVERROR(ENOMEM);
                goto free_and_end;
            }
            //If the codec has an AVClass, wire it up so AVOptions work on priv_data
            if (codec->priv_class) {
                *(const AVClass **)avctx->priv_data = codec->priv_class;
                av_opt_set_defaults(avctx->priv_data);
            }
        }
        if (codec->priv_class && (ret = av_opt_set_dict(avctx->priv_data, options)) < 0)
            goto free_and_end;
    } else {
        avctx->priv_data = NULL;
    }
    ..........................;
    
    //Encoder-side parameter validation / fix-up (pixel format checks etc.)
    if (av_codec_is_encoder(avctx->codec))
        ret = ff_encode_preinit(avctx);
    else
        ret = ff_decode_preinit(avctx);
    if (ret < 0)
        goto free_and_end;

    .....................................;
    //codec2->init is the concrete encoder's init callback (encode_init for mpeg4)
    if (!(avctx->active_thread_type & FF_THREAD_FRAME) ||
        avci->frame_thread_encoder) {
        if (codec2->init) {
            lock_avcodec(codec2);
            ret = codec2->init(avctx);
            unlock_avcodec(codec2);
            if (ret < 0) {
                avci->needs_close = codec2->caps_internal & FF_CODEC_CAP_INIT_CLEANUP;
                goto free_and_end;
            }
        }
        avci->needs_close = 1;
    }


    if (av_codec_is_decoder(avctx->codec)) {
        ......................;
    }
    ...................................;
}

从上面avcodec_open2(…)针对encoder来说主要做了以下4件事情:

  1. malloc具体encoder的context
  2. malloc AVCodecInternal
  3. 调用ff_encode_preinit(…) 即调用encode_preinit_video(…)检查AVPixFmtDescriptor里面的参数并赋值到AVCodecContext中的成员。
  4. 调用具体encoder的init接口

有一个全局数组AVPixFmtDescriptor av_pix_fmt_descriptors[] 里面定义了这种像素格式的参数:

// Excerpt of the pixel-format descriptor table.
// Each comp[] entry is an AVComponentDescriptor written with positional
// initializers in field order: { plane, step, offset, shift, depth }
// (see the struct definition further down).
static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
    [AV_PIX_FMT_YUV420P] = {
        .name = "yuv420p",
        .nb_components = 3,
        .log2_chroma_w = 1,
        .log2_chroma_h = 1,
        //Planar: Y/U/V each live on their own plane (0/1/2), 1 byte per sample
        .comp = {
            { 0, 1, 0, 0, 8 },        /* Y */
            { 1, 1, 0, 0, 8 },        /* U */
            { 2, 1, 0, 0, 8 },        /* V */
        },
        .flags = AV_PIX_FMT_FLAG_PLANAR,
    },
    [AV_PIX_FMT_YUYV422] = {
        .name = "yuyv422",
        .nb_components = 3,
        .log2_chroma_w = 1,
        .log2_chroma_h = 0,
        //Packed: all components on plane 0; Y repeats every 2 bytes,
        //U/V every 4 bytes at byte offsets 1 and 3 respectively
        .comp = {
            { 0, 2, 0, 0, 8 },        /* Y */
            { 0, 4, 1, 0, 8 },        /* U */
            { 0, 4, 3, 0, 8 },        /* V */
        },
    },
    [AV_PIX_FMT_YVYU422] = {
        .name = "yvyu422",
        .nb_components = 3,
        .log2_chroma_w = 1,
        .log2_chroma_h = 0,
        //Same layout as yuyv422 but with the U/V byte offsets swapped
        .comp = {
            { 0, 2, 0, 0, 8 },        /* Y */
            { 0, 4, 3, 0, 8 },        /* U */
            { 0, 4, 1, 0, 8 },        /* V */
        },
    },
    ................;
 }

这里面的具体含义,后面在encode用到的时候再反过来查看。

// Descriptor of a single pixel format; entries of av_pix_fmt_descriptors[]
// above are instances of this struct.
typedef struct AVPixFmtDescriptor {
    const char *name;
    uint8_t nb_components;  ///< The number of components each pixel has, (1-4)

    /**
     * Amount to shift the luma width right to find the chroma width.
     * For YV12 this is 1 for example.
     * chroma_width = AV_CEIL_RSHIFT(luma_width, log2_chroma_w)
     * The note above is needed to ensure rounding up.
     * This value only refers to the chroma components.
     */
    uint8_t log2_chroma_w;

    /**
     * Amount to shift the luma height right to find the chroma height.
     * For YV12 this is 1 for example.
     * chroma_height= AV_CEIL_RSHIFT(luma_height, log2_chroma_h)
     * The note above is needed to ensure rounding up.
     * This value only refers to the chroma components.
     */
    uint8_t log2_chroma_h;

    /**
     * Combination of AV_PIX_FMT_FLAG_... flags.
     */
    uint64_t flags;

    /**
     * Parameters that describe how pixels are packed.
     * If the format has 1 or 2 components, then luma is 0.
     * If the format has 3 or 4 components:
     *   if the RGB flag is set then 0 is red, 1 is green and 2 is blue;
     *   otherwise 0 is luma, 1 is chroma-U and 2 is chroma-V.
     *
     * If present, the Alpha channel is always the last component.
     */
    AVComponentDescriptor comp[4];

    /**
     * Alternative comma-separated names.
     */
    const char *alias;
} AVPixFmtDescriptor;
// Per-component layout descriptor. The field order here (plane, step,
// offset, shift, depth) is the order used by the positional comp[]
// initializers in av_pix_fmt_descriptors[] above.
typedef struct AVComponentDescriptor {
    /**
     * Which of the 4 planes contains the component.
     */
    int plane;

    /**
     * Number of elements between 2 horizontally consecutive pixels.
     * Elements are bits for bitstream formats, bytes otherwise.
     */
    int step;

    /**
     * Number of elements before the component of the first pixel.
     * Elements are bits for bitstream formats, bytes otherwise.
     */
    int offset;

    /**
     * Number of least significant bits that must be shifted away
     * to get the value.
     */
    int shift;

    /**
     * Number of bits in the component.
     */
    int depth;
} AVComponentDescriptor;

avcodec_parameters_from_context

将AVCodecContext中的参数赋值给AVStream中的AVCodecParameters,这些参数主要是给muxer使用的。

/**
 * Copy the relevant fields of an opened AVCodecContext into an
 * AVCodecParameters, so the muxer can use them without touching the codec
 * context. Returns 0 on success, a negative AVERROR code on failure.
 */
int avcodec_parameters_from_context(AVCodecParameters *par,
                                    const AVCodecContext *codec)
{
    int ret;

    //Wipe whatever was in par before filling it in
    codec_parameters_reset(par);

    par->codec_type = codec->codec_type;
    par->codec_id   = codec->codec_id;
    par->codec_tag  = codec->codec_tag;

    par->bit_rate              = codec->bit_rate;
    par->bits_per_coded_sample = codec->bits_per_coded_sample;
    par->bits_per_raw_sample   = codec->bits_per_raw_sample;
    par->profile               = codec->profile;
    par->level                 = codec->level;

    switch (par->codec_type) {
    case AVMEDIA_TYPE_VIDEO:
        par->format              = codec->pix_fmt;
        par->width               = codec->width;
        par->height              = codec->height;
        par->field_order         = codec->field_order;
        par->color_range         = codec->color_range;
        par->color_primaries     = codec->color_primaries;
        par->color_trc           = codec->color_trc;
        par->color_space         = codec->colorspace;
        par->chroma_location     = codec->chroma_sample_location;
        par->sample_aspect_ratio = codec->sample_aspect_ratio;
        par->video_delay         = codec->has_b_frames;
        break;
    case AVMEDIA_TYPE_AUDIO:
        par->format           = codec->sample_fmt;
#if FF_API_OLD_CHANNEL_LAYOUT
FF_DISABLE_DEPRECATION_WARNINGS
        // if the old/new fields are set inconsistently, prefer the old ones
        if ((codec->channels && codec->channels != codec->ch_layout.nb_channels) ||
            (codec->channel_layout && (codec->ch_layout.order != AV_CHANNEL_ORDER_NATIVE ||
                                       codec->ch_layout.u.mask != codec->channel_layout))) {
            if (codec->channel_layout)
                av_channel_layout_from_mask(&par->ch_layout, codec->channel_layout);
            else {
                par->ch_layout.order       = AV_CHANNEL_ORDER_UNSPEC;
                par->ch_layout.nb_channels = codec->channels;
            }
FF_ENABLE_DEPRECATION_WARNINGS
        } else {
#endif
        ret = av_channel_layout_copy(&par->ch_layout, &codec->ch_layout);
        if (ret < 0)
            return ret;
#if FF_API_OLD_CHANNEL_LAYOUT
FF_DISABLE_DEPRECATION_WARNINGS
        }
        //Mirror the new-style layout back into the deprecated fields
        par->channel_layout  = par->ch_layout.order == AV_CHANNEL_ORDER_NATIVE ?
                               par->ch_layout.u.mask : 0;
        par->channels        = par->ch_layout.nb_channels;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
        par->sample_rate      = codec->sample_rate;
        par->block_align      = codec->block_align;
        par->frame_size       = codec->frame_size;
        par->initial_padding  = codec->initial_padding;
        par->trailing_padding = codec->trailing_padding;
        par->seek_preroll     = codec->seek_preroll;
        break;
    case AVMEDIA_TYPE_SUBTITLE:
        par->width  = codec->width;
        par->height = codec->height;
        break;
    }

    //Deep-copy the extradata (e.g. codec headers) with the required padding
    if (codec->extradata) {
        par->extradata = av_mallocz(codec->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
        if (!par->extradata)
            return AVERROR(ENOMEM);
        memcpy(par->extradata, codec->extradata, codec->extradata_size);
        par->extradata_size = codec->extradata_size;
    }

    return 0;
}

avcodec_send_frame

下面来看看send接口把需要编码的数据送到什么地方了?

/**
 * Excerpt. Buffers the caller's frame inside AVCodecInternal and, when no
 * packet is pending, eagerly runs one encode pass.
 */
int attribute_align_arg avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame)
{
    AVCodecInternal *avci = avctx->internal;
    int ret;
    ......................................;
    if (!frame) {
        //A NULL frame means flush: switch to draining mode
        avci->draining = 1;
    } else {
        //The interesting part: stash the frame in avci->buffer_frame
        ret = encode_send_frame_internal(avctx, frame);
        if (ret < 0)
            return ret;
    }
    //If avci->buffer_pkt->data is NULL, run one encode pass right here —
    //effectively the same work avcodec_receive_packet() would do
    if (!avci->buffer_pkt->data && !avci->buffer_pkt->side_data) {
        ret = encode_receive_packet_internal(avctx, avci->buffer_pkt);
        if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
            return ret;
    }
    avctx->frame_number++;
    return 0;
}
/**
 * Excerpt. Takes a new reference on src's buffers into
 * AVCodecInternal.buffer_frame (the audio-specific handling is elided).
 */
static int encode_send_frame_internal(AVCodecContext *avctx, const AVFrame *src)
{
    AVCodecInternal *avci = avctx->internal;
    AVFrame *dst = avci->buffer_frame;
    int ret;

    if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
        ................;
    }
    //Reference src's buffers from AVCodecInternal.buffer_frame
    //(av_frame_ref bumps the refcount — no pixel data is copied)
    ret = av_frame_ref(dst, src);
    if (ret < 0)
        return ret;

    return 0;
}

avcodec_receive_packet

/**
 * Hand the next encoded packet to the caller: either the one already
 * buffered by avcodec_send_frame(), or a freshly encoded one.
 */
int attribute_align_arg avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
{
    AVCodecInternal *avci = avctx->internal;
    int err;

    /* Drop whatever the caller left in avpkt. */
    av_packet_unref(avpkt);

    /* Only valid on an opened encoder context. */
    if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
        return AVERROR(EINVAL);

    /* Nothing buffered from avcodec_send_frame()? Then run the encoder now;
     * otherwise just move the pending packet out. */
    if (!avci->buffer_pkt->data && !avci->buffer_pkt->side_data) {
        err = encode_receive_packet_internal(avctx, avpkt);
        if (err < 0)
            return err;
    } else {
        av_packet_move_ref(avpkt, avci->buffer_pkt);
    }

    return 0;
}
/**
 * Excerpt. Dispatches to the codec's receive_packet callback if it has one,
 * otherwise to the common simple-encode path.
 */
static int encode_receive_packet_internal(AVCodecContext *avctx, AVPacket *avpkt)
{
    AVCodecInternal *avci = avctx->internal;
    int ret;
    .............................;
    if (ffcodec(avctx->codec)->cb_type == FF_CODEC_CB_TYPE_RECEIVE_PACKET) {
        ret = ffcodec(avctx->codec)->cb.receive_packet(avctx, avpkt);//rarely implemented by codecs
        if (ret < 0)
            av_packet_unref(avpkt);
        else
            // Encoders must always return ref-counted buffers.
            // Side-data only packets have no data and can be not ref-counted.
            av_assert0(!avpkt->data || avpkt->buf);
    } else
        ret = encode_simple_receive_packet(avctx, avpkt);//the common path
    return ret;
}
/**
 * Loop the simple encode step until it produces a packet (data or
 * side data) or fails.
 */
static int encode_simple_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
{
    for (;;) {
        /* Done as soon as the packet carries something. */
        if (avpkt->data || avpkt->side_data)
            return 0;
        /* Run one encode step. */
        int err = encode_simple_internal(avctx, avpkt);
        if (err < 0)
            return err;
    }
}
/**
 * Move the frame buffered by avcodec_send_frame() out of
 * AVCodecInternal.buffer_frame into the encoder's working frame.
 * Returns AVERROR_EOF when draining, AVERROR(EAGAIN) when nothing is
 * buffered, 0 on success.
 */
int ff_encode_get_frame(AVCodecContext *avctx, AVFrame *frame)
{
    AVCodecInternal *internal = avctx->internal;
    int ret = 0;

    if (internal->draining) {
        /* Flushing: no more input will arrive. */
        ret = AVERROR_EOF;
    } else if (!internal->buffer_frame->buf[0]) {
        /* Nothing buffered yet; the caller must send a frame first. */
        ret = AVERROR(EAGAIN);
    } else {
        /* Hand the buffered frame over, leaving buffer_frame empty. */
        av_frame_move_ref(frame, internal->buffer_frame);
    }

    return ret;
}

/**
 * Excerpt (the tail, including the "end:" label targeted by goto, is elided).
 * Pulls the buffered frame out and runs one invocation of the concrete
 * encoder's encode callback.
 */
static int encode_simple_internal(AVCodecContext *avctx, AVPacket *avpkt)
{
    AVCodecInternal   *avci = avctx->internal;
    AVFrame          *frame = avci->in_frame;
    const FFCodec *const codec = ffcodec(avctx->codec);
    int got_packet;
    int ret;

    if (avci->draining_done)
        return AVERROR_EOF;

    if (!frame->buf[0] && !avci->draining) {
        av_frame_unref(frame);
        //Fetch the frame that avcodec_send_frame() buffered earlier
        ret = ff_encode_get_frame(avctx, frame);
        if (ret < 0 && ret != AVERROR_EOF)
            return ret;
    }
    if (CONFIG_FRAME_THREAD_ENCODER &&
        avci->frame_thread_encoder && (avctx->active_thread_type & FF_THREAD_FRAME))
        /* This might modify frame, but it doesn't matter, because
         * the frame properties used below are not used for video
         * (due to the delay inherent in frame threaded encoding, it makes
         *  no sense to use the properties of the current frame anyway). */
        ret = ff_thread_video_encode_frame(avctx, avpkt, frame, &got_packet);
    else {
         //Call the concrete encoder's encode callback: frame in, encoded avpkt out
        ret = codec->cb.encode(avctx, avpkt, frame, &got_packet);
        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO && !ret && got_packet &&
            !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
            avpkt->pts = avpkt->dts = frame->pts;
    }

    av_assert0(ret <= 0);

    emms_c();

    if (!ret && got_packet) {
        if (avpkt->data) {
            //Ensure pkt->data is backed by a ref-counted pkt->buf —
            //the encoder may have only pointed pkt->data at an internal buffer
            ret = av_packet_make_refcounted(avpkt);
            if (ret < 0)
                goto end;
        }
    ............;
    return ret;
}

下面来看下FFCodec ff_mpeg4_encoder的encode函数: ff_mpv_encode_picture

/**
 * Excerpt of the mpeg4 (mpegvideo family) encode callback registered via
 * FF_CODEC_ENCODE_CB in ff_mpeg4_encoder. Rate-control retry logic and
 * several other parts are elided with "....".
 */
int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
                          const AVFrame *pic_arg, int *got_packet)
{
    MpegEncContext *s = avctx->priv_data;
    int i, stuffing_count, ret;
    int context_count = s->slice_context_count;

    s->vbv_ignore_qmax = 0;

    s->picture_in_gop_number++;
    //Copy the contents of AVFrame *pic_arg into a picture and queue it on
    //MpegEncContext.input_picture; after reordering it ends up on
    //MpegEncContext.reordered_input_picture
    if (load_input_picture(s, pic_arg) < 0)
        return -1;
    //Reorder MpegEncContext.input_picture into
    //MpegEncContext.reordered_input_picture according to the coding order,
    //then pick its first entry as new_picture — the data to encode now
    if (select_input_picture(s) < 0) {
        return -1;
    }

    /* output? */
    if (s->new_picture->data[0]) {
        int growing_buffer = context_count == 1 && !s->data_partitioning;
        size_t pkt_size = 10000 + s->mb_width * s->mb_height *
                                  (growing_buffer ? 64 : (MAX_MB_BYTES + 100));
        if (CONFIG_MJPEG_ENCODER && avctx->codec_id == AV_CODEC_ID_MJPEG) {
            ret = ff_mjpeg_add_icc_profile_size(avctx, s->new_picture, &pkt_size);
            if (ret < 0)
                return ret;
        }
        //Allocate the output buffer behind AVPacket.data
        if ((ret = ff_alloc_packet(avctx, pkt, pkt_size)) < 0)
            return ret;
        pkt->size = avctx->internal->byte_buffer_size - AV_INPUT_BUFFER_PADDING_SIZE;
        if (s->mb_info) {
            s->mb_info_ptr = av_packet_new_side_data(pkt,
                                 AV_PKT_DATA_H263_MB_INFO,
                                 s->mb_width*s->mb_height*12);
            s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
        }

        //Split the output buffer between the slice-thread contexts,
        //proportionally to the macroblock rows each one encodes
        for (i = 0; i < context_count; i++) {
            int start_y = s->thread_context[i]->start_mb_y;
            int   end_y = s->thread_context[i]->  end_mb_y;
            int h       = s->mb_height;
            uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
            uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);

            init_put_bits(&s->thread_context[i]->pb, start, end - start);
        }

        s->pict_type = s->new_picture->pict_type;
        //emms_c();
        ret = frame_start(s);
vbv_retry:
       //Run the actual encode of this picture
        ret = encode_picture(s, s->picture_number);
        if (growing_buffer) {
            //ff_alloc_packet pointed pkt->data at avctx->internal->byte_buffer
            av_assert0(s->pb.buf == avctx->internal->byte_buffer);
            //If the buffer grew during encoding, refresh pointer and size here
            pkt->data = s->pb.buf;
            pkt->size = avctx->internal->byte_buffer_size;
        }
        ........................;
        frame_end(s);

       if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) && s->out_format == FMT_MJPEG)
            ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
        ..............................;
        //Set pts and dts on the output packet
        pkt->pts = s->current_picture.f->pts;
        if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
            if (!s->current_picture.f->coded_picture_number)
                pkt->dts = pkt->pts - s->dts_delta;
            else
                pkt->dts = s->reordered_pts;
            s->reordered_pts = pkt->pts;
        } else
            pkt->dts = pkt->pts;
        if (s->current_picture.f->key_frame)
            pkt->flags |= AV_PKT_FLAG_KEY;
        if (s->mb_info)
            av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
    } else {
        s->frame_bits = 0;
    }
    .......................;
    //frame_bits is in bits; the packet size is reported in bytes
    pkt->size = s->frame_bits / 8;
    *got_packet = !!pkt->size;
    return 0;
}

AVFrame编码之后变成AVPacket过程

没碰g

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值