001 ffmpeg_videoencoding

最新推荐文章于 2024-01-21 19:21:33 发布

yi巴

最新推荐文章于 2024-01-21 19:21:33 发布

阅读量839

点赞数

分类专栏： ffmpeg 文章标签： ffmpeg

本文链接：https://blog.csdn.net/jiandanjiuhao_88/article/details/79387597

版权

ffmpeg 专栏收录该内容

21 篇文章 2 订阅

订阅专栏

这个wiki主要是讲video的编码流程，代码功能就是：把YUV文件变成h264文件的过程。其实看代码很简单，但是为了加深我的记忆，所以分析代码流程，虽然没有一行一行的分析，但是至少做到每个细节都清晰。

1 ffmpeg的编译

准备ffmpeg和x264两个源码包，因为ffmpeg默认不支持h264的编码（需要借助外部的libx264），解码是支持的。

1.1 x264

下载地址是：http://download.videolan.org/pub/videolan/x264/snapshots/
文件名：last_stable_x264.tar.bz2
编译行：

$./configure --enable-shared --disable-asm --prefix=/data1/.....
$ make
$ make install

把生成的lib目录（动态库libx264.so所在的目录）加入到LD_LIBRARY_PATH中。

1.2 ffmpeg

直接到ffmpeg的官网下载，我的版本是3.0.2版本。编译行：

./configure --prefix=/data1/home/wy/ffmpeg/ffmpeg-3.0.2.install/ --enable-debug  --enable-shared --disable-asm --enable-libx264 --enable-gpl --enable-pthreads --extra-cflags=-I/data1/home/wy/ffmpeg/x264/install/include --extra-ldflags=-L/data1/home/wy/ffmpeg/x264/install/lib

make -j8
make install

为了调试，需要--enable-debug。用动态库，需要--enable-shared。之后就是把x264加入进去的选项（--enable-libx264、--enable-gpl，以及指向x264安装目录的--extra-cflags和--extra-ldflags）。把生成的lib目录加入到LD_LIBRARY_PATH中。

用动态库就是不能忘记LD_LIBRARY_PATH，其实还有一个方法：在应用程序的链接选项中添加-rpath（例如 -Wl,-rpath,/path/to/lib），把动态库的搜索路径直接写进可执行文件。

2 编译应用程序

用CMakeLists来编译，CMakeList确实是一个好东东哦。添加include和lib路径，SET一些编译选项，ADD_EXECUTABLE源文件和TARGET_LINK_LIBRARIES库。相关的库有：

SET(lib_deps avcodec avformat avutil pthread rt)

在添加编译选项的时候有一个需要添加进去才能通过：-D__STDC_CONSTANT_MACROS. 在ffmpeg的源码中libavutil/common.h文件中：

#if defined(__cplusplus) && !defined(__STDC_CONSTANT_MACROS) && !defined(UINT64_C)
#error missing -D__STDC_CONSTANT_MACROS / #define __STDC_CONSTANT_MACROS
#endif

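至于为什么有这行代码：UINT64_C这一类常量宏定义在<stdint.h>中，按照C99标准的说明，用C++编译时只有在包含<stdint.h>之前定义了__STDC_CONSTANT_MACROS，才会提供这些宏；ffmpeg的头文件用到了它们，所以C++工程需要加上这个编译选项。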

3 源码分析

先列出include的ffmpeg的头文件：

extern "C"
{
    #include "libavutil/opt.h"
    #include "libavcodec/avcodec.h"
    #include "libavutil/channel_layout.h"
    #include "libavutil/common.h"
    #include "libavutil/imgutils.h"
    #include "libavutil/mathematics.h"
    #include "libavutil/samplefmt.h"
}

为什么会有extern "C"呢？因为我们是用c++来调用c编译生成的库，必须用extern "C"把这些头文件包起来，不然链接时会报找不到符号的错误：c++编译器会对函数名做名字修饰（name mangling），而c编译器不会，两者生成的符号名不同。

再看两个结构体，这两个结构体从头到尾贯穿：

// Run configuration filled from the command line: the input/output files
// plus the encoder settings that are applied when the encoder is opened.
typedef struct
{
    FILE *pFin;             // input YUV file handle
    FILE *pFout;            // output bitstream file handle

    char *pNameIn;          // input YUV file name
    char *pNameOut;         // output bitstream file name

    uint16_t nImageWidth;   // image width
    uint16_t nImageHeight;  // image height

    uint16_t nFrameRate;    // encoding frame rate
    uint64_t nBitRate;      // encoding bit rate
    uint16_t nGOPSize;      // size of one GOP
    uint16_t nMaxBFrames;   // maximum number of B-frames
    uint16_t nTotalFrames;  // total number of frames to encode (16-bit, so at most 65535)
} IOParam;

这是接收命令行参数的结构，相当于配置结构。在编码时需要用到这些参数，怎么用的后面有描述。

// Bundles the FFmpeg objects used during encoding so they can be passed around together.
typedef struct
{
    AVCodec         *codec;     // the codec (encoder/decoder) instance
    AVFrame         *frame;     // pixel data: decoder output / encoder input
    AVCodecContext  *c;         // codec context holding the codec's parameter settings
    AVPacket        pkt;        // packet structure carrying the encoded bitstream data
} CodecCtx;

这个相当于ffmpeg在编码时的上下文，保存在一个结构中，方便使用。

代码很短的话，并且功能知道后，直接从main函数入手吧。main函数很短，越短越好，自己写代码一定要把main函数写得短了又短。

int main(int argc, char **argv)
{
    // 1 读取并解析命令行参数
    IOParam io_param;
    if (!Parse_input_param(argc, argv, io_param))
    {
        printf("Error: Incomplete input parameters. Please check the command line.\n");
        return -1;
    }
    int frameIdx, packetIdx = 0, ret, got_output;

    // 2 打开输入输出文件
    Open_file(io_param);                        

    // 3 根据输入参数设置并打开编码器各个部件
    CodecCtx ctx = { NULL, NULL, NULL};
    if(Open_encoder(ctx, io_param) ){
    }else{
        printf("Error: Open_encoder fail\n");
        return -1;
    }

    // 4 一帧一帧的处理，不要忘记最后一帧。
    for (frameIdx = 0; frameIdx < io_param.nTotalFrames; frameIdx++)
    {
        // 4.1 初始化AVPacket实例
        av_init_packet(&(ctx.pkt));            
        ctx.pkt.data = NULL;                    
        ctx.pkt.size = 0;

        fflush(stdout);

        //4.2 设置编码数据：读取像素信息，设置pts
        Read_yuv_data(ctx, io_param, 0);        //Y分量
        Read_yuv_data(ctx, io_param, 1);        //U分量
        Read_yuv_data(ctx, io_param, 2);        //V分量
        ctx.frame->pts = frameIdx;

        // 4.3 endocde:来源数据放在ctx.frame，结果数据放在ctx.pkt中。
        ret = avcodec_encode_video2(ctx.c, &(ctx.pkt), ctx.frame, &got_output); 

        // 4.4  获得一个完整的码流包
        if (got_output) 
        {            
            fwrite(ctx.pkt.data, 1, ctx.pkt.size, io_param.pFout);
            av_packet_unref(&(ctx.pkt));  //unref pkt
        }
    }

    // 4.5 是否还有数据没有编码
    for (got_output = 1; got_output; frameIdx++) 
    {
        ret = avcodec_encode_video2(ctx.c, &(ctx.pkt), NULL, &got_output); //这个时候frame为空，没有读取原始像素。
        if (got_output) 
        {
            fwrite(ctx.pkt.data, 1, ctx.pkt.size, io_param.pFout);
            av_packet_unref(&(ctx.pkt));
        }
    }

    // 5 结尾处理：关闭文件，关闭编码器
    Close_file(io_param);
    Close_encoder(ctx);

    return 0;
}

上面的代码分为5步，关键点在于第3，4步。分析第3步，解决这个问题，编码器是如何设置的呢？

/**
 * Finds the H.264 encoder, configures and opens it, and allocates the input
 * frame together with its pixel buffer.
 *
 * @param ctx       receives the codec, the codec context and the frame.
 * @param io_param  settings forwarded to setContext().
 * @return true when every step succeeded. On failure everything allocated
 *         here is released again, ctx.c / ctx.frame are left NULL, and false
 *         is returned.
 */
bool Open_encoder(CodecCtx &ctx, IOParam io_param)
{
    // 3.1 Register all audio/video codecs.
    avcodec_register_all();

    // 3.2 Look up the H.264 encoder; this is NULL when no H.264 encoder is available.
    ctx.codec = avcodec_find_encoder(AV_CODEC_ID_H264);
    if (!ctx.codec)
    {
        return false;
    }

    // 3.3 Allocate the codec context and fill it from the parameters.
    ctx.c = avcodec_alloc_context3(ctx.codec);
    if (!ctx.c)
    {
        return false;
    }
    setContext(ctx, io_param);

    // 3.4 Open the encoder with that context.
    if (avcodec_open2(ctx.c, ctx.codec, NULL) < 0)
    {
        avcodec_free_context(&ctx.c);
        return false;
    }

    // 3.5 Allocate the AVFrame and describe the picture it will carry.
    ctx.frame = av_frame_alloc();
    if (!ctx.frame)
    {
        avcodec_free_context(&ctx.c);
        return false;
    }
    ctx.frame->format = ctx.c->pix_fmt;
    ctx.frame->width = ctx.c->width;
    ctx.frame->height = ctx.c->height;

    // 3.6 Allocate the pixel storage of the frame, 32-byte aligned.
    const int ret = av_image_alloc(ctx.frame->data, ctx.frame->linesize,
                                   ctx.c->width, ctx.c->height, ctx.c->pix_fmt, 32);
    if (ret < 0)
    {
        av_frame_free(&ctx.frame);
        avcodec_free_context(&ctx.c);
        return false;
    }

    return true;
}

每一步都对应的是ffmpeg的功能。细节不追究，有两个地方：一个是3.3的setContext，编码器的上下文是什么；另一个是3.6，把av_image_alloc分配存储空间简单分析一下。

/**
 * Copies the encoding parameters from io_param into the codec context ctx.c.
 *
 * @param ctx       holder of the codec context to configure; nothing is done
 *                  when ctx.c is NULL.
 * @param io_param  bit rate, picture size, frame rate, GOP size and B-frame count.
 */
void setContext(CodecCtx &ctx, IOParam io_param)
{
    if (!ctx.c)
    {
        return;
    }

    // 3.3.1 Bit rate.
    ctx.c->bit_rate = io_param.nBitRate;

    // 3.3.2 Picture width and height.
    ctx.c->width = io_param.nImageWidth;
    ctx.c->height = io_param.nImageHeight;

    // 3.3.3 Time base, used when computing pts/dts: one tick per frame.
    //       Follows the configured frame rate and falls back to the former
    //       fixed value of 25 when none is set.
    // NOTE(review): assumes the command-line parser initializes nFrameRate — confirm.
    const int fps = io_param.nFrameRate > 0 ? io_param.nFrameRate : 25;
    AVRational rational = {1, fps};
    ctx.c->time_base = rational;

    // 3.3.4 GOP size and maximum number of B-frames.
    ctx.c->gop_size = io_param.nGOPSize;
    ctx.c->max_b_frames = io_param.nMaxBFrames;

    // 3.3.5 Pixel format of the YUV input.
    ctx.c->pix_fmt = AV_PIX_FMT_YUV420P;

    // 3.3.6 Encoder-private option: ask the encoder for its "slow" preset.
    //       The result is deliberately ignored; encoding still works without it.
    if (ctx.c->priv_data)
    {
        av_opt_set(ctx.c->priv_data, "preset", "slow", 0);
    }
}

设置的都是和视频相关的，在编码时需要用到这些变量。ffmpeg的具体逻辑不清楚，那就先记住有哪些变量。看 av_image_alloc 函数：

// Return value (per the FFmpeg documentation):
// the size in bytes required for the buffer on success, a negative error code on failure.
int av_image_alloc(uint8_t *pointers[4], // plane pointers to fill in, e.g. the data array of an AVFrame (out)
                    int linesizes[4], // line size (stride) of each colour plane (out)
                    int w, int h, // image resolution: width, height (in)
                    enum AVPixelFormat pix_fmt, // pixel format (in)
                    int align);// buffer alignment, e.g. 32-byte or 64-byte (in)

第3步编码器的初始化完成后，进入第4步的编码阶段。4.2获取到原始yuv数据 Read_yuv_data ，4.3编码 avcodec_encode_video2。

/**
 * Reads one colour plane of the next frame from io_param.pFin into
 * ctx.frame->data[color_plane].
 *
 * Plane 0 is read at the full frame size; planes 1 and 2 at half the width
 * and half the height.
 *
 * @param ctx          holder of the destination frame.
 * @param io_param     holder of the input file handle.
 * @param color_plane  plane index: 0, 1 or 2.
 * @return the number of bytes in the plane on success, or -1 when
 *         color_plane is out of range or the file does not hold a whole plane.
 */
int Read_yuv_data(CodecCtx &ctx, IOParam &io_param, int color_plane)
{
    if (color_plane < 0 || color_plane > 2)
    {
        return -1;
    }

    const bool is_luma      = (color_plane == 0);
    const int plane_height  = is_luma ? ctx.frame->height : ctx.frame->height / 2;
    const int plane_width   = is_luma ? ctx.frame->width : ctx.frame->width / 2;
    const int plane_size    = plane_width * plane_height;
    const int plane_stride  = ctx.frame->linesize[color_plane];
    uint8_t *dst            = ctx.frame->data[color_plane];

    if (plane_width == plane_stride)
    {
        // Width equals stride: the rows are contiguous, read the plane in one go.
        if (fread(dst, 1, (size_t)plane_size, io_param.pFin) != (size_t)plane_size)
        {
            return -1;
        }
    }
    else
    {
        // Width is smaller than the stride: rows are separated by padding in
        // memory, so read row by row and skip the padding.
        for (int row_idx = 0; row_idx < plane_height; row_idx++)
        {
            uint8_t *row = dst + row_idx * plane_stride;
            if (fread(row, 1, (size_t)plane_width, io_param.pFin) != (size_t)plane_width)
            {
                return -1;
            }
        }
    }

    return plane_size;
}

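从文件中读取yuv原始数据。读起来简单，但是读多少、从哪里读有一定的逻辑，主要是区分像素信息是连续放还是有间隔。为什么存在这种逻辑呢？因为av_image_alloc是按32字节对齐来分配的，每一行的跨度（linesize）可能大于图像的实际宽度，行与行之间会有填充字节；而YUV文件中的像素是紧密排列的，所以这种情况下只能一行一行地读。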

读到了YUV数据则需要调用编码函数 avcodec_encode_video2。

// Return value: whether encoding succeeded. 0 on success, a negative error code on failure.
int avcodec_encode_video2(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr);

// avctx: the AVCodecContext, i.e. the context that specifies the encoding parameters.
// avpkt: pointer to the AVPacket that receives the output bitstream.
// frame: the AVFrame that passes in the raw pixel data.
// got_packet_ptr: output parameter, tells whether the AVPacket now holds a complete frame.

编码完成后需要判断编码器内部是否还缓存着没有输出的数据：这时不再读取新的数据，而是给avcodec_encode_video2传入空的frame并反复调用，直到got_output为0，也就是再也取不到一个完整的码流包为止。

源码分析完成，简单明了。其实细节是没有深入的，我不想一下子扎到代码中去。

最后感谢代码的开发作者：https://github.com/yinwenjie/FFmpeg_Tutorial

高手这么多，难道我就只能分析分析别人的代码吗？