在ffmpeg中图像处理、音频处理可以使用libswscale和libswresample库,简单的几次转换在编解码尚可,一旦需求复杂则难以实现,就需要使用libavfilter库实现。
libavfilter库简介
libavfilter提供了一整套的基于filter的机制,以插件(可实现自定义filter并编译到ffmpeg库中)的形式快速组装需要的结果。FFmpeg官方提供的filter相关资料见文档https://ffmpeg.org/ffmpeg-filters.html#Filtering-Introduction。
图文示例说明
通常按处理的数据类型,将多媒体filter分为三类:音频filter、视频filter、字幕filter。
在FFmpeg中filter分类简单分为以下:
-
source filter (只有输出,如buffer/abuffer)
-
audio filter (如amix/aresample/volume)
-
video filter (如vflip /rotate/scale等)
-
Multimedia filter
-
sink filter (只有输入,如buffersink/abuffersink)
除了source和sink filter,其他filter都至少有一个输入、至少一个输出。以官方介绍示例说明图像处理的filter过程。
上面橙色部分是不同filter,绿色连线是filter的连接(不同线路部分构成一个chain),形成一个有向无环图filter graph。输入一个AVFrame
到buffer中,经过split后两个输出main和tmp;tmp经过crop和vflip,输出flip。之后经overlay处理,flip叠加到main上,并输出到buffersink,最后结果取出为AVFrame。
给出一个带参数具体的命令,
ffmpeg -i INPUT -vf "split [main][tmp]; [tmp] crop=iw:ih/2:0:0, vflip [flip]; [main][flip] overlay=0:H/2" OUTPUT
输入INPUT,输出OUTPUT。其中 -vf 参数指定为filter graph的文字描述,以";"分割,有三条filter chain:
- “split [main][tmp]”,
默认输入来自INPUT(ffmpeg已经处理为buffer filter),经split复制输出main和tmp - “[tmp] crop=iw:ih/2:0:0, vflip [flip]”
tmp经crop裁剪起点为[0,0]、大小为[iw,ih/2]的区域,经vflip垂直翻转后输出为flip - “[main][flip] overlay=0:H/2”
两个输入经overlay,将flip叠加到main的下半部分(overlay=0:H/2即x=0、y=H/2),输出为OUTPUT(ffmpeg内部已经处理为buffersink filter)
在流程图中,虚框部分为我们实际filter的处理部分,后续实现均以该部分说明。
函数接口说明
第一组,与filter 、filter graph对象创建、管理有关
AVFilterGraph *avfilter_graph_alloc(void);
const AVFilter *avfilter_get_by_name(const char *name);
int avfilter_graph_create_filter(AVFilterContext **filt_ctx, const AVFilter *filt,
const char *name, const char *args, void *opaque,
AVFilterGraph *graph_ctx);
int avfilter_link(AVFilterContext *src, unsigned srcpad, AVFilterContext *dst, unsigned dstpad);
int avfilter_graph_parse_ptr(AVFilterGraph *graph, const char *filters,
AVFilterInOut **inputs, AVFilterInOut **outputs,
void *log_ctx);
int avfilter_graph_config(AVFilterGraph *graphctx, void *log_ctx);
void avfilter_graph_free(AVFilterGraph **graph);
...
第二组,与filter使用有关
int av_buffersrc_add_frame(AVFilterContext *ctx, AVFrame *frame);
int av_buffersrc_add_frame_flags(AVFilterContext *buffer_src, AVFrame *frame, int flags);
int av_buffersink_get_frame(AVFilterContext *ctx, AVFrame *frame);
int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples);
示例1 filter不做任何处理
从文件中读取每一帧图像数据,经过filter后,保存图像数据。本示例给出完整代码,演示使用buffer、buffersink两个filter,但不对图像数据做任何处理(即流程图中虚框部分为空,buffer直接连接buffersink)。
/*
filter示例,对一个yuv视频文件进行处理
*/
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
#include "libavformat/avformat.h"
#include "libavutil/imgutils.h"
#include "libavfilter/avfilter.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/opt.h"
#ifdef __cplusplus
}
#endif
#define INPUT_FORMAT_YUV420P
int main()
{
int ret;
/* 输入输出视频文件信息 */
const int in_width = 640;
const int in_height = 272;
const int in_fps = 25;
#ifdef INPUT_FORMAT_YUV420P
const char *in_file_name = "../files/Titanic_640x272_yuv420p.yuv";
AVPixelFormat in_pix_fmt = AV_PIX_FMT_YUV420P;
int in_bpp = av_get_bits_per_pixel(av_pix_fmt_desc_get(in_pix_fmt)); // rgb24 24, yuv 12
const char *out_file_name = "out.yuv";
#else
const char *in_file_name = "../files/Titanic_640x272_rgb24.rgb";
AVPixelFormat in_pix_fmt = AV_PIX_FMT_BGR24;
int in_bpp = av_get_bits_per_pixel(av_pix_fmt_desc_get(in_pix_fmt)); // rgb24 24, yuv 12
const char *out_file_name = "out.rgb";
#endif
FILE *in_file, *out_file;
fopen_s(&in_file, in_file_name, "rb");
if(!in_file) {
fprintf(stderr, "Can not open file %s\n", in_file_name);
return 0;
}
fopen_s(&out_file, out_file_name, "wb");
if(!out_file) {
fprintf(stderr, "Can not open file %s\n", out_file_name);
return 0;
}
/* filter相关 */
AVFilterContext *buffersink_ctx;
AVFilterContext *buffersrc_ctx;
AVFilterGraph *filter_graph;
{
// 初始化 filtwer graph
filter_graph = avfilter_graph_alloc();
// source filter
const AVFilter* buffersrc = avfilter_get_by_name("buffer");
char args[512]; // 描述输入的参数
snprintf(args, sizeof(args),
"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
in_width, in_height, in_pix_fmt, 1, in_fps, 1, 1);
ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in", args, NULL, filter_graph);
if(ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create buffer source\n");
return ret;
}
// sink filter
const AVFilter *buffersink = avfilter_get_by_name("buffersink");
ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, filter_graph);
if(ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create buffer sink\n");
return ret;
}
// links
if(ret = avfilter_link(buffersrc_ctx, 0, buffersink_ctx, 0) != 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot link buffersrc_ctx to buffersink_ctx\n");
return ret;
}
// 检查并配置filter_graph
if((ret = avfilter_graph_config(filter_graph, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot config graph\n");
return ret;
}
char *graph_str = avfilter_graph_dump(filter_graph, NULL);
FILE* graphFile = NULL;
fopen_s(&graphFile, "graphFile.txt", "w");
fprintf(graphFile, "%s", graph_str);
av_free(graph_str);
fclose(graphFile);
}
AVFrame *frame = av_frame_alloc();
frame->width = in_width;
frame->height = in_height;
frame->format = in_pix_fmt;
//av_frame_get_buffer(frame, 1);
av_image_alloc(frame->data,frame->linesize,in_width, in_height,in_pix_fmt,1); // 每一次数据均存于此
AVFrame *frame_filt = av_frame_alloc();
int64_t frame_cnt = 1;
while(!feof(in_file)) {
if(fread(frame->data[0], in_width*in_height * in_bpp / 8, 1, in_file) != 1)
break;
/* push the decoded frame into the filtergraph */
//ret = av_buffersrc_add_frame(buffersrc_ctx, frame);
ret = av_buffersrc_add_frame_flags(buffersrc_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
if(ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
break;
}
/* pull filtered frames from the filtergraph */
ret = av_buffersink_get_frame(buffersink_ctx, frame_filt);
if(ret < 0)
break;
//保存结果
#ifdef INPUT_FORMAT_YUV420P
// 任意yuv420p对齐方式,保存为1字节对齐
for(int i = 0; i < frame_filt->height; i++)
fwrite(frame_filt->data[0] + frame_filt->linesize[0] * i, 1, frame_filt->width, out_file);
for(int i = 0; i < frame_filt->height / 2; i++)
fwrite(frame_filt->data[1] + frame_filt->linesize[1] * i, 1, frame_filt->width / 2, out_file);
for(int i = 0; i < frame_filt->height / 2; i++)
fwrite(frame_filt->data[2] + frame_filt->linesize[2] * i, 1, frame_filt->width / 2, out_file);
#else // RGB24,保存为1字节对齐
for(int i = 0; i < frame_filt->height; i++)
fwrite(frame_filt->data[0] + frame_filt->linesize[0] * i, 1, frame_filt->width * in_bpp/8, out_file);
#endif
av_frame_unref(frame_filt);
}
fclose(in_file);
fclose(out_file);
av_frame_free(&frame);
av_frame_free(&frame_filt);
avfilter_graph_free(&filter_graph);
return 0;
}
注意av_buffersrc_add_frame
和av_buffersrc_add_frame_flags
调用区别,以及对AVFrame释放的影响。
示例2 官方filter示例实现
本示例实现官方示例的filter功能,将输入的上半部分翻转后再叠加到输入。有如下两种实现方式:(1)手动创建所有filter,再手动连接filter输入和输出,方法直观,但比较繁杂;(2)使用字符串描述的graph,再加入并连接到buffer与buffersink之间,实现简单,推荐。
先给出结果对比图
分别以两种方式,给出AVFilterGraph
的实现代码
(1)手动创建所有filter实现形式
依次调用avfilter_graph_create_filter()
方法,向graph添加所有filter,然后调用avfilter_link()
建立所有filter之间的连接。
{
    // Build the official example graph by hand: create every filter with
    // avfilter_graph_create_filter(), then wire all pads with avfilter_link().
    filter_graph = avfilter_graph_alloc();
    /* ---- create all filters ---- */
    // source filter: describes the raw frames we will push in
    const AVFilter *buffersrc = avfilter_get_by_name("buffer");
    char args[512]; // textual description of the input stream
    snprintf(args, sizeof(args),
             "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
             in_width, in_height, in_pix_fmt, 1, in_fps, 1, 1);
    ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in", args, NULL, filter_graph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create buffer source\n");
        return ret;
    }
    // sink filter: the graph's output endpoint
    const AVFilter *buffersink = avfilter_get_by_name("buffersink");
    //AVPixelFormat pix_fmts[] = {AV_PIX_FMT_BGR24, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE}; // optional output constraint
    //AVBufferSinkParams *buffersink_params = av_buffersink_params_alloc();
    //buffersink_params->pixel_fmts = pix_fmts;
    ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, filter_graph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create buffer sink\n");
        return ret;
    }
    // split filter: duplicates its input into two outputs (main / tmp)
    const AVFilter *splitFilter = avfilter_get_by_name("split");
    AVFilterContext *splitFilter_ctx;
    ret = avfilter_graph_create_filter(&splitFilter_ctx, splitFilter, "split", "outputs=2", NULL, filter_graph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Fail to create split filter\n");
        return -1;
    }
    // crop filter: keep the top half — origin [0,0], size iw x ih/2.
    // BUG FIX: this call was truncated in the original ("filter_grap" with
    // no closing ");"), which could not compile.
    const AVFilter *cropFilter = avfilter_get_by_name("crop");
    AVFilterContext *cropFilter_ctx;
    ret = avfilter_graph_create_filter(&cropFilter_ctx, cropFilter, "crop",
                                       "out_w=iw:out_h=ih/2:x=0:y=0", NULL, filter_graph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Fail to create crop filter\n");
        return -1;
    }
    // vflip filter: flips the cropped half vertically
    const AVFilter *vflipFilter = avfilter_get_by_name("vflip");
    AVFilterContext *vflipFilter_ctx;
    ret = avfilter_graph_create_filter(&vflipFilter_ctx, vflipFilter, "vflip", NULL, NULL, filter_graph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Fail to create vflip filter\n");
        return -1;
    }
    // overlay filter: paste the flipped half at (x=0, y=H/2).
    // BUG FIX: the original passed "y=0:H/2", which mixes a named option
    // with a positional one and places the overlay at the wrong position;
    // the command-line equivalent overlay=0:H/2 means x=0, y=H/2.
    const AVFilter *overlayFilter = avfilter_get_by_name("overlay");
    AVFilterContext *overlayFilter_ctx;
    ret = avfilter_graph_create_filter(&overlayFilter_ctx, overlayFilter, "overlay", "x=0:y=H/2", NULL, filter_graph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Fail to create overlay filter\n");
        return -1;
    }
    /* ---- wire all links ---- */
    // link: buffer -> split
    ret = avfilter_link(buffersrc_ctx, 0, splitFilter_ctx, 0);
    if (ret != 0) {
        av_log(NULL, AV_LOG_ERROR, "Fail to link src filter to split filter\n");
        return -1;
    }
    // link: split pad 0 (main) -> overlay pad 0 (main input)
    ret = avfilter_link(splitFilter_ctx, 0, overlayFilter_ctx, 0);
    if (ret != 0) {
        av_log(NULL, AV_LOG_ERROR, "Fail to link split filter:0 to overlay filter\n");
        return -1;
    }
    // link: split pad 1 (tmp) -> crop
    ret = avfilter_link(splitFilter_ctx, 1, cropFilter_ctx, 0);
    if (ret != 0) {
        av_log(NULL, AV_LOG_ERROR, "Fail to link split filter:1 to crop filter\n");
        return -1;
    }
    // link: crop -> vflip
    ret = avfilter_link(cropFilter_ctx, 0, vflipFilter_ctx, 0);
    if (ret != 0) {
        av_log(NULL, AV_LOG_ERROR, "Fail to link crop filter to vflip filter\n");
        return -1;
    }
    // link: vflip -> overlay pad 1 (overlaid input)
    ret = avfilter_link(vflipFilter_ctx, 0, overlayFilter_ctx, 1);
    if (ret != 0) {
        av_log(NULL, AV_LOG_ERROR, "Fail to link vflip filter to overlay filter:1 \n");
        return -1;
    }
    // link: overlay -> buffersink
    ret = avfilter_link(overlayFilter_ctx, 0, buffersink_ctx, 0);
    if (ret != 0) {
        // BUG FIX: message named the wrong source filter (vflip).
        av_log(NULL, AV_LOG_ERROR, "Fail to link overlay filter to buffersink filter\n");
        return -1;
    }
    // Validate and configure the graph.
    if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot config graph\n");
        return ret;
    }
    // Dump the configured graph for debugging.
    char *graph_str = avfilter_graph_dump(filter_graph, NULL);
    FILE *graphFile = NULL;
    fopen_s(&graphFile, "graphFile.txt", "w");
    if (graphFile) { // guard against a failed open before writing
        fprintf(graphFile, "%s", graph_str);
        fclose(graphFile);
    }
    av_free(graph_str);
}
(2)字符串描述graph实现形式
通过字符串描述的一个filter graph,嵌入到buffer
和buffersink
之间,仅需要调用avfilter_graph_parse_ptr
函数一次即可,相比上例而言,省去了手动创建所有filters、手动创建links的操作,简单高效不易出错。
注意avfilter_graph_parse_ptr
函数输入的两个AVFilterInOut
参数。
{
    // Build the same graph from its textual description: only the buffer
    // source and buffersink are created by hand; everything in between is
    // parsed from a string by avfilter_graph_parse_ptr().
    filter_graph = avfilter_graph_alloc();
    /* ---- endpoint filters ---- */
    // source filter: describes the raw frames we will push in
    const AVFilter *buffersrc = avfilter_get_by_name("buffer");
    char args[512]; // textual description of the input stream
    snprintf(args, sizeof(args),
             "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
             in_width, in_height, in_pix_fmt, 1, in_fps, 1, 1);
    ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in", args, NULL, filter_graph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create buffer source\n");
        return ret;
    }
    // sink filter: the graph's output endpoint
    const AVFilter *buffersink = avfilter_get_by_name("buffersink");
    //AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE}; // optional output constraint
    //AVBufferSinkParams *buffersink_params = av_buffersink_params_alloc();
    //buffersink_params->pixel_fmts = pix_fmts;
    ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, filter_graph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create buffer sink\n");
        return ret;
    }
    // Parse the string-described graph into filter_graph and splice it
    // between the source and sink via the two AVFilterInOut lists.
    const char *filters_descr = "split [main][tmp]; [tmp] crop=iw:ih/2:0:0, vflip [flip]; [main][flip] overlay=0:H/2";
    AVFilterInOut *outputs = avfilter_inout_alloc();
    AVFilterInOut *inputs = avfilter_inout_alloc();
    // The buffer source's output feeds the first input of filters_descr;
    // an unlabeled first input defaults to the label "in".
    outputs->name = av_strdup("in");
    outputs->filter_ctx = buffersrc_ctx;
    outputs->pad_idx = 0;
    outputs->next = NULL;
    // The buffersink's input is fed by the last output of filters_descr;
    // an unlabeled last output defaults to the label "out".
    inputs->name = av_strdup("out");
    inputs->filter_ctx = buffersink_ctx;
    inputs->pad_idx = 0;
    inputs->next = NULL;
    // The parsed graph's inputs connect to 'outputs' (the buffer source's
    // output pads); its outputs connect to 'inputs' (the buffersink's input).
    ret = avfilter_graph_parse_ptr(filter_graph, filters_descr, &inputs, &outputs, NULL);
    // BUG FIX: free the in/out lists on BOTH paths. avfilter_graph_parse_ptr
    // updates the pointers to any unlinked remainder, which the caller always
    // owns — the original comment claiming "no free needed on success" was
    // wrong (FFmpeg's official filtering examples free them unconditionally).
    avfilter_inout_free(&inputs);
    avfilter_inout_free(&outputs);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot parse filters_descr\n");
        return ret;
    }
    // Validate and configure the graph.
    if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot config graph\n");
        return ret;
    }
    // Dump the configured graph for debugging.
    char *graph_str = avfilter_graph_dump(filter_graph, NULL);
    FILE *graphFile = NULL;
    fopen_s(&graphFile, "graphFile.txt", "w");
    if (graphFile) { // guard against a failed open before writing
        fprintf(graphFile, "%s", graph_str);
        fclose(graphFile);
    }
    av_free(graph_str);
}
创建了两个AVFilterInOut
对象inputs和outputs作为avfilter_graph_parse_ptr
的参数,是相对外部filters来说的。
参数inputs指的是filters_descr描述的graph的输出应该连接的外部filter的输入,也就是buffersink_ctx的输入;
参数outputs指的是filters_descr描述的graph的输入应该连接的外部filter的输出,也就是buffer的输出。