# FFmpeg关于Nvidia支持介绍
## NVDEC/CUVID(官方介绍如下)
官方链接:http://trac.ffmpeg.org/wiki/HWAccelIntro
CUDA (NVENC/NVDEC)
NVENC and NVDEC are NVIDIA’s hardware-accelerated encoding and decoding APIs. They used to be called CUVID. They can be used for encoding and decoding on Windows and Linux. FFmpeg refers to NVENC/NVDEC interconnect as CUDA.
NVDEC offers decoders for H.264, HEVC, MJPEG, MPEG-1/2/4, VP8/VP9, VC-1. Codec support varies by hardware (see the GPU compatibility table).
Note that FFmpeg offers both NVDEC and CUVID hwaccels. They differ in how frames are decoded and forwarded in memory.
The full set of codecs being available only on Pascal hardware, which adds VP9 and 10 bit support. The note about missing ffnvcodec from NVENC applies for NVDEC as well.
Sample decode using CUDA:
ffmpeg -hwaccel cuda -i input output
Sample decode using CUVID:
ffmpeg -c:v h264_cuvid -i input output
FFplay only supports older option -vcodec (not -c:v) and only CUVID.
ffplay -vcodec hevc_cuvid file.mp4
Full hardware transcode with NVDEC and NVENC:
ffmpeg -hwaccel cuda -hwaccel_output_format cuda -i input -c:v h264_nvenc -preset slow output
If ffmpeg was compiled with support for libnpp, it can be used to insert a GPU based scaler into the chain:
ffmpeg -hwaccel_device 0 -hwaccel cuda -i input -vf scale_npp=-1:720 -c:v h264_nvenc -preset slow output.mkv
The -hwaccel_device option can be used to specify the GPU to be used by the hwaccel in ffmpeg.
上面一段话的总结是,我们有两种方式去调用h264的解码,第一种是通过加速器-hwaccel cuda
去调用,第二种是通过-c:v h264_cuvid
,这两种方式都是GPU解码,底层调用的都是ffnvcodec的API,只是调用方式不同而已。
总结一下:
cuvid和nvdec底层调用的解码API都是ffnvcodec中提供的API,两者本质上没有什么区别。
在调用区别是:
- cuvid在ffmpeg是一个外部解码器(类似于libx264外部库),可以直接通过
avcodec_find_decoder_by_name(h264_cuvid、hevc_cuvid等)
直接获取到一个解码器,这个解码器内部使用的是ffnvcodec的API来解码。
- nvdec是一个加速解码器,在调用的过程中先打开一个解码器,比如h264,注意,这个解码器是ffmpeg内部自己写的解码器,然后给这个解码器的上下文
AVCodecContext
指定一个加速硬件,比如cuda
,然后在实际使用过程中,如果发现指定了硬件加速器,那么就进入cuda的解码器中,也就是ffnvcodec的API中,如果没有加速器,就进入ffmpeg自己写的cpu的软解码的逻辑中。
综上所述,cuvid和nvenc是Nvidia的第三方编解码库(你以前是不是觉得nvdec和nvenc是Nvidia的第三方解码器),nvdec是解码的加速器,就是ffmpeg内部自己写了一个h264的解码代码(根据h264标准),在这些代码中内嵌了一个硬解码加速器,比如cuda,如果你指定了使用cuda硬件,那么就会跳入硬解码的逻辑中。
下面详细介绍一下
目前FFmpeg的第三方库支持中有关英伟达的支持有如下几个,注意后面的[autodetect]
表示不指定disable就自动检测:
The following libraries provide various hardware acceleration features:
--disable-cuvid disable Nvidia CUVID support [autodetect]
--disable-ffnvcodec disable dynamically linked Nvidia code [autodetect]
--disable-nvdec disable Nvidia video decoding acceleration (via hwaccel) [autodetect]
--disable-nvenc disable Nvidia video encoding code [autodetect]
## 那么这四个有什么联系和区别呢?
下面是configure中硬件加速自动检测的列表,可以看到有我们刚才说的四个NVIDIA模块。
HWACCEL_AUTODETECT_LIBRARY_LIST="
...
cuda
cuvid
...
ffnvcodec
nvdec
nvenc
...
"
AUTODETECT_LIBS="
$EXTERNAL_AUTODETECT_LIBRARY_LIST
$HWACCEL_AUTODETECT_LIBRARY_LIST
$THREADS_LIST
"
下面是自动检测的流程,其实就是检查头文件、库文件是否存在,能否通过编译(一个简单的main函数)
#下面是检测ffnvcodec开关以及自动检测其头文件和库文件是否可以用
#ffnvcodec是Nvidia提供的关于编解码的头文件
# Probe for the ffnvcodec headers unless ffnvcodec has been disabled.
# The check_pkg_config calls are chained with "||", so they are tried in order
# and the chain stops at the first one that succeeds: ffnvcodec >= 9.1.23.1,
# then the 9.0.x, 8.2.x and 8.1.x version ranges.
if ! disabled ffnvcodec; then
# Header list handed to every check_pkg_config call below.
ffnv_hdr_list="ffnvcodec/nvEncodeAPI.h ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h ffnvcodec/dynlink_nvcuvid.h"
check_pkg_config ffnvcodec "ffnvcodec >= 9.1.23.1" "$ffnv_hdr_list" "" || \
check_pkg_config ffnvcodec "ffnvcodec >= 9.0.18.3 ffnvcodec < 9.1" "$ffnv_hdr_list" "" || \
check_pkg_config ffnvcodec "ffnvcodec >= 8.2.15.10 ffnvcodec < 8.3" "$ffnv_hdr_list" "" || \
check_pkg_config ffnvcodec "ffnvcodec >= 8.1.24.11 ffnvcodec < 8.2" "$ffnv_hdr_list" ""
fi
#查看编码头文件ffnvcodec/nvEncodeAPI.h和库文件ffnvcodec是否可以通过编译
# Compile test for nvenc: the heredoc is a small C program that includes
# ffnvcodec/nvEncodeAPI.h and references NV_ENCODE_API_FUNCTION_LIST, GUID and
# NV_ENC_PRESET_HQ_GUID. Because of the "&& ... ||" chain, "disable nvenc" runs
# unless nvenc is enabled AND test_cc succeeds.
enabled nvenc &&
test_cc -I$source_path <<EOF || disable nvenc
#include <ffnvcodec/nvEncodeAPI.h>
NV_ENCODE_API_FUNCTION_LIST flist;
void f(void) { struct { const GUID guid; } s[] = { { NV_ENC_PRESET_HQ_GUID } }; }
int main(void) { return 0; }
EOF
#这里同上,检测头文件ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h是否存在
# When nvdec or cuvid is enabled, run check_type for CUVIDAV1PICPARAMS against
# the two ffnvcodec dynlink headers.
# NOTE(review): the exit status of check_type is not acted on in this excerpt;
# presumably check_type records the result itself - confirm against configure.
if enabled_any nvdec cuvid; then
check_type "ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h" "CUVIDAV1PICPARAMS"
fi
在上面的解码模块中有一个命令enabled_any nvdec cuvid
从这里可以看到(它俩使用的是相同的头文件)nvdec和cuvid
最终依赖的是一个底层库。
接下来检测上述检测是否通过
# enabled NAME   -> succeeds when the shell variable NAME holds "yes".
# enabled !NAME  -> succeeds when the shell variable NAME does not hold "yes".
# Side effect: assigns the (non-local) variable op.
enabled(){
# "${1#!}" is the argument with one leading "!" removed. If nothing was removed
# the comparison operator is "=", otherwise it is "!=".
test "${1#!}" = "$1" && op="=" || op="!="
# eval dereferences the variable whose name was passed in; the "x" prefix on
# both sides keeps the test well-formed when that variable is empty or unset.
eval test "x\$${1#!}" $op "xyes"
}
# requested NAME   -> succeeds when the shell variable NAME_requested holds "yes".
# requested !NAME  -> succeeds when NAME_requested does not hold "yes".
# Side effect: assigns the (non-local) variable op.
requested(){
# Strip one leading "!" from the argument; pick "=" when there was none and
# "!=" when there was one.
test "${1#!}" = "$1" && op="=" || op="!="
# Same indirect lookup as in enabled(), but on the variable with the
# "_requested" suffix appended to the name.
eval test "x\$${1#!}_requested" $op "xyes"
}
# Check if requested libraries were found.
# For every name in $AUTODETECT_LIBS: when the library was requested but did
# not end up enabled, call die with an error message naming that library.
for lib in $AUTODETECT_LIBS; do
requested $lib && ! enabled $lib && die "ERROR: $lib requested but not found";
done
## FFmpeg源代码分析
下面是cuviddec.c解码器模板内容:
// * Nvidia CUVID decoder
#include "libavutil/hwcontext.h"
#include "compat/cuda/dynlink_loader.h"
#include "avcodec.h"
#include "decode.h"
#include "hwconfig.h"
#include "nvdec.h"
#include "internal.h"
static av_cold int cuvid_decode_init(AVCodecContext *avctx);
/*
 * Template macro: each expansion defines an AVClass named <x>_cuvid_class and
 * an AVCodec named ff_<x>_cuvid_decoder.
 *
 *   x        - codec name pasted into the identifiers and stringified into the
 *              "<x>_cuvid" class/codec names
 *   X        - codec name used to form AV_CODEC_ID_<X> and the long name
 *   bsf_name - value stored in the .bsfs field
 *
 * Every expansion uses the same callbacks (cuvid_decode_init, cuvid_decode_end,
 * cuvid_output_frame, cuvid_flush), the same CuvidContext private data size,
 * and the same pixel format list, which is terminated by AV_PIX_FMT_NONE.
 */
#define DEFINE_CUVID_CODEC(x, X, bsf_name) \
static const AVClass x##_cuvid_class = { \
.class_name = #x "_cuvid", \
.item_name = av_default_item_name, \
.option = options, \
.version = LIBAVUTIL_VERSION_INT, \
}; \
const AVCodec ff_##x##_cuvid_decoder = { \
.name = #x "_cuvid", \
.long_name = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
.type = AVMEDIA_TYPE_VIDEO, \
.id = AV_CODEC_ID_##X, \
.priv_data_size = sizeof(CuvidContext), \
.priv_class = &x##_cuvid_class, \
.init = cuvid_decode_init, \
.close = cuvid_decode_end, \
.receive_frame = cuvid_output_frame, \
.flush = cuvid_flush, \
.bsfs = bsf_name, \
.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
.caps_internal = FF_CODEC_CAP_SETS_FRAME_PROPS, \
.pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
AV_PIX_FMT_NV12, \
AV_PIX_FMT_P010, \
AV_PIX_FMT_P016, \
AV_PIX_FMT_NONE }, \
.hw_configs = cuvid_hw_configs, \
.wrapper_name = "cuvid", \
};
上面cuvid_decode_init 、cuvid_decode_end
这些回调函数内部使用的就是ffnvcodec中的API.
然后再看编码器,这里是Nvidia编码器nvenc.h
#include <ffnvcodec/nvEncodeAPI.h>
#include "compat/cuda/dynlink_loader.h"
int ff_nvenc_encode_init(AVCodecContext *avctx);
int ff_nvenc_encode_close(AVCodecContext *avctx);
int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
void ff_nvenc_encode_flush(AVCodecContext *avctx);
extern const enum AVPixelFormat ff_nvenc_pix_fmts[];
extern const AVCodecHWConfigInternal *const ff_nvenc_hw_configs[];
这里是nvenc_h264.c,英伟达关于H264的编码器
/*
 * AVClass for the h264_nvenc encoder. It is referenced through the
 * .priv_class field of ff_h264_nvenc_encoder and carries the class name and
 * the options table.
 */
static const AVClass h264_nvenc_class = {
.class_name = "h264_nvenc",
.item_name = av_default_item_name,
.option = options,
.version = LIBAVUTIL_VERSION_INT,
};
/*
 * Codec descriptor for the "h264_nvenc" H.264 encoder.
 * The init / receive_packet / close / flush callbacks are the shared
 * ff_nvenc_* functions, and the private context is an NvencContext.
 */
const AVCodec ff_h264_nvenc_encoder = {
.name = "h264_nvenc",
.long_name = NULL_IF_CONFIG_SMALL("NVIDIA NVENC H.264 encoder"),
.type = AVMEDIA_TYPE_VIDEO,
.id = AV_CODEC_ID_H264,
.init = ff_nvenc_encode_init,
.receive_packet = ff_nvenc_receive_packet,
.close = ff_nvenc_encode_close,
.flush = ff_nvenc_encode_flush,
.priv_data_size = sizeof(NvencContext),
.priv_class = &h264_nvenc_class,
.defaults = defaults,
.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE |
AV_CODEC_CAP_ENCODER_FLUSH | AV_CODEC_CAP_DR1,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
.pix_fmts = ff_nvenc_pix_fmts,
.wrapper_name = "nvenc",
.hw_configs = ff_nvenc_hw_configs,
};
上面ff_nvenc_receive_packet、ff_nvenc_encode_close
这些回调函数内部使用的就是ffnvcodec中的API.
Nvidia支持的加速编码器还包含:
- nvenc_hevc
上面的两个我们称之为编解码器,是因为构造它们的结构体是AVCodec
,它们都注册在编解码器中的数组中:
static const FFCodec * const codec_list[] = {
&ff_h264_nvenc_encoder;,
&ff_hevc_cuvid_decoder,
&ff_libx264_encoder,
&ff_amv_encoder,
...
&ff_apng_decoder,
&ff_arbc_decoder,
&ff_argo_decoder,
&ff_asv1_decoder,
&ff_adpcm_ima_ws_decoder,
&ff_adpcm_ms_decoder,
&ff_adpcm_mtaf_decoder,
&ff_adpcm_psx_decoder,
&ff_adpcm_sbpro_2_decoder,
&ff_bintext_decoder,
&ff_xbin_decoder,
&ff_idf_decoder,
&ff_av1_decoder,
NULL };
那下面这个就是加速器,它是由AVHWAccel
构成的
这里是nvdec.h,里面是NVIDIA解码sdk的封装
// * HW decode acceleration through NVDEC
/*
 * Forward declarations of the NVDEC types. The struct members are omitted in
 * this excerpt; the typedef names are declared so that later code can refer
 * to NVDECContext and NVDECFrame.
 */
typedef struct NVDECContext NVDECContext;
typedef struct NVDECFrame NVDECFrame;
#include "compat/cuda/dynlink_loader.h"
int ff_nvdec_decode_init(AVCodecContext *avctx);
int ff_nvdec_decode_uninit(AVCodecContext *avctx);
int ff_nvdec_start_frame(AVCodecContext *avctx, AVFrame *frame);
int ff_nvdec_start_frame_sep_ref(AVCodecContext *avctx, AVFrame *frame, int has_sep_ref);
int ff_nvdec_end_frame(AVCodecContext *avctx);
int ff_nvdec_simple_end_frame(AVCodecContext *avctx);
int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
uint32_t size);
int ff_nvdec_frame_params(AVCodecContext *avctx,
AVBufferRef *hw_frames_ctx,
int dpb_size,
int supports_444);
int ff_nvdec_get_ref_idx(AVFrame *frame);
/*
 * Excerpt of the H.264 decoder context; only the first two members are shown.
 */
typedef struct H264Context {
    const AVClass *class;
    AVCodecContext *avctx;   /* codec context this decoder instance belongs to */
    /* ... remaining members omitted ... */
} H264Context;
/*
 * Excerpt of the codec context; only the hardware accelerator member is shown.
 */
typedef struct AVCodecContext {
    /**
     * Hardware accelerator in use
     * - encoding: unused.
     * - decoding: Set by libavcodec
     */
    const struct AVHWAccel *hwaccel;
    /* ... remaining members omitted ... */
} AVCodecContext;
/*
 * Hardware accelerator descriptor "h264_nvdec" for AV_CODEC_ID_H264 with the
 * AV_PIX_FMT_CUDA pixel format. It is an AVHWAccel, not an AVCodec: it only
 * supplies per-frame / per-slice callbacks plus init/uninit, and its private
 * data is an NVDECContext.
 */
const AVHWAccel ff_h264_nvdec_hwaccel = {
.name = "h264_nvdec",
.type = AVMEDIA_TYPE_VIDEO,
.id = AV_CODEC_ID_H264,
.pix_fmt = AV_PIX_FMT_CUDA,
.start_frame = nvdec_h264_start_frame,
.end_frame = ff_nvdec_end_frame,
.decode_slice = nvdec_h264_decode_slice,
.frame_params = nvdec_h264_frame_params,
.init = ff_nvdec_decode_init,
.uninit = ff_nvdec_decode_uninit,
.priv_data_size = sizeof(NVDECContext),
};
上面nvdec_h264_start_frame、nvdec_h264_frame_params
这些回调函数内部使用的就是ffnvcodec中的API.
Nvidia支持的加速解码还包含:
- nvdec_av1
- nvdec_h264
- nvdec_hevc
- nvdec_mjpeg
- nvdec_mpeg4
- nvdec_mpeg12
- nvdec_vc1
- nvdec_vp8
- nvdec_vp9
然后来看看在ffmpeg内部解码器中是怎么调用加速器的,下面是编解码器的上下文,
在struct AVCodecContext中有这么一个成员变量
/**
* Hardware accelerator in use
* - encoding: unused.
* - decoding: Set by libavcodec
*/
const struct AVHWAccel *hwaccel;
AVBufferRef *hw_device_ctx;
如果你在打开ffmpeg提供的解码器时,指定了加速器cuda
,那么就会在下面调用中进入硬件加速解码
这些函数实际在h264dec.c中调用:
/*
 * Excerpt of decode_nal_units(): slice dispatch ("..." marks omitted code).
 * NOTE(review): avctx is not declared in this excerpt; presumably it is a
 * local alias of h->avctx in the omitted part - confirm against the full file.
 */
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
...
if (h->nb_slice_ctx_queued == max_slice_ctx) {
/* A hardware accelerator is attached to the codec context: hand the raw NAL
 * data to its decode_slice callback and reset the queued-slice counter. */
if (h->avctx->hwaccel) {
ret = avctx->hwaccel->decode_slice(avctx, nal->raw_data, nal->raw_size);
h->nb_slice_ctx_queued = 0;
} ...
}
...
}
/*
 * Excerpt of decode_nal_units(): SPS handling ("..." marks omitted code; the
 * enclosing switch and some closing braces are not shown).
 */
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
...
case H264_NAL_SPS: {
GetBitContext tmp_gb = nal->gb;
/* If a hardware accelerator is attached and provides decode_params, forward
 * the NAL type and raw data to it; a negative return value jumps to end. */
if (avctx->hwaccel && avctx->hwaccel->decode_params) {
ret = avctx->hwaccel->decode_params(avctx,
nal->type,
nal->raw_data,
nal->raw_size);
if (ret < 0)
goto end;
}
...
}
参考:http://trac.ffmpeg.org/wiki/HWAccelIntro