ffplay自定义系列
第一章 自定义播放器接口
第二章 倍速播放
第三章 dxva2硬解渲染(本章)
第四章 提供C#接口
第五章 制作wpf播放器
前言
ffplay本身是支持设置解码器的,比如设置h264_qsv、hevc_cuvid等就可以使用硬解功能,实际测试确实是有效的,cpu使用率也是有所下降。但是这并不是最佳的方案,在Windows上更好的方案是使用dxva2解码然后使用d3d9渲染,这种方法不仅极大降低cpu使用率、gpu使用率也有所下降、同时解码速度也比较快。但是ffplay本身是不支持使用dxva2的,所以这个时候就要我们进行拓展了。
一、ffmpeg使用dxva2
dxva2解码渲染包含2个步骤:解码和渲染。之所以是更优的方案,是因为解码和渲染都在显卡中处理,解码的数据不需要取出到内存,直接在显存中转换然后渲染。ffmpeg有包含dxva2的示例代码,但没有显卡渲染功能,性能还是和设置解码器没有区别。我们需要参考的是项目名称叫Win32Project1的ffmpeg_dxva2解码渲染的博文(暂时没找到当时那篇,就不贴其他类似链接了)
头文件如下:
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef FFMPEG_DXVA2_H
#define FFMPEG_DXVA2_H

/* Needed for the HWND parameter of dxva2_init() below. */
#include <windows.h>

/* The FFmpeg headers are plain C; guard the linkage block so this header
 * can be included from both C and C++ translation units (an unconditional
 * `extern "C"` is a syntax error when compiled as C). */
#ifdef __cplusplus
extern "C"{
#endif
#include "libavcodec/avcodec.h"
#include "libavutil/pixfmt.h"
#include "libavutil/rational.h"
#ifdef __cplusplus
}
#endif

/* Hardware acceleration backends (subset of ffmpeg.c's HWAccelID). */
enum HWAccelID {
HWACCEL_NONE = 0,
HWACCEL_AUTO,
HWACCEL_VDPAU,
HWACCEL_DXVA2,
HWACCEL_VDA,
HWACCEL_VIDEOTOOLBOX,
HWACCEL_QSV,
};

/* Forward declarations so the struct below does not force extra includes
 * on users of this header (avcodec.h above already defines them). */
typedef struct AVStream AVStream;
typedef struct AVCodecContext AVCodecContext;
typedef struct AVCodec AVCodec;
typedef struct AVFrame AVFrame;
typedef struct AVDictionary AVDictionary;

/* Per-stream decoder state, trimmed down from ffmpeg.c's InputStream.
 * The dxva2 code stores this in AVCodecContext.opaque and uses the
 * hwaccel_* members as its callback/context slots. */
typedef struct InputStream {
int file_index;
AVStream *st;
int discard; /* true if stream data should be discarded */
int user_set_discard;
int decoding_needed; /* non zero if the packets must be decoded in 'raw_fifo', see DECODING_FOR_* */
#define DECODING_FOR_OST 1
#define DECODING_FOR_FILTER 2
AVCodecContext *dec_ctx;
AVCodec *dec;
AVFrame *decoded_frame;
AVFrame *filter_frame; /* a ref of decoded_frame, to be sent to filters */
int64_t start; /* time when read started */
/* predicted dts of the next packet read for this stream or (when there are
 * several frames in a packet) of the next frame in current packet (in AV_TIME_BASE units) */
int64_t next_dts;
int64_t dts; ///< dts of the last packet read for this stream (in AV_TIME_BASE units)
int64_t next_pts; ///< synthetic pts for the next decode frame (in AV_TIME_BASE units)
int64_t pts; ///< current pts of the decoded frame (in AV_TIME_BASE units)
int wrap_correction_done;
int64_t filter_in_rescale_delta_last;
int64_t min_pts; /* pts with the smallest value in a current stream */
int64_t max_pts; /* pts with the higher value in a current stream */
int64_t nb_samples; /* number of samples in the last decoded audio frame before looping */
double ts_scale;
int saw_first_ts;
int showed_multi_packet_warning;
AVDictionary *decoder_opts;
AVRational framerate; /* framerate forced with -r */
int top_field_first;
int guess_layout_max;
int autorotate;
int resample_height;
int resample_width;
int resample_pix_fmt;
int resample_sample_fmt;
int resample_sample_rate;
int resample_channels;
uint64_t resample_channel_layout;
int fix_sub_duration;
struct { /* previous decoded subtitle and related variables */
int got_output;
int ret;
AVSubtitle subtitle;
} prev_sub;
struct sub2video {
int64_t last_pts;
int64_t end_pts;
AVFrame *frame;
int w, h;
} sub2video;
int dr1;
/* decoded data from this stream goes into all those filters
 * currently video and audio only */
//InputFilter **filters;
//int nb_filters;
//int reinit_filters;
/* hwaccel options */
enum HWAccelID hwaccel_id;
char *hwaccel_device;
/* hwaccel context */
enum HWAccelID active_hwaccel_id;
void *hwaccel_ctx;
void(*hwaccel_uninit)(AVCodecContext *s);
int(*hwaccel_get_buffer)(AVCodecContext *s, AVFrame *frame, int flags);
int(*hwaccel_retrieve_data)(AVCodecContext *s, AVFrame *frame);
enum AVPixelFormat hwaccel_pix_fmt;
enum AVPixelFormat hwaccel_retrieved_pix_fmt;
/* stats */
// combined size of all the packets read
uint64_t data_size;
/* number of packets successfully read for this stream */
uint64_t nb_packets;
// number of frames/samples retrieved from the decoder
uint64_t frames_decoded;
uint64_t samples_decoded;
} InputStream;

/* Initialize DXVA2 decoding and d3d9 rendering on the given codec context.
 * s->opaque must point to an InputStream; hwnd must be the render window.
 * Returns 0 on success, non-zero on failure. */
int dxva2_init(AVCodecContext *s, HWND hwnd);
/* Present a decoded DXVA2 frame (d3d9 surface in frame->data[3]). */
int dxva2_retrieve_data_call(AVCodecContext *s, AVFrame *frame);
#endif /* FFMPEG_DXVA2_H */
二、解码
修改ffplay解码功能需要在stream_component_open中进行:
1、添加字段
引用Win32Project1的ffmpeg_dxva2.h头文件
#include "ffmpeg_dxva2.h"
定义一个枚举说明硬件加速类型
/// <summary>
/// Hardware acceleration options
/// </summary>
typedef enum
{
AC_HARDWAREACCELERATETYPE_DISABLED,
AC_HARDWAREACCELERATETYPE_AUTO,
//Use DXVA decoding (Windows only). When it starts successfully: the started/display
//events report pixformat AC_PIXELFORMAT_DXVA2_VLD, and the render event's data[3]
//is the d3d9 surface object.
AC_HARDWAREACCELERATETYPE_DXVA
}ACHardwareAccelerateType;
在VideoState中添加如下字段硬件加速类型,以及Win32Project1的InputStream对象
ACHardwareAccelerateType hwaccel;
InputStream* ist;
添加相应接口
//Set the hardware acceleration type.
//play:  player handle (a VideoState*); a NULL handle is ignored instead of crashing
//value: requested acceleration mode; takes effect the next time a stream is opened
void ac_play_setHardwareAccelerateType(ACPlay play, ACHardwareAccelerateType value) {
VideoState* s = (VideoState*)play;
if (!s)
return;
s->hwaccel = value;
}
2、初始化
在stream_component_open的avcodec_open2上一行,加入判断hwaccel初始化dxva逻辑。dxva2_init就是Win32Project1中的方法,此方法一定要有hwnd,这个hwnd必须是渲染窗口的。如果不想设置hwnd达到相同性能则需要另外做修改,本文就不深入讨论了。
/* Try to enable DXVA2 hardware decoding. Must run before avcodec_open2();
 * is->hwnd must be the render window handle. On any failure we fall back
 * to software decoding. */
if (is->hwaccel == AC_HARDWAREACCELERATETYPE_AUTO || is->hwaccel == AC_HARDWAREACCELERATETYPE_DXVA)
{
    switch (codec->id)
    /* codecs DXVA2 can decode */
    {
    case AV_CODEC_ID_MPEG2VIDEO:
    case AV_CODEC_ID_H264:
    case AV_CODEC_ID_VC1:
    case AV_CODEC_ID_WMV3:
    case AV_CODEC_ID_HEVC:
    case AV_CODEC_ID_VP9:
    {
        avctx->thread_count = 1; /* multithreading is not compatible with hardware decoding */
        is->ist = av_mallocz(sizeof(InputStream));
        if (!is->ist)
            break; /* out of memory: keep software decoding */
        is->ist->hwaccel_id = HWACCEL_AUTO;
        is->ist->active_hwaccel_id = HWACCEL_AUTO;
        is->ist->hwaccel_device = "dxva2";
        is->ist->dec = codec;
        is->ist->dec_ctx = avctx;
        avctx->opaque = is->ist;
        if (dxva2_init(avctx, is->hwnd) == 0)
        {
            avctx->get_buffer2 = is->ist->hwaccel_get_buffer;
            avctx->get_format = GetHwFormat;
            avctx->thread_safe_callbacks = 1;
            avctx->pix_fmt = AV_PIX_FMT_DXVA2_VLD;
        }
        else
        {
            /* dxva2_init failed: clear the opaque pointer BEFORE freeing,
             * otherwise avctx would keep a dangling reference to freed
             * memory (use-after-free if any callback reads it later). */
            avctx->opaque = NULL;
            av_free(is->ist);
            is->ist = NULL;
        }
        break;
    }
    default:
        break; /* codec not supported by DXVA2: use software decoding */
    }
}
将解码的avframe的格式设置为AV_PIX_FMT_DXVA2_VLD,上述代码中的GetHwFormat具体如下:
/* AVCodecContext.get_format callback: select the DXVA2 pixel format.
 * Scans the list of formats the decoder actually offers instead of forcing
 * AV_PIX_FMT_DXVA2_VLD unconditionally, so decoding still works when the
 * decoder cannot provide DXVA2 for this stream (matches the pattern used by
 * FFmpeg's hw_decode example). */
static enum AVPixelFormat GetHwFormat(AVCodecContext* s, const enum AVPixelFormat* pix_fmts)
{
    InputStream* ist = (InputStream*)s->opaque;
    const enum AVPixelFormat* p;
    for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++)
    {
        if (*p == AV_PIX_FMT_DXVA2_VLD)
        {
            ist->active_hwaccel_id = HWACCEL_DXVA2;
            ist->hwaccel_pix_fmt = AV_PIX_FMT_DXVA2_VLD;
            return ist->hwaccel_pix_fmt;
        }
    }
    /* DXVA2 not offered by the decoder: fall back to its first choice. */
    return pix_fmts[0];
}
3、反初始化
在stream_close中加入如下反初始化代码,其中dxva2_uninit2是Win32Project1中的dxva2_uninit将其参数类型改为了InputStream*。
/* Tear down the DXVA2 state created in stream_component_open. */
if (is->ist)
{
dxva2_uninit2(is->ist);
av_free(is->ist);
is->ist = NULL; /* avoid dangling pointer / double free on repeated close */
}
三、渲染
有了上述的解码设置之后,解码出来的数据将是d3d9的surface,这个对象在avframe.data[3]中,我们需要对它进行处理,将其显示到界面上。幸运的是Win32Project1包含了这部分功能,我们只需要调用方法就可以了。
在video_display的SDL_RenderClear上一行加入如下代码。
Frame* vp;
vp = frame_queue_peek_last(&is->pictq);
/* DXVA2-decoded frames carry a d3d9 surface in frame->data[3];
 * hand them to the dxva2 renderer and skip the SDL path entirely. */
if (vp->format == AV_PIX_FMT_DXVA2_VLD)
{
dxva2_retrieve_data_call(is->viddec.avctx, vp->frame);
return; /* frame was presented via d3d9; nothing for SDL to do */
}
总结
以上就是今天要讲的内容,通过上述方法实现的dxva2硬解渲染性能非常好,直观的感受就是渲染4k视频cpu使用率不超过1%,当然此时gpu使用率可能是50%左右的,但是设置硬解编码器的效果就是10%的cpu使用率,gpu使用率也是50%左右。原因在Win32Project1_ffmpeg_dxva2的博文中有说明,这里就不重复了。总的来说,ffplay支持dxva2硬解渲染后就有更广泛和实际的应用了,比如直接解决了实时流多路渲染性能不足的问题等。