dxva加速AVC硬件解码
注:本人为新手小白,网上关于dxva解码的资料实在是太少了,只能靠自己慢慢摸索,因此可能有误,希望有大佬帮忙解答一下,本贴仅用于个人成长记录
0、dxva参考资料
- 官网教程
https://learn.microsoft.com/zh-cn/windows/win32/medfound/about-dxva-2-0?redirectedfrom=MSDN
- ffmpeg源码
1、dxva解码步骤
- 创建D3D设备和设备管理器
- 打开设备句柄获取解码服务
- 使用解码服务创建surface池
- 使用对应的surface和guid创建解码器
- 测试device是否正常
- LockDevice锁定设备
- BeginFrame开始解码
- 获取各个buffer的内存地址并填充数据
- Execute执行解码
- EndFrame结束解码
- UnlockDevice解除锁定
2、解码步骤解析以及参数设置
dxva加速解码需要额外对avc码流的sps,pps和slice进行解析,提取关键信息后才能填入数据进行加速解码
// Bundle of the COM objects needed for DXVA2 decoding; owned for the
// lifetime of the decode manager (creation order: d3d9ex -> d3ddevice ->
// d3dmanager -> d3ddecode_service, see dxva2Create9 below).
typedef struct
{
IDirect3D9Ex* d3d9ex; // root Direct3D 9Ex interface
IDirect3DDeviceManager9* d3dmanager; // device manager (shares the device across threads/components)
IDirect3DDevice9Ex* d3ddevice; // D3D9Ex device the decoder renders into
IDirectXVideoDecoderService* d3ddecode_service;// video decoder service obtained through the manager
HANDLE d3ddevice_handle; // handle from OpenDeviceHandle; NULL when closed
}Dxva2ConfigContext;
首先需要创建设备管理器,并且获取解码服务
DxvaResult Dxva2DecodeManager::dxva2Create9()
{
HRESULT hr = S_OK;
UINT pResetToken = 0;
if (m_isinit)
{
return DXVA_OK;
}
hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &m_dxva_conf_ctx.d3d9ex);
if (FAILED(hr)) return DXVA_FAIL;
D3DPRESENT_PARAMETERS d3dpp;
ZeroMemory(&d3dpp, sizeof(d3dpp));
d3dpp.Windowed = TRUE;
d3dpp.BackBufferWidth = 640;
d3dpp.BackBufferHeight = 480;
d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
d3dpp.BackBufferCount = 0;
d3dpp.Flags = D3DPRESENTFLAG_VIDEO;
m_dxva_conf_ctx.d3d9ex->CreateDeviceEx(
D3DADAPTER_DEFAULT,
D3DDEVTYPE_HAL,
nullptr,
D3DCREATE_FLAGS,
&d3dpp,
NULL,
&m_dxva_conf_ctx.d3ddevice
);
if (FAILED(hr))
{
m_dxva_conf_ctx.d3d9ex->Release();
return DXVA_FAIL;
}
hr = DXVA2CreateDirect3DDeviceManager9(&pResetToken, &m_dxva_conf_ctx.d3dmanager);
if (FAILED(hr))
{
return DXVA_FAIL;
}
hr = m_dxva_conf_ctx.d3dmanager->ResetDevice(m_dxva_conf_ctx.d3ddevice, pResetToken);
if (FAILED(hr))
{
return DXVA_FAIL;
}
hr = m_dxva_conf_ctx.d3dmanager->OpenDeviceHandle(&m_dxva_conf_ctx.d3ddevice_handle);
if (FAILED(hr))
{
return DXVA_FAIL;
}
hr = m_dxva_conf_ctx.d3dmanager->GetVideoService(m_dxva_conf_ctx.d3ddevice_handle, IID_PPV_ARGS(&m_dxva_conf_ctx.d3ddecode_service));
if (FAILED(hr))
{
return DXVA_FAIL;
}
hr = m_dxva_conf_ctx.d3dmanager->CloseDeviceHandle(m_dxva_conf_ctx.d3ddevice_handle);
if (FAILED(hr))
{
return DXVA_FAIL;
}
m_dxva_conf_ctx.d3ddevice_handle = NULL;
m_isinit = true;
return DXVA_OK;
}
然后创建surface池用来存储解码帧,解码出来的帧数据以IDirect3DSurface9的形式存储在gpu中
/// Allocates `count` decoder render-target surfaces into a caller-visible
/// pool. The decoded frames live in these GPU surfaces.
/// @param desc         width/height/format of each surface
/// @param surface_pool out: array of `count` surface pointers (caller frees
///                     the array with delete[] after releasing the surfaces)
/// @param count        total number of surfaces to create
DxvaResult Dxva2DecodeManager::dxva2CreateSurfacePool(Dxva2SurfaceParam* desc, SurfaceType_t*& surface_pool, UINT count)
{
    surface_pool = new SurfaceType_t[count];
    // CreateSurface's third parameter is BackBuffers: it creates
    // BackBuffers + 1 surfaces, hence count - 1 here.
    HRESULT hr = m_dxva_conf_ctx.d3ddecode_service->CreateSurface(
        desc->Width,
        desc->Height,
        count - 1,
        desc->Format,
        D3DPOOL_DEFAULT,
        0,
        DXVA2_VideoDecoderRenderTarget,
        surface_pool,
        NULL
    );
    if (FAILED(hr))
    {
        // BUG FIX: the original leaked the array and left the caller's
        // out-param pointing at uninitialized surface pointers.
        delete[] surface_pool;
        surface_pool = nullptr;
        return DXVA_FAIL;
    }
    return DXVA_OK;
}
接着需要创建解码器,其中guid采用DXVA2_ModeH264_E(即H.264 VLD无film grain,ffmpeg的dxva2硬解同样默认使用这个GUID)
/// Creates an H.264 video decoder bound to the surface pool. Scans the
/// driver-reported configurations for one with ConfigBitstreamRaw == 2
/// (short-format slice data) and creates the decoder with it.
/// @return DXVA_FAIL if no short-format config exists or creation fails.
DxvaResult Dxva2DecodeManager::dxva2CreateDecoder(Dxva2SurfaceParam* desc, SurfaceType_t* surface_pool, UINT count, DXVADecoder_t& decoder)
{
    HRESULT hr = S_OK;
    DXVA2_VideoDesc decode_desc;
    // BUG FIX: the original passed a partially-initialized struct to the
    // driver; zero everything before filling the fields we know.
    ZeroMemory(&decode_desc, sizeof(decode_desc));
    decode_desc.SampleWidth = desc->Width;
    decode_desc.SampleHeight = desc->Height;
    decode_desc.Format = desc->Format;
    UINT config_count = 0;
    DXVA2_ConfigPictureDecode* configs = NULL;
    hr = m_dxva_conf_ctx.d3ddecode_service->GetDecoderConfigurations(m_decoder_guid, &decode_desc, nullptr, &config_count, &configs);
    if (FAILED(hr))
    {
        return DXVA_FAIL;
    }
    DxvaResult result = DXVA_FAIL; // stays FAIL if no suitable config is found
    for (UINT index = 0; index < config_count; index++)
    {
        if (configs[index].ConfigBitstreamRaw == 2) // 2 = short-format slice data supported
        {
            hr = m_dxva_conf_ctx.d3ddecode_service->CreateVideoDecoder(
                m_decoder_guid,
                &decode_desc,
                &configs[index],
                surface_pool,
                count,
                &decoder
            );
            result = FAILED(hr) ? DXVA_FAIL : DXVA_OK;
            break;
        }
    }
    // BUG FIX: GetDecoderConfigurations allocates the config array with
    // CoTaskMemAlloc; the caller must free it or it leaks on every call.
    CoTaskMemFree(configs);
    return result;
}
创建完解码器之后就需要开始解码了,解码需要用到自己解析的avc码流的sps,pps和slice数据信息,这个应该需要自己写,下面是填写dxvabuffer需要用到的数据,把这几个结构体的数据填好基本上就大功告成
// Per-frame parameter bundle handed to IDirectXVideoDecoder::Execute.
// Buffer layout (fixed at 4 compressed buffers):
//   [0] picture parameters, [1] inverse quant matrices,
//   [2] raw bitstream, [3] short-format slice control.
typedef struct DxvaDecodeParam
{
    DXVA2_DecodeExecuteParams executeParams;             // top-level Execute() argument
    DXVA2_DecodeBufferDesc bufferDesc[4];                // one descriptor per compressed buffer
    DXVA_PicParams_H264 h264PictureParams;               // filled from parsed SPS/PPS each frame
    DXVA_Qmatrix_H264 h264QuantaMatrix;                  // scaling lists
    DXVA_Slice_H264_Short h264SliceShort[MAX_SUB_SLICE]; // short-format slice headers

    DxvaDecodeParam()
    {
        // Zero everything first; the quant matrices get the flat default
        // (all entries 16, i.e. no scaling) instead.
        memset(&executeParams, 0, sizeof(executeParams));
        memset(bufferDesc, 0, sizeof(bufferDesc));
        memset(&h264PictureParams, 0, sizeof(h264PictureParams));
        memset(h264SliceShort, 0, sizeof(h264SliceShort));
        memset(&h264QuantaMatrix, 16, sizeof(h264QuantaMatrix));

        bufferDesc[0].CompressedBufferType = DXVA2_PictureParametersBufferType;
        bufferDesc[0].DataSize = sizeof(DXVA_PicParams_H264);
        bufferDesc[1].CompressedBufferType = DXVA2_InverseQuantizationMatrixBufferType;
        bufferDesc[1].DataSize = sizeof(DXVA_Qmatrix_H264);
        // DataSize for [2] and [3] depends on the frame and is set in
        // decodeFrame. (The "Date" in the constant name is the SDK's own typo.)
        bufferDesc[2].CompressedBufferType = DXVA2_BitStreamDateBufferType;
        bufferDesc[3].CompressedBufferType = DXVA2_SliceControlBufferType;

        executeParams.NumCompBuffers = 4;
        executeParams.pCompressedBuffers = bufferDesc;
    }
}DxvaDecodeParam;
解析sps和pps以及slice的部分有点多,而且我还没搞清楚,暂时不弄了,下面是解码一帧的数据的代码
/// Decodes one access unit: picks a free surface, fills the four DXVA
/// buffers (picture params, quant matrices, bitstream, slice control) and
/// executes the decode on the GPU.
/// @param nalu_data array of NAL unit payloads (without start codes)
/// @param nalu_len  nalu_len[0] = NAL count, nalu_len[i+1] = length of nalu_data[i]
/// @param sf_id     out: surface index the frame was decoded into
DxvaResult Dxva2Decoder::decodeFrame(SPS_DATA* sps, PPS_DATA* pps, SLICE_DATA* slice, uint8_t** nalu_data, uint32_t* nalu_len, uint32_t* sf_id)
{
    HRESULT hr = S_OK;
    IDirect3DDevice9* pDevice = NULL;
    const unsigned char startcode[3] = { 0,0,1 };
    uint32_t surface_id;
    getFreeSurface(&surface_id);
    *sf_id = surface_id;
    m_used_count[surface_id].decoder_used = TRUE;
    m_used_count[surface_id].render_used = TRUE;
    Dxva2DecodeManager::instance().dxva2OpenDeviceHandle();
    Dxva2DecodeManager::instance().dxva2TestDevice();
    Dxva2DecodeManager::instance().dxva2LockDevice();
    // Common failure path. BUG FIX: the original returned DXVA_FAIL
    // directly from inside the frame, leaving the frame open and the
    // device manager locked forever.
    auto fail = [this]() -> DxvaResult {
        DXVA2_END_FRAME(m_decoder);
        Dxva2DecodeManager::instance().dxva2UnlockDevice();
        return DXVA_FAIL;
    };
    // E_PENDING means the hardware is still busy with a previous frame.
    do {
        hr = m_decoder->BeginFrame(m_surfaces[surface_id], NULL);
        Sleep(1);
    } while (hr == E_PENDING);
    FRAME_INFO info;
    info.sps = sps;
    info.pps = pps;
    info.slice = slice;
    // First payload byte of the first NAL: nal_ref_idc (bits 5-6) and
    // nal_unit_type (bits 0-4).
    info.btNalRefIdc = nalu_data[0][0] >> 5;
    info.NalUnitType = (NAL_UNIT_TYPE)(nalu_data[0][0] & 0x1f);
    transFrameDataToDecodeParam(surface_id, sps, pps, slice, m_decode_param);
    handlePOC(surface_id, info);
    void* pBuffer = NULL;
    UINT uiSize = 0;
    /*----------------------------------- Picture ---------------------------------------*/
    hr = DXVA2_GET_BUFFER(m_decoder, DXVA2_PictureParametersBufferType, pBuffer, uiSize);
    if (FAILED(hr) || sizeof(DXVA_PicParams_H264) > uiSize)
    {
        return fail();
    }
    memcpy(pBuffer, &m_decode_param.h264PictureParams, sizeof(DXVA_PicParams_H264));
    if (FAILED(DXVA2_FREE_BUFFER(m_decoder, DXVA2_PictureParametersBufferType)))
    {
        return fail();
    }
    /*----------------------------------- QuantaMatrix ---------------------------------------*/
    hr = DXVA2_GET_BUFFER(m_decoder, DXVA2_InverseQuantizationMatrixBufferType, pBuffer, uiSize);
    if (FAILED(hr) || sizeof(DXVA_Qmatrix_H264) > uiSize)
    {
        return fail();
    }
    memcpy(pBuffer, &m_decode_param.h264QuantaMatrix, sizeof(DXVA_Qmatrix_H264));
    if (FAILED(DXVA2_FREE_BUFFER(m_decoder, DXVA2_InverseQuantizationMatrixBufferType)))
    {
        return fail();
    }
    /*----------------------------------- BitStream ------------------------------------------*/
    hr = DXVA2_GET_BUFFER(m_decoder, DXVA2_BitStreamDateBufferType, pBuffer, uiSize);
    // BUG FIX: the original never checked this HRESULT and would memcpy
    // into an invalid pBuffer on failure.
    if (FAILED(hr))
    {
        return fail();
    }
    UINT data_count = 0;
    // Re-prefix each NAL with a 00 00 01 start code and pack them
    // back-to-back; record each slice's offset/length for the short
    // slice-control entries.
    for (UINT i = 0; i < nalu_len[0]; i++)
    {
        if (uiSize < data_count + nalu_len[i + 1] + sizeof(startcode))
        {
            return fail();
        }
        m_decode_param.h264SliceShort[i].SliceBytesInBuffer = nalu_len[i + 1] + sizeof(startcode);
        m_decode_param.h264SliceShort[i].BSNALunitDataLocation = data_count;
        m_decode_param.h264SliceShort[i].wBadSliceChopping = 0; // whole slice, no chopping
        memcpy((unsigned char*)pBuffer, startcode, sizeof(startcode));
        memcpy((unsigned char*)pBuffer + sizeof(startcode), nalu_data[i], nalu_len[i + 1]);
        pBuffer = (unsigned char*)pBuffer + sizeof(startcode) + nalu_len[i + 1];
        data_count += sizeof(startcode) + nalu_len[i + 1];
    }
    // Zero-pad the bitstream up to a 128-byte boundary (some drivers
    // require aligned bitstream sizes), capped at the buffer end.
    UINT pd = MIN(128 - (data_count & 127), uiSize - data_count);
    memset(pBuffer, 0, pd);
    if (FAILED(DXVA2_FREE_BUFFER(m_decoder, DXVA2_BitStreamDateBufferType)))
    {
        return fail();
    }
    /*----------------------------------- Slices ---------------------------------------------*/
    int slice_count = nalu_len[0];
    hr = DXVA2_GET_BUFFER(m_decoder, DXVA2_SliceControlBufferType, pBuffer, uiSize);
    // BUG FIX: hr was ignored here too.
    if (FAILED(hr) || slice_count * sizeof(DXVA_Slice_H264_Short) > uiSize)
    {
        return fail();
    }
    memcpy(pBuffer, &m_decode_param.h264SliceShort, slice_count * sizeof(DXVA_Slice_H264_Short));
    if (FAILED(DXVA2_FREE_BUFFER(m_decoder, DXVA2_SliceControlBufferType)))
    {
        return fail();
    }
    /*----------------------------------------------------------------------------------------------*/
    m_decode_param.bufferDesc[2].DataSize = data_count + pd;
    m_decode_param.bufferDesc[2].NumMBsInBuffer = (sps->pic_width_in_mbs_minus1 + 1) * (sps->pic_height_in_map_units_minus1 + 1);
    m_decode_param.bufferDesc[3].DataSize = slice_count * sizeof(DXVA_Slice_H264_Short);
    m_decode_param.bufferDesc[3].NumMBsInBuffer = m_decode_param.bufferDesc[2].NumMBsInBuffer;
    if (FAILED(DXVA2_EXCUTE(m_decoder, m_decode_param.executeParams)))
    {
        return fail();
    }
    if (FAILED(DXVA2_END_FRAME(m_decoder)))
    {
        Dxva2DecodeManager::instance().dxva2UnlockDevice();
        return DXVA_FAIL;
    }
    m_decoded.push(surface_id);
    return Dxva2DecodeManager::instance().dxva2UnlockDevice();
}