本文首先对HM中帧间预测的基本流程作简要介绍,接着对代码中关键变量的用途作出说明,最后以源代码+注释的形式进行具体分析。
备注:这位大神的博客对楼主帮助很大,解决了我的不少疑惑,最后才能顺利写下这篇博客。大神博客地址:(https://blog.csdn.net/NB_vol_1/article/details/55272434)。对帧间预测基本概念还不是很熟悉的同学,可以先看看大神的博客。
HM中帧间预测的基本流程
HEVC中帧间预测支持多种划分模式:Merge(Skip是特殊的Merge)、2Nx2N、NxN、2NxN、Nx2N,以及AMP模式:2NxnU、2NxnD、nLx2N、nRx2N。HM按照固定的顺序对上述帧间划分模式逐一进行评估,最后选出最优的帧间模式(之后还会对帧内模式、PCM模式进行评估)。
xCompressCU中的关键变量
HM中进行帧间预测的入口函数是xCompressCU,当然函数里面不仅仅是做了帧间预测这一件事(还有帧内预测、PCM等)。以下列出函数体中的关键变量及其用途。
bool doNotBlockPu:默认为true。值为false时,跳过对后续帧间划分模式的评估。只有使能cbf快速模式(默认关闭)时,doNotBlockPu的值才会被设为false。注:cbf快速模式是指当cbf值为0(表明量化后的残差系数全为0)时,跳过对当前深度后续剩余划分模式的评估。
bool earlyDetectionSkipMode:默认为false。值为true时,跳过对剩余划分模式以及四叉树递归划分的评估操作。只有使能EarlySkipDetection模式(默认关闭)时,earlyDetectionSkipMode才会被设为true。
AMP相关变量:
bool bTestAMP_Hor:默认为false。值为true时,对水平AMP模式(2NxnU、2NxnD)进行评估。
bool bTestAMP_Ver:默认为false。值为true时,对垂直AMP模式(nLx2N、nRx2N)进行评估。
bool bTestMergeAMP_Hor:默认为false。值为true时,对水平AMP模式进行Merge评估,即选出最优的Merge候选项
bool bTestMergeAMP_Ver:默认为false。值为true时,对垂直AMP模式进行Merge评估。
rpcTempCU:当前评估的CU模式
rpcBestCU:当前最优CU模式
具体代码分析
注意:此代码段是从HM16.20源代码TEncCu.cpp中完整拷贝过来的,里面加入了个人的阅读注释。这里对于与帧间预测无关的代码未做解释说明。楼主对代码的注释可能会存在理解偏差,望读者可以指出来,一起交流探讨~毕竟楼主对于HM源代码研究不多,不少地方也存在疑惑。希望有大佬同仁一起学习交流,共同进步!
// ====================================================================================================================
// Protected member functions
// ====================================================================================================================
/** Compress a CU block recursively with enabling sub-CTU-level delta QP
* - for loop of QP value to compress the current CU with all possible QP
*/
#if AMP_ENC_SPEEDUP
Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, const UInt uiDepth DEBUG_STRING_FN_DECLARE(sDebug_), PartSize eParentPartSize )
#else
Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, const UInt uiDepth )
#endif
{
TComPic* pcPic = rpcBestCU->getPic();
DEBUG_STRING_NEW(sDebug)
const TComPPS &pps=*(rpcTempCU->getSlice()->getPPS());
const TComSPS &sps=*(rpcTempCU->getSlice()->getSPS());
// These are only used if getFastDeltaQp() is true
const UInt fastDeltaQPCuMaxSize = Clip3(sps.getMaxCUHeight()>>sps.getLog2DiffMaxMinCodingBlockSize(), sps.getMaxCUHeight(), 32u);
// get Original YUV data from picture
// *** 获取rpcBestCU的数据并存放到 m_ppcOrigYuv[uiDepth]地址处
m_ppcOrigYuv[uiDepth]->copyFromPicYuv( pcPic->getPicYuvOrg(), rpcBestCU->getCtuRsAddr(), rpcBestCU->getZorderIdxInCtu() ); // *** m_ppcOrigYuv 存放 YUV data
// variable for Cbf fast mode PU decision
Bool doNotBlockPu = true; // *** false时跳过后续模式的检测
Bool earlyDetectionSkipMode = false; // *** true时跳过后续模式及后续深度的检测
const UInt uiLPelX = rpcBestCU->getCUPelX(); // *** 上下左右边界的位置
const UInt uiRPelX = uiLPelX + rpcBestCU->getWidth(0) - 1;
const UInt uiTPelY = rpcBestCU->getCUPelY();
const UInt uiBPelY = uiTPelY + rpcBestCU->getHeight(0) - 1;
const UInt uiWidth = rpcBestCU->getWidth(0);
Int iBaseQP = xComputeQP( rpcBestCU, uiDepth );
Int iMinQP;
Int iMaxQP;
Bool isAddLowestQP = false;
const UInt numberValidComponents = rpcBestCU->getPic()->getNumberValidComponents();
if( uiDepth <= pps.getMaxCuDQPDepth() )
{
Int idQP = m_pcEncCfg->getMaxDeltaQP();
iMinQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP-idQP );
iMaxQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP+idQP );
}
else
{
iMinQP = rpcTempCU->getQP(0);
iMaxQP = rpcTempCU->getQP(0);
}
if ( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() ) // 若使能,则QP固定?
{
if ( uiDepth <= pps.getMaxCuDQPDepth() )
{
// keep using the same m_QP_LUMA_OFFSET in the same CTU
m_lumaQPOffset = calculateLumaDQP(rpcTempCU, 0, m_ppcOrigYuv[uiDepth]);
}
iMinQP = Clip3(-sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP - m_lumaQPOffset);
iMaxQP = iMinQP; // force encode choose the modified QO
}
if ( m_pcEncCfg->getUseRateCtrl() )
{
iMinQP = m_pcRateCtrl->getRCQP();
iMaxQP = m_pcRateCtrl->getRCQP();
}
// transquant-bypass (TQB) processing loop variable initialisation ---
const Int lowestQP = iMinQP; // *** For TQB, use this QP which is the lowest non TQB QP tested (rather than QP'=0) - that way delta QPs are smaller, *** and TQB can be tested at all CU levels.
if ( (pps.getTransquantBypassEnabledFlag()) )
{
isAddLowestQP = true; // mark that the first iteration is to cost TQB mode.
iMinQP = iMinQP - 1; // increase loop variable range by 1, to allow testing of TQB mode along with other QPs
if ( m_pcEncCfg->getCUTransquantBypassFlagForceValue() ) // *** ?
{
iMaxQP = iMinQP;
}
}
TComSlice * pcSlice = rpcTempCU->getPic()->getSlice(rpcTempCU->getPic()->getCurrSliceIdx());
const Bool bBoundary = !( uiRPelX < sps.getPicWidthInLumaSamples() && uiBPelY < sps.getPicHeightInLumaSamples() ); // true 表明是边界
if ( !bBoundary ) // 非边界情况
{
for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++) // ***
{
const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP); // 对应 TQB模式即无失真模式?
if (bIsLosslessMode)
{
iQP = lowestQP;
}
if ( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() && uiDepth <= pps.getMaxCuDQPDepth() )
{
getSliceEncoder()->updateLambda(pcSlice, iQP); // *** Lambda值 随 QP值变化
}
m_cuChromaQpOffsetIdxPlus1 = 0;
if (pcSlice->getUseChromaQpAdj())
{
/* Pre-estimation of chroma QP based on input block activity may be performed
* here, using for example m_ppcOrigYuv[uiDepth] */
/* To exercise the current code, the index used for adjustment is based on
* block position
*/
Int lgMinCuSize = sps.getLog2MinCodingBlockSize() +
std::max<Int>(0, sps.getLog2DiffMaxMinCodingBlockSize()-Int(pps.getPpsRangeExtension().getDiffCuChromaQpOffsetDepth()));
m_cuChromaQpOffsetIdxPlus1 = ((uiLPelX >> lgMinCuSize) + (uiTPelY >> lgMinCuSize)) % (pps.getPpsRangeExtension().getChromaQpOffsetListLen() + 1);
}
rpcTempCU-