在HEVC参考代码中,每个CTU块通过xCompressCU()函数对CU进行递归划分,从而得到最优的CU深度。
递归的过程如下图(引自:Fast CU Splitting and Pruning for Suboptimal CU Partitioning in HEVC Intra Coding)所示。图中每一个方框表示一个CU块,方框内的数字表示xCompressCU()函数的执行顺序。显而易见,如果能在执行xCompressCU()函数之前就确定CU的递归深度范围,就可以降低HEVC编码器的复杂度。
针对帧内编码器,已经有很多文献提出了提前确定CU递归深度的方法。这里介绍"Fast CU Size Decision and Mode Decision Algorithm for HEVC Intra Coding"中Section II.A部分的具体实现。该文献利用周边CTU块的深度信息来预测当前CTU块的深度范围,具体细节可以查看该文献。
Void TEncCu::compressCU( TComDataCU*& rpcCU )
{
// initialize CU data
m_ppcBestCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() );
m_ppcTempCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() );
memset( m_preAnalyzeDepth, 0, rpcCU->getTotalNumPart() );
memset( m_preAnaDepthDetermined, 0, rpcCU->getTotalNumPart() );
memset( m_preAnaDepthRange , 0, rpcCU->getTotalNumPart() );
// Neighboring CTUs.
TComDataCU* t_pcCULeft = rpcCU->getCULeft();
TComDataCU* t_pcCUAbove = rpcCU->getCUAbove();
TComDataCU* t_pcCUAboveLeft = rpcCU->getCUAboveLeft();
TComDataCU* t_pcCUAboveRight= rpcCU->getCUAboveRight();
UInt DepthLeft = 0; // Max Depth of LeftCTU.
UInt DepthAbove = 0; // Max Depth of AboveCTU.
UInt DepthAboveLeft = 0;
UInt DepthAboveRight = 0;
UInt picWidth = rpcCU->getSlice()->getSPS()->getPicWidthInLumaSamples();
UInt picHeight = rpcCU->getSlice()->getSPS()->getPicHeightInLumaSamples();
UInt uiLPelX = rpcCU->getCUPelX();
UInt uiRPelX = uiLPelX + rpcCU->getWidth(0) - 1;
UInt uiTPelY = rpcCU->getCUPelY();
UInt uiBPelY = uiTPelY + rpcCU->getHeight(0) - 1;
UChar tDepth;
m_insidePicture= (uiRPelX<picWidth) && (uiBPelY<picHeight);
// Considering Border CTUs.
if ( t_pcCULeft!=NULL ) //获取左边CTU块最大的depth信息
{
for ( Int i=0; i<256; i++ )
{
tDepth = t_pcCULeft->getDepth(i);
if ( tDepth>DepthLeft )
{
DepthLeft = (UInt)tDepth;
}
}
}
else
DepthLeft = 2; //如果是NULL,直接赋值2(16X16)
if ( t_pcCUAbove!=NULL )
{
for ( Int i=0; i<256; i++ )
{
tDepth = t_pcCUAbove->getDepth(i);
if ( tDepth>DepthAbove )
{
DepthAbove = (UInt)tDepth;
}
}
}
else
DepthAbove = 2;
if ( t_pcCUAboveLeft!=NULL )
{
DepthAboveLeft = t_pcCUAboveLeft->getDepth(g_auiRasterToZscan[16*15+15]);
}
else
DepthAboveLeft = 2;
if ( t_pcCUAboveRight!=NULL )
{
DepthAboveRight = t_pcCUAboveRight->getDepth(g_auiRasterToZscan[16*15]);
}
else
DepthAboveRight = 2;
Double DepthPre = 0.3*DepthLeft+0.3*DepthAbove+0.2*DepthAboveLeft+0.2*DepthAboveRight; // 论文中Prediction Depth Type
if ( DepthPre<=0.5 ) // 依据论文中的公式给出最小的depth level和最大的depth level
{
memset( m_preAnaDepthDetermined, 1, 256 );
memset( m_preAnaDepthRange, 2, 256 );
memset( m_preAnalyzeDepth, 0, 256 );
}
else if ( DepthPre<=1.5 )
{
memset( m_preAnaDepthDetermined, 1, 256 );
memset( m_preAnaDepthRange, 3, 256 );
memset( m_preAnalyzeDepth, 0, 256 );
}
else
{
memset( m_preAnaDepthDetermined, 1, 256 );
memset( m_preAnaDepthRange, 3, 256 );
memset( m_preAnalyzeDepth, 1, 256 );
}
DEBUG_STRING_NEW(sDebug)
xCompressCU( m_ppcBestCU[0], m_ppcTempCU[0], 0 DEBUG_STRING_PASS_INTO(sDebug) );
DEBUG_STRING_OUTPUT(std::cout, sDebug)
// Double Check.
UInt MaxDepthSize=0;
// UInt CTUPelX, CTUPelY;
if ( m_insidePicture )
{
for ( Int i=0; i<256; i++ )
{
// Decisioned.
tDepth = m_ppcBestCU[0]->getDepth(i);
UChar cuDepth = m_preAnalyzeDepth[i];
UChar cuPreDetermined = m_preAnaDepthDetermined[i];
UChar cuRange = m_preAnaDepthRange[i];
if ( tDepth<cuDepth && tDepth>=cuDepth+cuRange )
{
assert(0);
}
}
}
#if ADAPTIVE_QP_SELECTION
if( m_pcEncCfg->getUseAdaptQpSelect() )
{
if(rpcCU->getSlice()->getSliceType()!=I_SLICE) //IIII
{
xLcuCollectARLStats( rpcCU);
}
}
#endif
}
在xCompressCU函数中加入相关的条件跳转:
// Excerpt from inside xCompressCU(): the excerpt starts and ends mid-function.
// Detect whether a slice segment starts or ends strictly inside this CU.
TComSlice * pcSlice = rpcTempCU->getPic()->getSlice(rpcTempCU->getPic()->getCurrSliceIdx());
Bool bSliceStart = pcSlice->getSliceSegmentCurStartCUAddr()>rpcTempCU->getSCUAddr()&&pcSlice->getSliceSegmentCurStartCUAddr()<rpcTempCU->getSCUAddr()+rpcTempCU->getTotalNumPart();
Bool bSliceEnd = (pcSlice->getSliceSegmentCurEndCUAddr()>rpcTempCU->getSCUAddr()&&pcSlice->getSliceSegmentCurEndCUAddr()<rpcTempCU->getSCUAddr()+rpcTempCU->getTotalNumPart());
// True when the right and bottom edges of the current CU lie inside the picture.
Bool bInsidePicture = ( uiRPelX < rpcBestCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiBPelY < rpcBestCU->getSlice()->getSPS()->getPicHeightInLumaSamples() );
// Fast CU decision process.
// When the current depth is not inside the pre-analyzed depth range, the
// mode decision loop below is skipped for this depth.
// Added by xfHuang.
Bool t_enCUSkip=false;
// The gate uses m_insidePicture (set in compressCU() for the whole CTU),
// not the per-CU bInsidePicture computed above.
if ( m_insidePicture )
{
// Split Analysis For CU32X32 And CU16X16.
// If the current depth is outside the predicted depth range, mark the CU as
// skipped and give it a very large cost so that it is not selected.
if ( checkCurDepthInPreAnaRange( rpcBestCU, uiDepth ) == false )
{
t_enCUSkip = true;
// NOTE(review): the cost is scaled down (/16, >>3) rather than set to the
// maximum - presumably so that summing sub-CU costs cannot overflow; confirm.
rpcBestCU->getTotalCost() = MAX_DOUBLE/16;
rpcBestCU->getTotalDistortion() = MAX_UINT>>3;
rpcBestCU->getTotalBits() = MAX_UINT>>3;
// At depth 3 give the skipped CU a partition size and prediction mode;
// presumably this keeps later asserts from firing - TODO confirm.
if ( uiDepth==3 )
{
rpcBestCU->setPartitionSize ( 0, SIZE_2Nx2N );
rpcBestCU->setPredictionMode( 0, MODE_INTRA );
}
}
}
// Modes are only tried when no slice boundary cuts this CU and it lies inside
// the picture; otherwise the CU has to be split.
if(!bSliceEnd && !bSliceStart && bInsidePicture )
{
// Skip the whole mode decision loop when the depth was ruled out above.
if( t_enCUSkip==false )
{
for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)
{
// The first iteration is the lossless pass when isAddLowestQP is set.
const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP);
if (bIsLosslessMode)
{
iQP = lowestQP;
}
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
其中checkCurDepthInPreAnaRange函数如下:
/** Tells whether a CU depth lies inside the depth range predicted for a CU.
 *
 * The range is looked up at the CU's Z-order index inside its CTU and is
 * [m_preAnalyzeDepth, m_preAnalyzeDepth + m_preAnaDepthRange).
 * m_preAnaDepthDetermined is not consulted here.
 *
 * \param pCU     CU whose Z-order index selects the predicted range
 * \param uidepth depth to test
 * \returns true if uidepth is inside the predicted range, false otherwise
 */
Bool TEncCu::checkCurDepthInPreAnaRange( TComDataCU*& pCU, UInt uidepth )
{
  const UInt uiZorderIdx  = pCU->getZorderIdxInCU();
  const UInt uiFirstDepth = m_preAnalyzeDepth[uiZorderIdx];
  const UInt uiRange      = m_preAnaDepthRange[uiZorderIdx];

  // The exclusive upper bound of the range must not exceed 5.
  assert( uiFirstDepth+uiRange<=5 );

  return uidepth>=uiFirstDepth && uidepth<uiFirstDepth+uiRange;
}
以上是一种基于周边CTU块信息来进行CU深度优化的方法。对于大部分情况来说,这个方法只是跳过了64X64这一层depth,因此性能损失很小,平均大概在0.2%左右,编码时间可以节省10%左右。
[转载请注明作者和出处]