帧内模式决策的认识
最近,随着对HM的了解更加深入,对一些以前只停留在理论层面的知识,在HM中具体是如何实现的有了更清晰的认知。现在总结一下对帧内模式决策的认识。
首先,分析一下帧内模式决策的理论知识。在HEVC中,帧内模式决策要选出最佳模式需要经过四个过程(其实也可以说是三个过程)。
首先,是对35种帧内模式进行RMD(Rough Mode Decision,粗略模式决策)。RMD主要以低复杂度的方式计算35种模式的代价,并根据PU的大小选取代价较小的前几个模式构成初始候选模式集,进入下一过程。
其次,进行MPM(Most Probable Mode,最可能模式)检查。所谓MPM检查,即判断RMD选出的候选模式中是否已包含当前PU左邻近块与上邻近块的最佳模式:如果不包含,则将缺少的模式加入初始候选模式集;反之,则不作处理。
再次,对经过MPM检查后的候选模式集进行RDO过程。此时的RDO是在TU最大尺寸上进行的,从候选模式集中选出RDcost最小的模式,作为当前PU的最佳模式。
最后,在最佳模式上进行RQT,即在最佳模式上决定最佳TU分割。(之所以说三个过程就可以,那是因为前三个过程已经确定了最佳模式,只不过在RDO中也涉及到了TU,所以就说是四个过程了。)
具体代码实现如下各图所示。
图1 RMD
图2 MPM
以下程序为RDO过程:
// Full-RD pass over the candidates: each of the first numModesForFullRD entries
// of uiRdModeList is coded in turn, and the mode with the lowest cost is kept in
// uiBestPUMode / dBestPUCost together with its luma and chroma distortions.
for( UInt uiMode = 0; uiMode < numModesForFullRD; uiMode++ )// run RDO over the mode set left after RMD and MPM
{
// set luma prediction mode
UInt uiOrgMode = uiRdModeList[uiMode];
pcCU->setLumaIntraDirSubParts ( uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
// set context models
// (reload the coder state stored at [uiDepth][CI_CURR_BEST] before coding this candidate)
if( m_bUseSBACRD )
{
m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );
}
// determine residual for partition
// Per-candidate results, reset for every mode and handed to xRecurIntraCodingQT;
// presumably filled in by that call (they are compared and stored below) — confirm
// against its definition.
UInt uiPUDistY = 0;
UInt uiPUDistC = 0;
Double dPUCost = 0.0;
#if HHI_RQT_INTRA_SPEEDUP
// This variant passes an extra `true` before dPUCost.
// NOTE(review): according to the accompanying text this flag restricts coding to
// the largest TU size — confirm against xRecurIntraCodingQT.
xRecurIntraCodingQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcOrgYuv, pcPredYuv, pcResiYuv, uiPUDistY, uiPUDistC, true, dPUCost );
#else
xRecurIntraCodingQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcOrgYuv, pcPredYuv, pcResiYuv, uiPUDistY, uiPUDistC, dPUCost );
#endif
// check r-d cost
// (strict '<': on equal cost the earlier candidate stays the best)
if( dPUCost < dBestPUCost )
{
#if HHI_RQT_INTRA_SPEEDUP_MOD
// demote the previous best to second best before it is overwritten
uiSecondBestMode = uiBestPUMode;
dSecondBestPUCost = dBestPUCost;
#endif
uiBestPUMode = uiOrgMode;
uiBestPUDistY = uiPUDistY;
uiBestPUDistC = uiPUDistC;
dBestPUCost = dPUCost;
// presumably stores the result of the new best mode via pcRecoYuv — confirm against xSetIntraResultQT
xSetIntraResultQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcRecoYuv );
// number of entries to snapshot: getNumPartInCU() shifted right by 2 * (getDepth(0) + uiInitTrDepth)
UInt uiQPartNum = pcCU->getPic()->getNumPartInCU() >> ( ( pcCU->getDepth(0) + uiInitTrDepth ) << 1 );
// copy the transform index, the three CBF arrays and the three transform-skip arrays
// for uiQPartNum entries starting at uiPartOffset into the m_puhQTTemp* buffers
::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[0], pcCU->getCbf( TEXT_LUMA ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[1], pcCU->getCbf( TEXT_CHROMA_U ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[2], pcCU->getCbf( TEXT_CHROMA_V ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[0], pcCU->getTransformSkip(TEXT_LUMA) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[1], pcCU->getTransformSkip(TEXT_CHROMA_U) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[2], pcCU->getTransformSkip(TEXT_CHROMA_V) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
}
#if HHI_RQT_INTRA_SPEEDUP_MOD
// not the best, but cheaper than the current runner-up: remember it as second best
else if( dPUCost < dSecondBestPUCost )
{
uiSecondBestMode = uiOrgMode;
dSecondBestPUCost = dPUCost;
}
#endif
} // Mode loop
// NOTE(review): this brace block repeats the body of the mode loop directly above
// it line for line, but without the `for` header, so `uiMode` is not declared in
// this scope. It looks like a paste duplicate of the excerpt — confirm and remove.
{
// set luma prediction mode
UInt uiOrgMode = uiRdModeList[uiMode];
pcCU->setLumaIntraDirSubParts ( uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
// set context models
if( m_bUseSBACRD )
{
m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );
}
// determine residual for partition
UInt uiPUDistY = 0;
UInt uiPUDistC = 0;
Double dPUCost = 0.0;
#if HHI_RQT_INTRA_SPEEDUP
xRecurIntraCodingQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcOrgYuv, pcPredYuv, pcResiYuv, uiPUDistY, uiPUDistC, true, dPUCost );
#else
xRecurIntraCodingQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcOrgYuv, pcPredYuv, pcResiYuv, uiPUDistY, uiPUDistC, dPUCost );
#endif
// check r-d cost
if( dPUCost < dBestPUCost )
{
#if HHI_RQT_INTRA_SPEEDUP_MOD
// previous best becomes the runner-up
uiSecondBestMode = uiBestPUMode;
dSecondBestPUCost = dBestPUCost;
#endif
uiBestPUMode = uiOrgMode;
uiBestPUDistY = uiPUDistY;
uiBestPUDistC = uiPUDistC;
dBestPUCost = dPUCost;
xSetIntraResultQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcRecoYuv );
UInt uiQPartNum = pcCU->getPic()->getNumPartInCU() >> ( ( pcCU->getDepth(0) + uiInitTrDepth ) << 1 );
// snapshot transform index, CBF and transform-skip arrays into the m_puhQTTemp* buffers
::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[0], pcCU->getCbf( TEXT_LUMA ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[1], pcCU->getCbf( TEXT_CHROMA_U ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[2], pcCU->getCbf( TEXT_CHROMA_V ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[0], pcCU->getTransformSkip(TEXT_LUMA) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[1], pcCU->getTransformSkip(TEXT_CHROMA_U) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[2], pcCU->getTransformSkip(TEXT_CHROMA_V) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
}
#if HHI_RQT_INTRA_SPEEDUP_MOD
else if( dPUCost < dSecondBestPUCost )
{
uiSecondBestMode = uiOrgMode;
dSecondBestPUCost = dPUCost;
}
#endif
} // Mode loop
RQT过程的主要函数为:
// Same call as in the RDO mode loop's HHI_RQT_INTRA_SPEEDUP branch, except that the
// second-to-last argument is `false` here instead of `true`.
xRecurIntraCodingQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcOrgYuv, pcPredYuv, pcResiYuv, uiPUDistY, uiPUDistC, false, dPUCost );
该函数的倒数第二个参数用于控制编码是只在最大TU尺寸上进行(取true,如上面的RDO过程),还是在各个TU深度上进行(取false,即这里的RQT过程)。
如果要统计各个过程所需的时间,只要调用C标准库函数clock()即可。即在该过程开始前和结束后各记录一次clock()的返回值,两者之差除以CLOCKS_PER_SEC就是该过程所耗的时间(单位为秒;注意clock()统计的是处理器时间)。