xCheckRDCostAffineMerge2Nx2N函数和xCheckRDCostMerge2Nx2N函数类似,但是过程要比其简单很多。xCheckRDCostAffineMerge2Nx2N函数是用来选出Affine Merge模式中的最佳候选MV。
主要过程如下:
- 获取Affine Merge候选列表,Affine Merge候选列表的推导参考:仿射运动补偿预测
- 若开启快速Merge模式,根据当前CU的编码信息缩减进行细选的Merge模式数目
- 若当前CU最佳模式不是Skip模式,则先遍历可用候选Merge模式,进行运动补偿计算预测值,然后根据失真(未禁用SATD时为SATD,否则为SAD)和Merge索引比特数计算代价并更新RD代价列表;遍历结束后,如果代价列表中第i项的代价 > 列表首项的代价*MRG_FAST_RATIO(1.25),则将细选模式数设置为i
- 否则(即当前CU最佳模式为Skip模式),跳过上述粗选过程,进行细选的模式数为可用的Merge模式数
- 进行细选,细选过程和xCheckRDCostMerge2Nx2N的细选过程类似
代码及注释如下:
// Tests the affine merge candidates of the current CU area and lets each
// shortlisted candidate compete against bestCS via xEncodeInterResidual.
//
// Parameters:
//   tempCS      - scratch coding structure; it is re-initialised with
//                 initStructData( encTestMode.qp ) around every candidate trial.
//   bestCS      - best coding structure so far; forwarded to
//                 xEncodeInterResidual (presumably updated there - not visible here).
//   partitioner - provides chType and the CU data copied into each trial CU.
//   encTestMode - provides the QP used for the trial CUs.
//
// Flow:
//   1. Early-outs (fast delta QP, luma width/height below 8, no affine merge
//      candidates allowed by the SPS, empty candidate list).
//   2. Build the affine merge candidate list.
//   3. Optional fast-merge pre-selection: rank candidates by
//      distortion + bits * lambda and shrink the number that go to step 4.
//   4. Two passes (uiNoResidualPass = 0, 1) over the shortlisted candidates,
//      each one encoded through xEncodeInterResidual.
//   5. Optional early-skip detection after pass 0, and xCalDebCost on bestCS
//      if the best mode was updated.
void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
{
// Nothing is tested when m_modeCtrl reports fast delta QP.
if( m_modeCtrl->getFastDeltaQp() )
{
return;
}
// Blocks whose luma width or height is below 8 are not tested.
if ( bestCS->area.lumaSize().width < 8 || bestCS->area.lumaSize().height < 8 )
{
return;
}
m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
const Slice &slice = *tempCS->slice;
CHECK( slice.getSliceType() == I_SLICE, "Affine Merge modes not available for I-slices" );
tempCS->initStructData( encTestMode.qp );
AffineMergeCtx affineMergeCtx; // affine merge context
const SPS &sps = *tempCS->sps;
if (sps.getMaxNumAffineMergeCand() == 0)
{
return;
}
// Reset the stored affine best SATD cost; it is overwritten below only if the
// fast-merge pre-selection runs.
setAFFBestSATDCost(MAX_DOUBLE);
MergeCtx mrgCtx;
// With SbTMVP enabled, attach a sub-PU motion buffer (scaled from the luma
// size) to mrgCtx and link mrgCtx into the affine merge context.
if (sps.getSbTMVPEnabledFlag())
{
Size bufSize = g_miScaling.scale( tempCS->area.lumaSize() );
mrgCtx.subPuMvpMiBuf = MotionBuf( m_SubPuMiBuf, bufSize );
affineMergeCtx.mrgCtx = &mrgCtx;
}
{
// first get merge candidates
// A local CU/PU pair (not added to tempCS) is used only to derive the list.
CodingUnit cu( tempCS->area );
cu.cs = tempCS;
cu.predMode = MODE_INTER;
cu.slice = tempCS->slice;
cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
cu.mmvdSkip = false;
PredictionUnit pu( tempCS->area );
pu.cu = &cu;
pu.cs = tempCS;
pu.regularMergeFlag = false;
PU::getAffineMergeCand( pu, affineMergeCtx ); // fills affineMergeCtx with the candidates
if ( affineMergeCtx.numValidMergeCand <= 0 )
{
return;
}
}
// Per-candidate flag; xEncodeInterResidual receives a pointer to the entry
// during pass 0, and flagged candidates are skipped in pass 1.
bool candHasNoResidual[AFFINE_MRG_MAX_NUM_CANDS];
for ( uint32_t ui = 0; ui < affineMergeCtx.numValidMergeCand; ui++ )
{
candHasNoResidual[ui] = false;
}
bool bestIsSkip = false;
// Number of candidates that go through the full RD check; defaults to all
// valid candidates and may be reduced by the fast-merge pre-selection.
uint32_t uiNumMrgSATDCand = affineMergeCtx.numValidMergeCand;
PelUnitBuf acMergeBuffer[AFFINE_MRG_MAX_NUM_CANDS];
static_vector<uint32_t, AFFINE_MRG_MAX_NUM_CANDS> RdModeList;
// True once the pre-selection has stored predictions in acMergeBuffer.
bool mrgTempBufSet = false;
// Default test order: candidate indices in their natural order.
for ( uint32_t i = 0; i < AFFINE_MRG_MAX_NUM_CANDS; i++ )
{
RdModeList.push_back( i );
}
if ( m_pcEncCfg->getUseFastMerge() ) // fast merge mode
{
uiNumMrgSATDCand = std::min( NUM_AFF_MRG_SATD_CAND, affineMergeCtx.numValidMergeCand );
bestIsSkip = false;
// If the mode controller is a CacheBlkInfoCtrl, ask it whether this area is
// recorded as skip.
if ( auto blkCache = dynamic_cast<CacheBlkInfoCtrl*>(m_modeCtrl) )
{
bestIsSkip = blkCache->isSkip( tempCS->area );
}
static_vector<double, AFFINE_MRG_MAX_NUM_CANDS> candCostList;
// 1. Pass: get SATD-cost for selected candidates and reduce their count
if ( !bestIsSkip )
{
// RdModeList is rebuilt from scratch by updateCandList below.
RdModeList.clear();
mrgTempBufSet = true;
const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( );
CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType );
partitioner.setCUData( cu );
cu.slice = tempCS->slice;
cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
cu.skip = false;
cu.affine = true;
cu.predMode = MODE_INTER;
cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
cu.qp = encTestMode.qp;
PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType );
DistParam distParam;
// Hadamard-based distortion is used unless the slice disables SATD for RD.
const bool bUseHadamard = !tempCS->slice->getDisableSATDForRD();
// Luma-only distortion between the original block and a prediction buffer;
// distParam.cur is re-pointed to each candidate's buffer inside the loop.
m_pcRdCost->setDistParam( distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, bUseHadamard );
const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height ) );
// Iterate over all valid affine merge candidates
for ( uint32_t uiMergeCand = 0; uiMergeCand < affineMergeCtx.numValidMergeCand; uiMergeCand++ )
{
acMergeBuffer[uiMergeCand] = m_acMergeBuffer[uiMergeCand].getBuf( localUnitArea );
// set merge information
pu.interDir = affineMergeCtx.interDirNeighbours[uiMergeCand];
pu.mergeFlag = true;
pu.regularMergeFlag = false;
pu.mergeIdx = uiMergeCand;
cu.affineType = affineMergeCtx.affineType[uiMergeCand];
cu.BcwIdx = affineMergeCtx.BcwIdx[uiMergeCand];
pu.mergeType = affineMergeCtx.mergeType[uiMergeCand];
if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP )
{
// Sub-PU ATMVP candidate: only the reference indices are set here; motion
// is spread using mrgCtx.
pu.refIdx[0] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][0].refIdx;
pu.refIdx[1] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][0].refIdx;
PU::spanMotionInfo( pu, mrgCtx );
}
else
{
// Other candidates: set the affine MV fields for both reference lists.
PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0], REF_PIC_LIST_0 );
PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1], REF_PIC_LIST_1 );
PU::spanMotionInfo( pu );
}
distParam.cur = acMergeBuffer[uiMergeCand].Y();
// Motion compensation into this candidate's buffer.
// NOTE(review): the trailing (true, false) presumably selects luma only - confirm.
m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand], REF_PIC_LIST_X, true, false );
// Named uiSad, but the metric depends on bUseHadamard above.
Distortion uiSad = distParam.distFunc( distParam );
// Bit estimate for the merge index: index + 1, minus one for the last
// allowed index.
uint32_t uiBitsCand = uiMergeCand + 1;
if ( uiMergeCand == tempCS->picHeader->getMaxNumAffineMergeCand() - 1 )
{
uiBitsCand--;
}
double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass;
// Insert into the candidate/cost lists, which hold at most uiNumMrgSATDCand entries.
// NOTE(review): updateCandList presumably keeps them sorted by ascending cost - confirm.
updateCandList( uiMergeCand, cost, RdModeList, candCostList
, uiNumMrgSATDCand );
CHECK( std::min( uiMergeCand + 1, uiNumMrgSATDCand ) != RdModeList.size(), "" );
}
// Try to limit number of candidates using SATD-costs: cut the list at the
// first entry whose cost exceeds MRG_FAST_RATIO times the first entry's cost.
for ( uint32_t i = 1; i < uiNumMrgSATDCand; i++ )
{
if ( candCostList[i] > MRG_FAST_RATIO * candCostList[0] )
{
uiNumMrgSATDCand = i;
break;
}
}
// Discard the CU/PU added for the pre-selection.
tempCS->initStructData( encTestMode.qp );
setAFFBestSATDCost(candCostList[0]);
}
else
{
// Area is cached as skip: no pre-selection, all valid candidates are kept.
uiNumMrgSATDCand = affineMergeCtx.numValidMergeCand;
}
}
// 2. Pass: full check of the shortlisted candidates, run twice. The pass
// index is forwarded to xEncodeInterResidual as uiNoResidualPass.
uint32_t iteration;
uint32_t iterationBegin = 0;
iteration = 2;
for (uint32_t uiNoResidualPass = iterationBegin; uiNoResidualPass < iteration; ++uiNoResidualPass)
{
for ( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ )
{
uint32_t uiMergeCand = RdModeList[uiMrgHADIdx];
// Pass 1 skips candidates flagged in candHasNoResidual during pass 0;
// pass 0 is skipped for every candidate once bestIsSkip is set.
if ( ((uiNoResidualPass != 0) && candHasNoResidual[uiMergeCand])
|| ((uiNoResidualPass == 0) && bestIsSkip) )
{
continue;
}
// first get merge candidates
CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType );
partitioner.setCUData( cu );
cu.slice = tempCS->slice;
cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
cu.skip = false;
cu.affine = true;
cu.predMode = MODE_INTER;
cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
cu.qp = encTestMode.qp;
PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType );
// set merge information
pu.mergeFlag = true;
pu.mergeIdx = uiMergeCand;
pu.interDir = affineMergeCtx.interDirNeighbours[uiMergeCand];
cu.affineType = affineMergeCtx.affineType[uiMergeCand];
cu.BcwIdx = affineMergeCtx.BcwIdx[uiMergeCand];
pu.mergeType = affineMergeCtx.mergeType[uiMergeCand];
if ( pu.mergeType == MRG_TYPE_SUBPU_ATMVP )
{
pu.refIdx[0] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0][0].refIdx;
pu.refIdx[1] = affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1][0].refIdx;
PU::spanMotionInfo( pu, mrgCtx );
}
else
{
PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0], REF_PIC_LIST_0 );
PU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1], REF_PIC_LIST_1 );
PU::spanMotionInfo( pu );
}
// With the MCTS constraint enabled, a candidate failing the MV buffer check
// ends the whole function (not just this candidate): remaining candidates
// and the second pass are not evaluated.
if( m_pcEncCfg->getMCTSEncConstraint() && ( !( MCTSHelper::checkMvBufferForMCTSConstraint( *cu.firstPU ) ) ) )
{
// Do not use this mode
tempCS->initStructData( encTestMode.qp );
return;
}
if ( mrgTempBufSet )
{
// Reuse the luma prediction stored during pre-selection.
// NOTE(review): the (false, true) arguments presumably make this call
// compute chroma only - confirm.
tempCS->getPredBuf().copyFrom(acMergeBuffer[uiMergeCand], true, false); // Copy Luma Only
m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_X, false, true);
}
else
{
// No cached prediction: run motion compensation with default arguments.
m_pcInterSearch->motionCompensation( pu );
}
// In pass 0 the callee receives a pointer to this candidate's
// candHasNoResidual entry; in pass 1 it receives NULL.
xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, ( uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL ) );
// Fast decision: once the best CU has rootCbf == 0, bestIsSkip is set and
// the remaining pass-0 trials are skipped.
if ( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip )
{
bestIsSkip = bestCS->getCU( partitioner.chType )->rootCbf == 0;
}
tempCS->initStructData( encTestMode.qp );
}// end loop uiMrgHADIdx
// Early-skip detection, evaluated once after pass 0.
if ( uiNoResidualPass == 0 && m_pcEncCfg->getUseEarlySkipDetection() )
{
const CodingUnit &bestCU = *bestCS->getCU( partitioner.chType );
const PredictionUnit &bestPU = *bestCS->getPU( partitioner.chType );
if ( bestCU.rootCbf == 0 )
{
if ( bestPU.mergeFlag )
{
m_modeCtrl->setEarlySkipDetected();
}
else if ( m_pcEncCfg->getMotionEstimationSearchMethod() != MESEARCH_SELECTIVE )
{
// Non-merge best PU: signal early skip only if the summed absolute MVD
// over the reference lists in use is zero.
int absolute_MV = 0;
for ( uint32_t uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++ )
{
if ( slice.getNumRefIdx( RefPicList( uiRefListIdx ) ) > 0 )
{
absolute_MV += bestPU.mvd[uiRefListIdx].getAbsHor() + bestPU.mvd[uiRefListIdx].getAbsVer();
}
}
if ( absolute_MV == 0 )
{
m_modeCtrl->setEarlySkipDetected();
}
}
}
}
}
// Call xCalDebCost on bestCS if the best mode was updated and bestCS holds a
// valid (non-MAX_DOUBLE) cost.
if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
{
xCalDebCost( *bestCS, partitioner );
}
}