/** Luma intra mode estimation for one CU.
 *
 * For every luma PU of the CU (one for SIZE_2Nx2N, otherwise four) this routine
 *  1. optionally runs a rough first pass over all 35 intra modes and keeps the
 *     cheapest candidates (plus any missing most-probable modes),
 *  2. runs xRecurIntraCodingLumaQT() on each remaining candidate and keeps the
 *     mode with the lowest returned cost,
 *  3. writes the winning mode, transform index, cbf and transform-skip flags
 *     back into the CU and accumulates the luma distortion.
 *
 * \param pcCU       CU being estimated; its QP, intra direction, transform index,
 *                   cbf, transform-skip and total distortion fields are written.
 * \param pcOrgYuv   source of the original luma samples (read via getAddr()).
 * \param pcPredYuv  buffer the prediction is written into by predIntraAng().
 * \param pcResiYuv  residual buffer, forwarded to xRecurIntraCodingLumaQT().
 * \param pcRecoYuv  receives the reconstruction of the best mode through
 *                   xSetIntraResultLumaQT(); also copied into the picture
 *                   reconstruction buffer for all but the last PU.
 * \param resiLuma   per-residual-type luma residual storage; only updated (via
 *                   xStoreCrossComponentPredictionResult()) when the PPS enables
 *                   cross-component prediction.
 * \param sDebug     debug string (present only when the DEBUG_STRING macros are
 *                   active); the best PU's debug text is appended to it.
 *
 * NOTE: there is intentionally no comma after the resiLuma parameter -
 * DEBUG_STRING_FN_DECLARE is a macro placed directly behind the last parameter
 * (presumably supplying its own separator when enabled - confirm against its definition).
 */
Void
TEncSearch::estIntraPredLumaQT(TComDataCU* pcCU,
TComYuv* pcOrgYuv,
TComYuv* pcPredYuv,
TComYuv* pcResiYuv,
TComYuv* pcRecoYuv,
Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]
DEBUG_STRING_FN_DECLARE(sDebug))
{
const UInt uiDepth = pcCU->getDepth(0);
//< Intra partition mode decides whether the CU is split: initial transform depth is 0 for SIZE_2Nx2N, otherwise 1
const UInt uiInitTrDepth = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
// 1 PU when not split, 4 PUs when split
const UInt uiNumPU = 1<<(2*uiInitTrDepth);
// number of partitions in one quarter of the CU
const UInt uiQNumParts = pcCU->getTotalNumPart() >> 2;
// index used below to look up the number of modes kept for full RD
const UInt uiWidthBit = pcCU->getIntraSizeIdx(0);
const ChromaFormat chFmt = pcCU->getPic()->getChromaFormat();
const UInt numberValidComponents = getNumberValidComponents(chFmt);
const TComSPS &sps = *(pcCU->getSlice()->getSPS());
const TComPPS &pps = *(pcCU->getSlice()->getPPS());
Distortion uiOverallDistY = 0;
// NOTE(review): CandNum is accumulated in the first pass but never read in this function
UInt CandNum;
Double CandCostList[ FAST_UDI_MAX_RDMODE_NUM ];
// residual of the PU currently being tested; copied into resiLuma only for the best mode
Pel resiLumaPU[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];
Bool bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES];
for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex++)
{
bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise
}
// the encoder-side residual is not kept when the reconstruction-based cross-component estimate is used
bMaintainResidual[RESIDUAL_ENCODER_SIDE] = !(m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate());
// Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantisation divisor is 1.
// For a transquant-bypass CU in mixed lossless/lossy cost mode a fixed-QP lambda is derived here;
// in every other case the sqrt(lambda) held by m_pcRdCost is used.
#if FULL_NBIT
const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ?
sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0)))
: m_pcRdCost->getSqrtLambda();
#else
// same as above, with the QP additionally offset by 6 per luma bit above 8
const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ?
sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (sps.getBitDepth(CHANNEL_TYPE_LUMA) - 8)) / 3.0)))
: m_pcRdCost->getSqrtLambda();
#endif
//===== set QP and clear Cbf =====
// with delta-QP enabled keep the CU's own QP, otherwise fall back to the slice QP
if ( pps.getUseDQP() == true)
{
pcCU->setQPSubParts( pcCU->getQP(0), 0, uiDepth );
}
else
{
pcCU->setQPSubParts( pcCU->getSlice()->getSliceQp(), 0, uiDepth );
}
//===== loop over partitions =====
TComTURecurse tuRecurseCU(pcCU, 0);
// one section for 2Nx2N, a quad split otherwise; each section is handled as one PU by the loop below
TComTURecurse tuRecurseWithPU(tuRecurseCU, false, (uiInitTrDepth==0)?TComTU::DONT_SPLIT : TComTU::QUAD_SPLIT);
do
{
const UInt uiPartOffset=tuRecurseWithPU.GetAbsPartIdxTU();
// (older explicit PU loop, kept for reference; superseded by the TComTURecurse iteration)
// for( UInt uiPU = 0, uiPartOffset=0; uiPU < uiNumPU; uiPU++, uiPartOffset += uiQNumParts )
//{
//===== init pattern for luma prediction =====
DEBUG_STRING_NEW(sTemp2)
//===== determine set of modes to be tested (using prediction signal only) =====
Int numModesAvailable = 35; //total number of Intra modes
UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];
// number of candidates kept for full RD, looked up by block size index; the table depends on whether MPMs get added later
Int numModesForFullRD = m_pcEncCfg->getFastUDIUseMPMEnabled()?g_aucIntraModeNumFast_UseMPM[ uiWidthBit ] : g_aucIntraModeNumFast_NotUseMPM[ uiWidthBit ];
// this should always be true
assert (tuRecurseWithPU.ProcessComponentSection(COMPONENT_Y));
//< Reference sample handling, including generation of the filtered reference samples
initIntraPatternChType( tuRecurseWithPU, COMPONENT_Y, true DEBUG_STRING_PASS_INTO(sTemp2) );
// the rough first pass is only needed when fewer than all modes go through full RD
Bool doFastSearch = (numModesForFullRD != numModesAvailable);
//< Rough estimation (fast RD pre-selection)
if (doFastSearch)
{
assert(numModesForFullRD < numModesAvailable);
for( Int i=0; i < numModesForFullRD; i++ )
{
CandCostList[ i ] = MAX_DOUBLE;
}
CandNum = 0;
const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y);
const UInt uiAbsPartIdx=tuRecurseWithPU.GetAbsPartIdxTU();
Pel* piOrg = pcOrgYuv ->getAddr( COMPONENT_Y, uiAbsPartIdx );
Pel* piPred = pcPredYuv->getAddr( COMPONENT_Y, uiAbsPartIdx );
// NOTE(review): the prediction buffer's stride is also passed for the original buffer below - presumably both share the same stride; confirm
UInt uiStride = pcPredYuv->getStride( COMPONENT_Y );
DistParam distParam;
// Hadamard-based distortion is requested unless the CU uses transquant bypass
const Bool bUseHadamard=pcCU->getCUTransquantBypass(0) == 0;
m_pcRdCost->setDistParam(distParam, sps.getBitDepth(CHANNEL_TYPE_LUMA), piOrg, uiStride, piPred, uiStride, puRect.width, puRect.height, bUseHadamard);
distParam.bApplyWeight = false;
// evaluate every intra mode with the cheap prediction-only cost
for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )
{
UInt uiMode = modeIdx;
Distortion uiSad = 0;
const Bool bUseFilter=TComPrediction::filteringIntraReferenceSamples(COMPONENT_Y, uiMode, puRect.width, puRect.height, chFmt, sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag());
predIntraAng( COMPONENT_Y, uiMode, piOrg, uiStride, piPred, uiStride, tuRecurseWithPU, bUseFilter, TComPrediction::UseDPCMForFirstPassIntraEstimation(tuRecurseWithPU, uiMode) );
// use hadamard transform here
uiSad+=distParam.DistFunc(&distParam);
UInt iModeBits = 0;
// NB xModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
//< Bits needed to code the current intra prediction mode
iModeBits+=xModeBitsIntra( pcCU, uiMode, uiPartOffset, uiDepth, CHANNEL_TYPE_LUMA );
//< Rough-estimation cost: distortion + sqrt(lambda) * bits needed for the prediction mode
Double cost = (Double)uiSad + (Double)iModeBits * sqrtLambdaForFirstPass;
#if DEBUG_INTRA_SEARCH_COSTS
std::cout << "1st pass mode " << uiMode << " SAD = " << uiSad << ", mode bits = " << iModeBits << ", cost = " << cost << "\n";
#endif
//< Update the candidate list
CandNum += xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList );
}
if (m_pcEncCfg->getFastUDIUseMPMEnabled())
{
Int uiPreds[NUM_MOST_PROBABLE_MODES] = {-1, -1, -1};
Int iMode = -1;
//< Build the most probable mode (MPM) list from the neighbouring PUs
pcCU->getIntraDirPredictor( uiPartOffset, uiPreds, COMPONENT_Y, &iMode );
// if iMode comes back non-negative it limits how many predictors are checked; otherwise all NUM_MOST_PROBABLE_MODES are
const Int numCand = ( iMode >= 0 ) ? iMode : Int(NUM_MOST_PROBABLE_MODES);
//< If an MPM is not yet in the candidate list, append it to the list
for( Int j=0; j < numCand; j++)
{
Bool mostProbableModeIncluded = false;
Int mostProbableMode = uiPreds[j];
for( Int i=0; i < numModesForFullRD; i++)
{
mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]);
}
if (!mostProbableModeIncluded)
{
// grows the full-RD list by one entry
uiRdModeList[numModesForFullRD++] = mostProbableMode;
}
}
}
}//< otherwise: full RD, every mode index below numModesForFullRD is tested
else
{
for( Int i=0; i < numModesForFullRD; i++)
{
uiRdModeList[i] = i;
}
}
//===== check modes (using r-d costs) =====
#if HHI_RQT_INTRA_SPEEDUP_MOD
// runner-up mode, re-tested together with the best one in the refinement loop further down
UInt uiSecondBestMode = MAX_UINT;
Double dSecondBestPUCost = MAX_DOUBLE;
#endif
DEBUG_STRING_NEW(sPU)
UInt uiBestPUMode = 0;
Distortion uiBestPUDistY = 0;
Double dBestPUCost = MAX_DOUBLE;
// NOTE: the two preprocessor branches below only supply alternative loop headers; both share the loop body that follows #endif
#if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
UInt max=numModesForFullRD;
if (DebugOptionList::ForceLumaMode.isSet())
{
max=0; // we are forcing a direction, so don't bother with mode check
}
for ( UInt uiMode = 0; uiMode < max; uiMode++)
#else
//< Full RD: start the fine selection among the candidates
for( UInt uiMode = 0; uiMode < numModesForFullRD; uiMode++ )
#endif
{
// set luma prediction mode
UInt uiOrgMode = uiRdModeList[uiMode];
pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
DEBUG_STRING_NEW(sMode)
// set context models
m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );
// determine residual for partition
Distortion uiPUDistY = 0;
Double dPUCost = 0.0;
#if HHI_RQT_INTRA_SPEEDUP
//< Recurse over the TU quadtree (extra flag 'true' here; the refinement pass below passes 'false')
xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, true, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
#else
xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
#endif
#if DEBUG_INTRA_SEARCH_COSTS
std::cout << "2nd pass [luma,chroma] mode [" << Int(pcCU->getIntraDir(CHANNEL_TYPE_LUMA, uiPartOffset)) << "," << Int(pcCU->getIntraDir(CHANNEL_TYPE_CHROMA, uiPartOffset)) << "] cost = " << dPUCost << "\n";
#endif
// check r-d cost
if( dPUCost < dBestPUCost )
{
DEBUG_STRING_SWAP(sPU, sMode)
#if HHI_RQT_INTRA_SPEEDUP_MOD
// the previous best becomes the runner-up
uiSecondBestMode = uiBestPUMode;
dSecondBestPUCost = dBestPUCost;
#endif
uiBestPUMode = uiOrgMode;
uiBestPUDistY = uiPUDistY;
dBestPUCost = dPUCost;
xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU );
if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag())
{
const Int xOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).x0;
const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0;
// keep the luma residual of the new best mode for each residual type that is maintained
for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
{
if (bMaintainResidual[storedResidualIndex])
{
xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE );
}
}
}
// save transform index, cbf and transform-skip flags of the new best mode into the temporary buffers,
// because testing the next mode overwrites the CU's own arrays
UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) );
for (UInt component = 0; component < numberValidComponents; component++)
{
const ComponentID compID = ComponentID(component);
::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip(compID) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
}
}
#if HHI_RQT_INTRA_SPEEDUP_MOD
else if( dPUCost < dSecondBestPUCost )
{
uiSecondBestMode = uiOrgMode;
dSecondBestPUCost = dPUCost;
}
#endif
} // Mode loop
#if HHI_RQT_INTRA_SPEEDUP
// Refinement pass: re-run the quadtree coding with the flag set to 'false' for the best mode
// (and, with HHI_RQT_INTRA_SPEEDUP_MOD, also for the runner-up)
#if HHI_RQT_INTRA_SPEEDUP_MOD
for( UInt ui =0; ui < 2; ++ui )
#endif
{
#if HHI_RQT_INTRA_SPEEDUP_MOD
UInt uiOrgMode = ui ? uiSecondBestMode : uiBestPUMode;
// MAX_UINT means no runner-up was recorded
if( uiOrgMode == MAX_UINT )
{
break;
}
#else
UInt uiOrgMode = uiBestPUMode;
#endif
#if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
if (DebugOptionList::ForceLumaMode.isSet())
{
uiOrgMode = DebugOptionList::ForceLumaMode.getInt();
}
#endif
pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
DEBUG_STRING_NEW(sModeTree)
// set context models
m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );
// determine residual for partition
Distortion uiPUDistY = 0;
Double dPUCost = 0.0;
xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, false, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sModeTree));
// check r-d cost
if( dPUCost < dBestPUCost )
{
DEBUG_STRING_SWAP(sPU, sModeTree)
uiBestPUMode = uiOrgMode;
uiBestPUDistY = uiPUDistY;
dBestPUCost = dPUCost;
xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU );
if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag())
{
const Int xOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).x0;
const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0;
for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
{
if (bMaintainResidual[storedResidualIndex])
{
xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE );
}
}
}
// same bookkeeping as in the first mode loop: remember the transform data of the improved result
const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) );
for (UInt component = 0; component < numberValidComponents; component++)
{
const ComponentID compID = ComponentID(component);
::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip(compID) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
}
}
} // Mode loop
#endif
DEBUG_STRING_APPEND(sDebug, sPU)
//--- update overall distortion ---
uiOverallDistY += uiBestPUDistY;
//--- update transform index and cbf ---
// restore the best mode's data from the temporary buffers into the CU
const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
::memcpy( pcCU->getTransformIdx() + uiPartOffset, m_puhQTTempTrIdx, uiQPartNum * sizeof( UChar ) );
for (UInt component = 0; component < numberValidComponents; component++)
{
const ComponentID compID = ComponentID(component);
::memcpy( pcCU->getCbf( compID ) + uiPartOffset, m_puhQTTempCbf[compID], uiQPartNum * sizeof( UChar ) );
::memcpy( pcCU->getTransformSkip( compID ) + uiPartOffset, m_puhQTTempTransformSkipFlag[compID ], uiQPartNum * sizeof( UChar ) );
}
//--- set reconstruction for next intra prediction blocks ---
// copy this PU's reconstructed luma into the picture reconstruction buffer; skipped for the last section
if( !tuRecurseWithPU.IsLastSection() )
{
const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y);
const UInt uiCompWidth = puRect.width;
const UInt uiCompHeight = puRect.height;
const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiPartOffset;
Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), uiZOrder );
const UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride( COMPONENT_Y);
const Pel* piSrc = pcRecoYuv->getAddr( COMPONENT_Y, uiPartOffset );
const UInt uiSrcStride = pcRecoYuv->getStride( COMPONENT_Y);
// row-by-row sample copy, advancing each pointer by its own stride
for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
{
for( UInt uiX = 0; uiX < uiCompWidth; uiX++ )
{
piDes[ uiX ] = piSrc[ uiX ];
}
}
}
//=== update PU data ====
pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiBestPUMode, uiPartOffset, uiDepth + uiInitTrDepth );
} while (tuRecurseWithPU.nextSection(tuRecurseCU));
if( uiNumPU > 1 )
{ // set Cbf for all blocks
UInt uiCombCbfY = 0;
UInt uiCombCbfU = 0;
UInt uiCombCbfV = 0;
UInt uiPartIdx = 0;
// OR together the cbf read at the first partition of each of the four quadrants
// (third argument 1 is presumably the transform depth - confirm against getCbf)
for( UInt uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += uiQNumParts )
{
uiCombCbfY |= pcCU->getCbf( uiPartIdx, COMPONENT_Y, 1 );
uiCombCbfU |= pcCU->getCbf( uiPartIdx, COMPONENT_Cb, 1 );
uiCombCbfV |= pcCU->getCbf( uiPartIdx, COMPONENT_Cr, 1 );
}
// merge the combined value into the cbf entry of every partition of the CU
for( UInt uiOffs = 0; uiOffs < 4 * uiQNumParts; uiOffs++ )
{
pcCU->getCbf( COMPONENT_Y )[ uiOffs ] |= uiCombCbfY;
pcCU->getCbf( COMPONENT_Cb )[ uiOffs ] |= uiCombCbfU;
pcCU->getCbf( COMPONENT_Cr )[ uiOffs ] |= uiCombCbfV;
}
}
//===== reset context models =====
m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
//===== set distortion (rate and r-d costs are determined later) =====
pcCU->getTotalDistortion() = uiOverallDistY;
}