/** Select the luma intra prediction mode for every partition (PU) of a CU and
 *  store the winning mode, transform data and reconstruction.
 *
 *  Per partition the function
 *   1. builds a list of candidate modes: either all modes, or (fast search) the
 *      numModesForFullRD cheapest modes by Hadamard cost plus mode-bit cost,
 *      optionally extended by the most probable modes (FAST_UDI_USE_MPM);
 *   2. calls xRecurIntraCodingQT for each candidate and keeps the one with the
 *      lowest dPUCost, saving its transform index / cbf / transform-skip arrays
 *      into the m_puhQTTemp* buffers;
 *   3. when HHI_RQT_INTRA_SPEEDUP is set, re-runs xRecurIntraCodingQT for the best
 *      (and, with HHI_RQT_INTRA_SPEEDUP_MOD, second best) mode with its flag
 *      argument set to false;
 *   4. copies the saved arrays back into the CU, copies the reconstruction into
 *      the picture buffer for every partition but the last, and records the
 *      best mode in the CU.
 *
 *  \param pcCU       CU being searched; modified in place
 *  \param pcOrgYuv   original samples
 *  \param pcPredYuv  prediction buffer (also used as scratch during the fast search)
 *  \param pcResiYuv  residual buffer, forwarded to xRecurIntraCodingQT
 *  \param pcRecoYuv  reconstruction buffer, filled through xSetIntraResultQT
 *  \param ruiDistC   receives the chroma distortion summed over all partitions
 *  \param bLumaOnly  forwarded to xRecurIntraCodingQT / xSetIntraResultQT; when true
 *                    no chroma reconstruction is copied to the picture buffer
 */
Void
TEncSearch::estIntraPredQT( TComDataCU* pcCU,
TComYuv* pcOrgYuv,
TComYuv* pcPredYuv,
TComYuv* pcResiYuv,
TComYuv* pcRecoYuv,
UInt& ruiDistC,
Bool bLumaOnly )
{
UInt uiDepth = pcCU->getDepth(0);
UInt uiNumPU = pcCU->getNumPartitions();
// Initial transform depth: 0 for a single 2Nx2N partition, 1 otherwise.
UInt uiInitTrDepth = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
// Partition dimensions: CU dimensions halved when uiInitTrDepth is 1.
UInt uiWidth = pcCU->getWidth (0) >> uiInitTrDepth;
UInt uiHeight = pcCU->getHeight(0) >> uiInitTrDepth;
// A quarter of the CU's partition count; used as the per-PU step of uiPartOffset.
UInt uiQNumParts = pcCU->getTotalNumPart() >> 2;
// Index into g_aucIntraModeNumFast (number of modes kept for full RD).
UInt uiWidthBit = pcCU->getIntraSizeIdx(0);
UInt uiOverallDistY = 0;
UInt uiOverallDistC = 0;
// NOTE(review): CandNum is accumulated in the fast search below but never read in this function.
UInt CandNum;
Double CandCostList[ FAST_UDI_MAX_RDMODE_NUM ];
//===== set QP and clear Cbf =====
if ( pcCU->getSlice()->getPPS()->getUseDQP() == true)
{
pcCU->setQPSubParts( pcCU->getQP(0), 0, uiDepth );
}
else
{
pcCU->setQPSubParts( pcCU->getSlice()->getSliceQp(), 0, uiDepth );
}
//===== loop over partitions =====
UInt uiPartOffset = 0;
for( UInt uiPU = 0; uiPU < uiNumPU; uiPU++, uiPartOffset += uiQNumParts )
{
//===== init pattern for luma prediction =====
// bAboveAvail / bLeftAvail are filled in by initAdiPattern and later passed to predIntraLumaAng.
Bool bAboveAvail = false;
Bool bLeftAvail = false;
pcCU->getPattern()->initPattern ( pcCU, uiInitTrDepth, uiPartOffset );
pcCU->getPattern()->initAdiPattern( pcCU, uiPartOffset, uiInitTrDepth, m_piYuvExt, m_iYuvExtStride, m_iYuvExtHeight, bAboveAvail, bLeftAvail );
//===== determine set of modes to be tested (using prediction signal only) =====
Int numModesAvailable = 35; //total number of Intra modes
Pel* piOrg = pcOrgYuv ->getLumaAddr( uiPU, uiWidth );
Pel* piPred = pcPredYuv->getLumaAddr( uiPU, uiWidth );
// NOTE(review): the prediction buffer's stride is also used for piOrg below —
// assumes pcOrgYuv and pcPredYuv share the same stride; confirm.
UInt uiStride = pcPredYuv->getStride();
UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];
Int numModesForFullRD = g_aucIntraModeNumFast[ uiWidthBit ];
// Fast search is used whenever fewer than all modes are to be fully evaluated.
Bool doFastSearch = (numModesForFullRD != numModesAvailable);
if (doFastSearch)
{
assert(numModesForFullRD < numModesAvailable);
// Start with "infinite" costs so that any real candidate is inserted.
for( Int i=0; i < numModesForFullRD; i++ )
{
CandCostList[ i ] = MAX_DOUBLE;
}
CandNum = 0;
// Rough cost for every mode: Hadamard distortion of the prediction plus
// mode bits weighted by getSqrtLambda(); keep the cheapest numModesForFullRD.
for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )
{
UInt uiMode = modeIdx;
predIntraLumaAng( pcCU->getPattern(), uiMode, piPred, uiStride, uiWidth, uiHeight, bAboveAvail, bLeftAvail );
// use hadamard transform here
UInt uiSad = m_pcRdCost->calcHAD(g_bitDepthY, piOrg, uiStride, piPred, uiStride, uiWidth, uiHeight );
UInt iModeBits = xModeBitsIntra( pcCU, uiMode, uiPU, uiPartOffset, uiDepth, uiInitTrDepth );
Double cost = (Double)uiSad + (Double)iModeBits * m_pcRdCost->getSqrtLambda();
CandNum += xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList );
}
#if FAST_UDI_USE_MPM
// Append every most probable mode that is not already in the candidate list.
Int uiPreds[3] = {-1, -1, -1};
Int iMode = -1;
Int numCand = pcCU->getIntraDirLumaPredictor( uiPartOffset, uiPreds, &iMode );
// A non-negative iMode overrides the number of predictors to consider.
if( iMode >= 0 )
{
numCand = iMode;
}
for( Int j=0; j < numCand; j++)
{
Bool mostProbableModeIncluded = false;
Int mostProbableMode = uiPreds[j];
for( Int i=0; i < numModesForFullRD; i++)
{
mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]);
}
if (!mostProbableModeIncluded)
{
// Grows the list; numModesForFullRD is the loop bound of the RD check below.
uiRdModeList[numModesForFullRD++] = mostProbableMode;
}
}
#endif // FAST_UDI_USE_MPM
}
else
{
// No preselection: test every mode index in order.
for( Int i=0; i < numModesForFullRD; i++)
{
uiRdModeList[i] = i;
}
}
//===== check modes (using r-d costs) =====
#if HHI_RQT_INTRA_SPEEDUP_MOD
// MAX_UINT marks "no second best mode found".
UInt uiSecondBestMode = MAX_UINT;
Double dSecondBestPUCost = MAX_DOUBLE;
#endif
UInt uiBestPUMode = 0;
UInt uiBestPUDistY = 0;
UInt uiBestPUDistC = 0;
Double dBestPUCost = MAX_DOUBLE;
for( UInt uiMode = 0; uiMode < numModesForFullRD; uiMode++ )
{
// set luma prediction mode
UInt uiOrgMode = uiRdModeList[uiMode];
pcCU->setLumaIntraDirSubParts ( uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
// set context models
m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );
// determine residual for partition
UInt uiPUDistY = 0;
UInt uiPUDistC = 0;
Double dPUCost = 0.0;
#if HHI_RQT_INTRA_SPEEDUP
// First pass with the extra flag set to true; the best mode(s) are re-run with false below.
// NOTE(review): the flag's exact meaning is defined by xRecurIntraCodingQT — confirm there.
xRecurIntraCodingQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcOrgYuv, pcPredYuv, pcResiYuv, uiPUDistY, uiPUDistC, true, dPUCost );
#else
xRecurIntraCodingQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcOrgYuv, pcPredYuv, pcResiYuv, uiPUDistY, uiPUDistC, dPUCost );
#endif
// check r-d cost
if( dPUCost < dBestPUCost )
{
#if HHI_RQT_INTRA_SPEEDUP_MOD
// The previous best becomes the second best.
uiSecondBestMode = uiBestPUMode;
dSecondBestPUCost = dBestPUCost;
#endif
uiBestPUMode = uiOrgMode;
uiBestPUDistY = uiPUDistY;
uiBestPUDistC = uiPUDistC;
dBestPUCost = dPUCost;
xSetIntraResultQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcRecoYuv );
// Save this partition's transform index, cbf and transform-skip arrays,
// because testing later modes overwrites them in the CU.
UInt uiQPartNum = pcCU->getPic()->getNumPartInCU() >> ( ( pcCU->getDepth(0) + uiInitTrDepth ) << 1 );
::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[0], pcCU->getCbf( TEXT_LUMA ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[1], pcCU->getCbf( TEXT_CHROMA_U ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[2], pcCU->getCbf( TEXT_CHROMA_V ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[0], pcCU->getTransformSkip(TEXT_LUMA) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[1], pcCU->getTransformSkip(TEXT_CHROMA_U) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[2], pcCU->getTransformSkip(TEXT_CHROMA_V) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
}
#if HHI_RQT_INTRA_SPEEDUP_MOD
else if( dPUCost < dSecondBestPUCost )
{
uiSecondBestMode = uiOrgMode;
dSecondBestPUCost = dPUCost;
}
#endif
} // Mode loop
#if HHI_RQT_INTRA_SPEEDUP
// Second pass: re-evaluate the best mode (and the second best with
// HHI_RQT_INTRA_SPEEDUP_MOD) with the xRecurIntraCodingQT flag set to false.
// A result is adopted only if it beats the cost found in the first pass.
#if HHI_RQT_INTRA_SPEEDUP_MOD
for( UInt ui =0; ui < 2; ++ui )
#endif
{
#if HHI_RQT_INTRA_SPEEDUP_MOD
UInt uiOrgMode = ui ? uiSecondBestMode : uiBestPUMode;
// No second best mode was recorded: nothing more to test.
if( uiOrgMode == MAX_UINT )
{
break;
}
#else
UInt uiOrgMode = uiBestPUMode;
#endif
pcCU->setLumaIntraDirSubParts ( uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
// set context models
m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );
// determine residual for partition
UInt uiPUDistY = 0;
UInt uiPUDistC = 0;
Double dPUCost = 0.0;
xRecurIntraCodingQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcOrgYuv, pcPredYuv, pcResiYuv, uiPUDistY, uiPUDistC, false, dPUCost );
// check r-d cost
if( dPUCost < dBestPUCost )
{
uiBestPUMode = uiOrgMode;
uiBestPUDistY = uiPUDistY;
uiBestPUDistC = uiPUDistC;
dBestPUCost = dPUCost;
xSetIntraResultQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcRecoYuv );
// Same snapshot of transform index / cbf / transform-skip as in the first pass.
UInt uiQPartNum = pcCU->getPic()->getNumPartInCU() >> ( ( pcCU->getDepth(0) + uiInitTrDepth ) << 1 );
::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[0], pcCU->getCbf( TEXT_LUMA ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[1], pcCU->getCbf( TEXT_CHROMA_U ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempCbf[2], pcCU->getCbf( TEXT_CHROMA_V ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[0], pcCU->getTransformSkip(TEXT_LUMA) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[1], pcCU->getTransformSkip(TEXT_CHROMA_U) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[2], pcCU->getTransformSkip(TEXT_CHROMA_V) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
}
} // Mode loop
#endif
//--- update overall distortion ---
uiOverallDistY += uiBestPUDistY;
uiOverallDistC += uiBestPUDistC;
//--- update transform index and cbf ---
// Restore the arrays saved for the winning mode into the CU.
UInt uiQPartNum = pcCU->getPic()->getNumPartInCU() >> ( ( pcCU->getDepth(0) + uiInitTrDepth ) << 1 );
::memcpy( pcCU->getTransformIdx() + uiPartOffset, m_puhQTTempTrIdx, uiQPartNum * sizeof( UChar ) );
::memcpy( pcCU->getCbf( TEXT_LUMA ) + uiPartOffset, m_puhQTTempCbf[0], uiQPartNum * sizeof( UChar ) );
::memcpy( pcCU->getCbf( TEXT_CHROMA_U ) + uiPartOffset, m_puhQTTempCbf[1], uiQPartNum * sizeof( UChar ) );
::memcpy( pcCU->getCbf( TEXT_CHROMA_V ) + uiPartOffset, m_puhQTTempCbf[2], uiQPartNum * sizeof( UChar ) );
::memcpy( pcCU->getTransformSkip(TEXT_LUMA) + uiPartOffset, m_puhQTTempTransformSkipFlag[0], uiQPartNum * sizeof( UChar ) );
::memcpy( pcCU->getTransformSkip(TEXT_CHROMA_U) + uiPartOffset, m_puhQTTempTransformSkipFlag[1], uiQPartNum * sizeof( UChar ) );
::memcpy( pcCU->getTransformSkip(TEXT_CHROMA_V) + uiPartOffset, m_puhQTTempTransformSkipFlag[2], uiQPartNum * sizeof( UChar ) );
//--- set reconstruction for next intra prediction blocks ---
// Not needed after the last partition: no further PU of this CU will predict from it.
if( uiPU != uiNumPU - 1 )
{
Bool bSkipChroma = false;
Bool bChromaSame = false;
UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> ( pcCU->getDepth(0) + uiInitTrDepth ) ] + 2;
// Special case for log2 size 2 (4x4 luma partitions) when chroma is coded:
// chroma is copied only together with the first PU, and with the same
// dimensions as the luma partition instead of half of them.
if( !bLumaOnly && uiLog2TrSize == 2 )
{
assert( uiInitTrDepth > 0 );
bSkipChroma = ( uiPU != 0 );
bChromaSame = true;
}
UInt uiCompWidth = pcCU->getWidth ( 0 ) >> uiInitTrDepth;
UInt uiCompHeight = pcCU->getHeight( 0 ) >> uiInitTrDepth;
UInt uiZOrder = pcCU->getZorderIdxInCU() + uiPartOffset;
// Luma: copy the partition from pcRecoYuv into the picture's reconstruction buffer.
Pel* piDes = pcCU->getPic()->getPicYuvRec()->getLumaAddr( pcCU->getAddr(), uiZOrder );
UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride();
Pel* piSrc = pcRecoYuv->getLumaAddr( uiPartOffset );
UInt uiSrcStride = pcRecoYuv->getStride();
for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
{
for( UInt uiX = 0; uiX < uiCompWidth; uiX++ )
{
piDes[ uiX ] = piSrc[ uiX ];
}
}
if( !bLumaOnly && !bSkipChroma )
{
// Chroma block is half the luma size in each dimension unless bChromaSame is set.
if( !bChromaSame )
{
uiCompWidth >>= 1;
uiCompHeight >>= 1;
}
// Cb plane.
piDes = pcCU->getPic()->getPicYuvRec()->getCbAddr( pcCU->getAddr(), uiZOrder );
uiDesStride = pcCU->getPic()->getPicYuvRec()->getCStride();
piSrc = pcRecoYuv->getCbAddr( uiPartOffset );
uiSrcStride = pcRecoYuv->getCStride();
for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
{
for( UInt uiX = 0; uiX < uiCompWidth; uiX++ )
{
piDes[ uiX ] = piSrc[ uiX ];
}
}
// Cr plane; reuses the chroma strides set for Cb above.
piDes = pcCU->getPic()->getPicYuvRec()->getCrAddr( pcCU->getAddr(), uiZOrder );
piSrc = pcRecoYuv->getCrAddr( uiPartOffset );
for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
{
for( UInt uiX = 0; uiX < uiCompWidth; uiX++ )
{
piDes[ uiX ] = piSrc[ uiX ];
}
}
}
}
//=== update PU data ====
// The mode loops leave the last tested mode in the CU; overwrite it with the winner.
pcCU->setLumaIntraDirSubParts ( uiBestPUMode, uiPartOffset, uiDepth + uiInitTrDepth );
pcCU->copyToPic ( uiDepth, uiPU, uiInitTrDepth );
} // PU loop
if( uiNumPU > 1 )
{ // set Cbf for all blocks
// OR together the cbf values read at the start of each of the four quarters
// (third getCbf argument 1 — presumably the transform depth; confirm) ...
UInt uiCombCbfY = 0;
UInt uiCombCbfU = 0;
UInt uiCombCbfV = 0;
UInt uiPartIdx = 0;
for( UInt uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += uiQNumParts )
{
uiCombCbfY |= pcCU->getCbf( uiPartIdx, TEXT_LUMA, 1 );
uiCombCbfU |= pcCU->getCbf( uiPartIdx, TEXT_CHROMA_U, 1 );
uiCombCbfV |= pcCU->getCbf( uiPartIdx, TEXT_CHROMA_V, 1 );
}
// ... and OR the combined value into the cbf entry of every partition of the CU.
for( UInt uiOffs = 0; uiOffs < 4 * uiQNumParts; uiOffs++ )
{
pcCU->getCbf( TEXT_LUMA )[ uiOffs ] |= uiCombCbfY;
pcCU->getCbf( TEXT_CHROMA_U )[ uiOffs ] |= uiCombCbfU;
pcCU->getCbf( TEXT_CHROMA_V )[ uiOffs ] |= uiCombCbfV;
}
}
//===== reset context models =====
m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
//===== set distortion (rate and r-d costs are determined later) =====
ruiDistC = uiOverallDistC;
pcCU->getTotalDistortion() = uiOverallDistY + uiOverallDistC;
}
/** Write the luma intra prediction for one square block into piPred.
 *
 *  \param pcTComPattern  pattern object that supplies the reference-sample pointer
 *  \param uiDirMode      intra mode; PLANAR_IDX and DC_IDX are handled specially
 *  \param piPred         destination of the predicted samples
 *  \param uiStride       stride of the destination buffer
 *  \param iWidth         block width (must equal iHeight)
 *  \param iHeight        block height
 *  \param bAbove         forwarded to xPredIntraAng; also gates the DC filtering
 *  \param bLeft          forwarded to xPredIntraAng; also gates the DC filtering
 */
Void TComPrediction::predIntraLumaAng(TComPattern* pcTComPattern, UInt uiDirMode, Pel* piPred, UInt uiStride, Int iWidth, Int iHeight, Bool bAbove, Bool bLeft )
{
  // Only square blocks whose size index lies in [0, 5] are supported.
  assert( g_aucConvertToBit[ iWidth ] >= 0 ); // 4x 4
  assert( g_aucConvertToBit[ iWidth ] <= 5 ); // 128x128
  assert( iWidth == iHeight );

  Int* const piRefBase = pcTComPattern->getPredictorPtr( uiDirMode, g_aucConvertToBit[ iWidth ] + 2, m_piYuvExt );

  // The reference array has 2*iWidth+1 entries per row; the prediction routines
  // are handed the entry one row down and one column in from its base.
  const Int iRefStride = 2 * iWidth + 1;
  Int* const piRefStart = piRefBase + iRefStride + 1;

  if ( uiDirMode == PLANAR_IDX )
  {
    xPredIntraPlanar( piRefStart, iRefStride, piPred, uiStride, iWidth, iHeight );
    return;
  }

  // Blocks no larger than 16 in either dimension pass 'true' as the last
  // argument of xPredIntraAng; larger blocks pass 'false'.
  const Bool bSmallBlock = !( (iWidth > 16) || (iHeight > 16) );
  xPredIntraAng( g_bitDepthY, piRefStart, iRefStride, piPred, uiStride, iWidth, iHeight, uiDirMode, bAbove, bLeft, bSmallBlock );

  // Extra DC filtering is applied only to small blocks with both neighbours flagged.
  if ( bSmallBlock && (uiDirMode == DC_IDX) && bAbove && bLeft )
  {
    xDCPredFiltering( piRefStart, iRefStride, piPred, uiStride, iWidth, iHeight );
  }
}
/** Sum the Hadamard costs of all tiles of a block pair.
 *
 *  The block is tiled with 8x8 blocks when both dimensions are multiples of 8,
 *  otherwise with 4x4 blocks (both dimensions must then be multiples of 4).
 *
 *  \param bitDepth  bit depth; the sum is right-shifted by
 *                   DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8)
 *  \param pi0       first block
 *  \param iStride0  stride of the first block
 *  \param pi1       second block
 *  \param iStride1  stride of the second block
 *  \param iWidth    block width
 *  \param iHeight   block height
 *  \return          accumulated, precision-adjusted cost
 */
UInt TComRdCost::calcHAD(Int bitDepth, Pel* pi0, Int iStride0, Pel* pi1, Int iStride1, Int iWidth, Int iHeight )
{
  const Bool bTile8x8 = ( (iWidth % 8) == 0 ) && ( (iHeight % 8) == 0 );
  if ( !bTile8x8 )
  {
    assert(iWidth % 4 == 0 && iHeight % 4 == 0);
  }
  const Int iTile = bTile8x8 ? 8 : 4;

  UInt uiTotal = 0;
  // Advance both block pointers by one tile row per outer iteration.
  for ( Int iRow = 0; iRow < iHeight; iRow += iTile, pi0 += iStride0 * iTile, pi1 += iStride1 * iTile )
  {
    for ( Int iCol = 0; iCol < iWidth; iCol += iTile )
    {
      if ( bTile8x8 )
      {
        uiTotal += xCalcHADs8x8( &pi0[iCol], &pi1[iCol], iStride0, iStride1, 1 );
      }
      else
      {
        uiTotal += xCalcHADs4x4( &pi0[iCol], &pi1[iCol], iStride0, iStride1, 1 );
      }
    }
  }
  return uiTotal >> DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8);
}
/** Count the bits written when coding a luma intra mode for one partition.
 *
 *  Side effect: the CU's luma intra direction at uiPartOffset is set to uiMode.
 *
 *  \param pcCU           CU whose mode is set and encoded
 *  \param uiMode         luma intra mode to measure
 *  \param uiPU           partition index (not used by this function)
 *  \param uiPartOffset   partition offset inside the CU
 *  \param uiDepth        CU depth; selects the stored context set
 *  \param uiInitTrDepth  initial transform depth, added to uiDepth for the mode write
 *  \return               m_pcEntropyCoder's written-bit count after a reset and the mode encode
 */
UInt TEncSearch::xModeBitsIntra( TComDataCU* pcCU, UInt uiMode, UInt uiPU, UInt uiPartOffset, UInt uiDepth, UInt uiInitTrDepth )
{
  const UInt uiPartDepth = uiDepth + uiInitTrDepth;

  // Reload only contexts required for coding intra mode information
  m_pcRDGoOnSbacCoder->loadIntraDirModeLuma( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );
  pcCU->setLumaIntraDirSubParts( uiMode, uiPartOffset, uiPartDepth );

  // Measure from a zeroed bit counter so only the mode syntax is counted.
  m_pcEntropyCoder->resetBits();
  m_pcEntropyCoder->encodeIntraDirModeLuma( pcCU, uiPartOffset );
  const UInt uiWrittenBits = m_pcEntropyCoder->getNumberOfWrittenBits();
  return uiWrittenBits;
}
/** Try to insert a mode into a fixed-size candidate list ordered by cost.
 *
 *  Scanning from the tail, the new entry is placed in front of every entry whose
 *  cost is strictly greater; the displaced entries move one slot towards the tail
 *  and the last one drops out. Entries of equal cost keep their position ahead
 *  of the new one. Assumes the list is already in ascending cost order — the
 *  function itself does not check this.
 *
 *  \param uiMode         mode to insert
 *  \param uiCost         cost of that mode
 *  \param uiFastCandNum  number of slots in both lists
 *  \param CandModeList   candidate modes, updated in place
 *  \param CandCostList   candidate costs, updated in place
 *  \return               1 if the mode was inserted, 0 if no entry had a greater cost
 */
UInt TEncSearch::xUpdateCandList( UInt uiMode, Double uiCost, UInt uiFastCandNum, UInt * CandModeList, Double * CandCostList )
{
  // Find the slot the new entry belongs in, walking from the tail to the head.
  UInt uiInsertPos = uiFastCandNum;
  while ( uiInsertPos > 0 && uiCost < CandCostList[ uiInsertPos - 1 ] )
  {
    --uiInsertPos;
  }

  if ( uiInsertPos == uiFastCandNum )
  {
    return 0;
  }

  // Open the slot by moving the more expensive entries one position down.
  for ( UInt uiSlot = uiFastCandNum - 1; uiSlot > uiInsertPos; --uiSlot )
  {
    CandModeList[ uiSlot ] = CandModeList[ uiSlot - 1 ];
    CandCostList[ uiSlot ] = CandCostList[ uiSlot - 1 ];
  }

  CandModeList[ uiInsertPos ] = uiMode;
  CandCostList[ uiInsertPos ] = uiCost;
  return 1;
}