1、变换的相关知识
https://blog.csdn.net/allen_sdz/article/details/101764568
我当时主要是参考了这篇博客,相关理论知识在CSDN博客上基本能搜到。
2、代码实现
感觉VVC的代码真的复杂了很多。我看的是AI(All Intra,全帧内)配置下的编码流程,花了很长时间才大致看懂,先记录下来,顺便和大家分享一下。
从 void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) 开始,就出现了和变换有关的代码。注意:若开启了MTS,变换核的选择就是在这个函数里控制的;是否继续尝试下一个变换核的判断条件位于该函数的结尾部分。
/**
 * Rate-distortion check of intra coding for the current CU.
 *
 * Three nested loops enumerate the transform candidates:
 *   - trGrpIdx : transform group, selecting the MTS index range
 *                startMTSIdx[trGrpIdx]..endMTSIdx[trGrpIdx] passed to the luma search
 *                (4 groups when LFNST is enabled in the SPS, otherwise 1),
 *   - lfnstIdx : LFNST index, startLfnstIdx..endLfnstIdx,
 *   - mtsFlag  : CU-level MTS flag, startMtsFlag..endMtsFlag.
 * For each candidate a fresh intra CU is created in tempCS, the luma and/or chroma
 * intra search is run, the bits are estimated with m_CABACEstimator, the RD cost is
 * computed and xCheckBestMode() compares tempCS against bestCS. Several early-out
 * conditions shrink the remaining search (skipSecondMtsPass, endLfnstIdx, trGrpCheck).
 *
 * \param tempCS       working coding structure; re-initialised for every candidate
 * \param bestCS       best coding structure so far; compared against via xCheckBestMode()
 * \param partitioner  supplies the current area and channel type
 * \param encTestMode  supplies qp, lossless flag and maxCostAllowed for this test
 */
void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
{
double bestInterCost = m_modeCtrl->getBestInterCost();
double costSize2Nx2NmtsFirstPass = m_modeCtrl->getMtsSize2Nx2NFirstPassCost();// overwritten below with tempCS->cost whenever mtsFlag == 0
bool skipSecondMtsPass = m_modeCtrl->getSkipSecondMTSPass();// whether to skip the second MTS pass
const SPS& sps = *tempCS->sps;
const int maxSizeMTS = MTS_INTRA_MAX_CU_SIZE;// 32 according to the original author's note
// The second MTS pass is only considered for luma blocks no larger than maxSizeMTS when intra MTS is enabled.
uint8_t considerMtsSecondPass = ( sps.getUseIntraMTS() && isLuma( partitioner.chType ) && partitioner.currArea().lwidth() <= maxSizeMTS && partitioner.currArea().lheight() <= maxSizeMTS ) ? 1 : 0;
const PPS &pps = *tempCS->pps;
bool useIntraSubPartitions = false;
double maxCostAllowedForChroma = MAX_DOUBLE;
const CodingUnit *bestCU = bestCS->getCU( partitioner.chType );
Distortion interHad = m_modeCtrl->getInterHad();
// Cost of the candidate with mtsFlag == 0 and lfnstIdx == 0 (set below).
double dct2Cost = MAX_DOUBLE;
double trGrpBestCost [ 4 ] = { MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE, MAX_DOUBLE };
double globalBestCost = MAX_DOUBLE;
bool bestSelFlag [ 4 ] = { false, false, false, false };// bestSelFlag[trGrpIdx] becomes true when the best cost is updated within group trGrpIdx
bool trGrpCheck [ 4 ] = { true, true, true, true };
int startMTSIdx [ 4 ] = { 0, 1, 2, 3 };// per the original note, the four MTS kernel types: 0: DST7_DST7, 1: DCT8_DST7, 2: DST7_DCT8 (note said "DST7_DST8", presumably a typo), 3: DCT8_DCT8
int endMTSIdx [ 4 ] = { 0, 1, 2, 3 };// indexed by trGrpIdx below
double trGrpStopThreshold[ 3 ] = { 1.001, 1.001, 1.001 };
int bestMtsFlag = 0;
int bestLfnstIdx = 0;
// maxLfnstIdx is 0 (LFNST disabled for this block) or 2, depending on tree type, channel type and block size.
#if JVET_O0213_RESTRICT_LFNST_TO_MAX_TB_SIZE
#if JVET_O0050_LOCAL_DUAL_TREE
const int maxLfnstIdx = ( partitioner.isSepTree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) )
#else
const int maxLfnstIdx = ( CS::isDualITree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) )
#endif
#if JVET_O0545_MAX_TB_SIGNALLING
|| ( partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize() ) ? 0 : 2;
#else
|| ( partitioner.currArea().lwidth() > MAX_TB_SIZEY || partitioner.currArea().lheight() > MAX_TB_SIZEY ) ? 0 : 2;
#endif
#else
#if JVET_O0050_LOCAL_DUAL_TREE
const int maxLfnstIdx = partitioner.isSepTree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) ? 0 : 2;
#else
const int maxLfnstIdx = CS::isDualITree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) ? 0 : 2;
#endif
#endif
bool skipOtherLfnst = false;
int startLfnstIdx = 0;
int endLfnstIdx = sps.getUseLFNST() ? maxLfnstIdx : 0;// 0 or 2
int grpNumMax = sps.getUseLFNST() ? 4 : 1;
m_pcIntraSearch->invalidateBestModeCost();
for( int trGrpIdx = 0; trGrpIdx < grpNumMax; trGrpIdx++ )// selects one of the MTS kernel groups; group i uses MTS index range i..i (per the original note, for 1 and 2 the actual kernel also depends on the prediction direction)
{
const uint8_t startMtsFlag = trGrpIdx > 0;// 0 or 1
const uint8_t endMtsFlag = sps.getUseLFNST() ? considerMtsSecondPass : 0;// 0 or 1
// Group 0 is always tested; later groups need the second MTS pass to be allowed. Every group also needs trGrpCheck set.
if( ( trGrpIdx == 0 || ( !skipSecondMtsPass && considerMtsSecondPass ) ) && trGrpCheck[ trGrpIdx ] )
{
/*
For intra-coded CUs the CU-level RD check loops over the LFNST index.
Per the original note, the transform set of each intra prediction mode has 3 candidate LFNST index values: 0, 1, 2
// 0 means no secondary transform; 1 and 2 are the two non-separable transform kernels of each transform set
*/
for( int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++ )// at most 0, 1, 2
{
// MTS flag = 0 or 1, i.e. the two MTS passes
for( uint8_t mtsFlag = startMtsFlag; mtsFlag <= endMtsFlag; mtsFlag++ )
{
#if JVET_O0368_LFNST_WITH_DCT2_ONLY
if (mtsFlag > 0 && lfnstIdx > 0)// when lfnstIdx is 1 or 2, the mtsFlag == 1 case is not tested; LFNST is only combined with mtsFlag == 0
{
continue;
}
#endif
//3) if interHad is 0, only try further modes if some intra mode was already better than inter
if( sps.getUseLFNST() && m_pcEncCfg->getUsePbIntraFast() && !tempCS->slice->isIntra() && bestCU && CU::isInter( *bestCS->getCU( partitioner.chType ) ) && interHad == 0 )
{
continue;
}
// Start from a clean tempCS and create the intra CU carrying this candidate's lfnstIdx / mtsFlag.
tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
CodingUnit &cu = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType ), partitioner.chType );
partitioner.setCUData( cu );
cu.slice = tempCS->slice;
cu.tileIdx = tempCS->picture->brickMap->getBrickIdxRsMap( tempCS->area.lumaPos() );
cu.skip = false;
cu.mmvdSkip = false;
cu.predMode = MODE_INTRA;
cu.transQuantBypass = encTestMode.lossless;
cu.chromaQpAdj = cu.transQuantBypass ? 0 : m_cuChromaQpOffsetIdxPlus1;
cu.qp = encTestMode.qp;
//cu.ipcm = false;
cu.lfnstIdx = lfnstIdx;
cu.mtsFlag = mtsFlag;
cu.ispMode = NOT_INTRA_SUBPARTITIONS;
CU::addPUs( cu );
tempCS->interHad = interHad;
m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
bool validCandRet = false;
if( isLuma( partitioner.chType ) )
{
#if JVET_O0502_ISP_CLEANUP
//ISP uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary subpartitions
#if JVET_O0050_LOCAL_DUAL_TREE
double bestCostSoFar = partitioner.isSepTree(*tempCS) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
if (partitioner.isSepTree(*tempCS) && encTestMode.maxCostAllowed < bestCostSoFar)
#else
double bestCostSoFar = CS::isDualITree(*tempCS) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
if (CS::isDualITree(*tempCS) && encTestMode.maxCostAllowed < bestCostSoFar)
#endif
{
bestCostSoFar = encTestMode.maxCostAllowed;
}
#else
//the Intra SubPartitions mode uses the value of the best cost so far (luma if it is the fast version) to avoid test non-necessary lines
#if JVET_O0050_LOCAL_DUAL_TREE
const double bestCostSoFar = partitioner.isSepTree( *tempCS ) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
#else
const double bestCostSoFar = CS::isDualITree( *tempCS ) ? m_modeCtrl->getBestCostWithoutSplitFlags() : bestCU && bestCU->predMode == MODE_INTRA ? bestCS->lumaCost : bestCS->cost;
#endif
#endif // luma intra prediction follows
validCandRet = m_pcIntraSearch->estIntraPredLumaQT( cu, partitioner, bestCostSoFar, mtsFlag, startMTSIdx[ trGrpIdx ], endMTSIdx[ trGrpIdx ], ( trGrpIdx > 0 ) );
// With LFNST enabled, drop the candidate if the luma search found nothing valid or an ISP CU has a zero luma cbf in its first TU.
if( sps.getUseLFNST() && ( !validCandRet || ( cu.ispMode && cu.firstTU->cbf[ COMPONENT_Y ] == 0 ) ) )
{
continue;
}
useIntraSubPartitions = cu.ispMode != NOT_INTRA_SUBPARTITIONS;
#if JVET_O0050_LOCAL_DUAL_TREE
if( !partitioner.isSepTree( *tempCS ) )
#else
if( !CS::isDualITree( *tempCS ) )
#endif
{
tempCS->lumaCost = m_pcRdCost->calcRdCost( tempCS->fracBits, tempCS->dist );
if( useIntraSubPartitions )
{
//the difference between the best cost so far and the current luma cost is stored to avoid testing the Cr component if the cost of luma + Cb is larger than the best cost
maxCostAllowedForChroma = bestCS->cost < MAX_DOUBLE ? bestCS->cost - tempCS->lumaCost : MAX_DOUBLE;
}
}
if (m_pcEncCfg->getUsePbIntraFast() && tempCS->dist == std::numeric_limits<Distortion>::max()
&& tempCS->interHad == 0)
{
interHad = 0;
// JEM assumes only perfect reconstructions can from now on beat the inter mode
m_modeCtrl->enforceInterHad( 0 );
continue;
}
#if JVET_O0050_LOCAL_DUAL_TREE
if( !partitioner.isSepTree( *tempCS ) )
#else
if( !CS::isDualITree( *tempCS ) )
#endif
{// not a separate/dual tree: copy the luma reconstruction and prediction into the picture buffers (per the original note, for use by the chroma prediction below)
cu.cs->picture->getRecoBuf( cu.Y() ).copyFrom( cu.cs->getRecoBuf( COMPONENT_Y ) );
cu.cs->picture->getPredBuf(cu.Y()).copyFrom(cu.cs->getPredBuf(COMPONENT_Y));
}
}
// chroma intra prediction
#if JVET_O0050_LOCAL_DUAL_TREE
if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !cu.isSepTree() ) )// chroma channel, or not a separate tree (original note: i.e. a B or P slice — TODO confirm)
#else
if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CHANNEL_TYPE_CHROMA || !CS::isDualITree( *tempCS ) ) )
#endif
{// choose the chroma prediction mode for the current block (original note: I frames predict luma first and then chroma, B/P frames handle luma and chroma together per CTU — TODO confirm)
TUIntraSubPartitioner subTuPartitioner( partitioner );
#if JVET_O0050_LOCAL_DUAL_TREE
m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( cu.isSepTree() && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma );
#else
m_pcIntraSearch->estIntraPredChromaQT( cu, ( !useIntraSubPartitions || ( CS::isDualITree( *cu.cs ) && !isLuma( CHANNEL_TYPE_CHROMA ) ) ) ? partitioner : subTuPartitioner, maxCostAllowedForChroma );
#endif
if( useIntraSubPartitions && !cu.ispMode )
{
//At this point the temp cost is larger than the best cost. Therefore, we can already skip the remaining calculations
continue;
}
}
// rootCbf is set if any valid transform block of the first TU has a non-zero cbf.
cu.rootCbf = false;
for( uint32_t t = 0; t < getNumberValidTBlocks( *cu.cs->pcv ); t++ )
{
cu.rootCbf |= cu.firstTU->cbf[t] != 0;
}
// Get total bits for current mode: encode CU
m_CABACEstimator->resetBits();
if( pps.getTransquantBypassEnabledFlag() )
{
m_CABACEstimator->cu_transquant_bypass_flag( cu );
}
if ((!cu.cs->slice->isIntra() || cu.cs->slice->getSPS()->getIBCFlag())
&& cu.Y().valid()
)
{
m_CABACEstimator->cu_skip_flag ( cu );
}
m_CABACEstimator->pred_mode ( cu );
m_CABACEstimator->pcm_data ( cu, partitioner );
m_CABACEstimator->cu_pred_data ( cu );
m_CABACEstimator->bdpcm_mode ( cu, ComponentID(partitioner.chType) );
// Encode Coefficients
CUCtx cuCtx;
cuCtx.isDQPCoded = true;
cuCtx.isChromaQpAdjCoded = true;
m_CABACEstimator->cu_residual( cu, partitioner, cuCtx );
tempCS->fracBits = m_CABACEstimator->getEstFracBits();
tempCS->cost = m_pcRdCost->calcRdCost(tempCS->fracBits, tempCS->dist);// total RD cost of the current CU
#if JVET_O0050_LOCAL_DUAL_TREE
double bestIspCost = cu.ispMode ? cu.isSepTree() ? tempCS->cost : tempCS->lumaCost : MAX_DOUBLE;
#else
double bestIspCost = cu.ispMode ? CS::isDualITree( *tempCS ) ? tempCS->cost : tempCS->lumaCost : MAX_DOUBLE;
#endif
// Remember the cost before xEncodeDontSplit() / xCheckDQP() are applied to tempCS.
const double tmpCostWithoutSplitFlags = tempCS->cost;
xEncodeDontSplit( *tempCS, partitioner );
xCheckDQP( *tempCS, partitioner );
// Check if low frequency non-separable transform (LFNST) is too expensive
#if JVET_O0472_LFNST_SIGNALLING_LAST_SCAN_POS
if( lfnstIdx && !cuCtx.lfnstLastScanPos )
{
#if JVET_O0050_LOCAL_DUAL_TREE
bool cbfAtZeroDepth = cu.isSepTree() ? cu.rootCbf : std::min( cu.firstTU->blocks[ 1 ].width, cu.firstTU->blocks[ 1 ].height ) < 4 ? TU::getCbfAtDepth( *cu.firstTU, COMPONENT_Y, 0 ) : cu.rootCbf;
#else
bool cbfAtZeroDepth = CS::isDualITree( *tempCS ) ? cu.rootCbf : std::min( cu.firstTU->blocks[ 1 ].width, cu.firstTU->blocks[ 1 ].height ) < 4 ? TU::getCbfAtDepth( *cu.firstTU, COMPONENT_Y, 0 ) : cu.rootCbf;
#endif
if( cbfAtZeroDepth )
{
// Invalidate this LFNST candidate by giving it the maximum cost.
tempCS->cost = MAX_DOUBLE;
}
}
#else
#if JVET_O0050_LOCAL_DUAL_TREE
const int nonZeroCoeffThr = cu.isSepTree() ? ( isLuma( partitioner.chType ) ? LFNST_SIG_NZ_LUMA : LFNST_SIG_NZ_CHROMA ) : LFNST_SIG_NZ_LUMA + LFNST_SIG_NZ_CHROMA;
#else
const int nonZeroCoeffThr = CS::isDualITree( *tempCS ) ? ( isLuma( partitioner.chType ) ? LFNST_SIG_NZ_LUMA : LFNST_SIG_NZ_CHROMA ) : LFNST_SIG_NZ_LUMA + LFNST_SIG_NZ_CHROMA;
#endif
if( lfnstIdx && cuCtx.numNonZeroCoeffNonTs <= nonZeroCoeffThr )
{
if (cuCtx.numNonZeroCoeffNonTs > 0)
{
tempCS->cost = MAX_DOUBLE;
}
}
#endif
if( mtsFlag == 0 && lfnstIdx == 0 )
{
dct2Cost = tempCS->cost;
}
if( tempCS->cost < bestCS->cost )
{
m_modeCtrl->setBestCostWithoutSplitFlags( tmpCostWithoutSplitFlags );
}
if( !mtsFlag ) static_cast< double& >( costSize2Nx2NmtsFirstPass ) = tempCS->cost;
if( sps.getUseLFNST() && !tempCS->cus.empty() )
{
skipOtherLfnst = m_modeCtrl->checkSkipOtherLfnst( encTestMode, tempCS, partitioner );
}
xCalDebCost( *tempCS, partitioner );
tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
#if WCG_EXT
DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
#else
DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
#endif
if( !sps.getUseLFNST() )
{
xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
}
else
{
if( xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ) )
{
// This candidate became the new best: record cost, group flag and the winning MTS flag / LFNST index.
trGrpBestCost[ trGrpIdx ] = globalBestCost = bestCS->cost;
bestSelFlag [ trGrpIdx ] = true;
bestMtsFlag = mtsFlag;
bestLfnstIdx = lfnstIdx;
if( bestCS->cus.size() == 1 )
{
// NOTE(review): this local 'cu' shadows the outer 'cu' and refers to the best CU inside this scope only.
CodingUnit &cu = *bestCS->cus.front();
if( cu.firstTU->mtsIdx == MTS_SKIP )
{
// Best TU uses transform skip and log2(width) + log2(height) >= 6: stop testing further LFNST indices.
if( ( g_aucLog2[ cu.firstTU->blocks[ COMPONENT_Y ].width ] + g_aucLog2[ cu.firstTU->blocks[ COMPONENT_Y ].height ] ) >= 6 )
{
endLfnstIdx = 0;
}
}
}
}
#if JVET_O0502_ISP_CLEANUP
//we decide to skip the non-DCT-II transforms and LFNST according to the ISP results
if ((endMtsFlag > 0 || endLfnstIdx > 0) && cu.ispMode && !mtsFlag && !lfnstIdx && tempCS->slice->isIntra() && m_pcEncCfg->getUseFastISP())
#else
//we decide to skip the second emt pass or not according to the ISP results
if( considerMtsSecondPass && cu.ispMode && !mtsFlag && tempCS->slice->isIntra() )
#endif
{
double bestCostDct2NoIsp = m_modeCtrl->getMtsFirstPassNoIspCost();
CHECKD( bestCostDct2NoIsp <= bestIspCost, "wrong cost!" );
#if JVET_O0502_ISP_CLEANUP
double threshold = 1.4;
#else
double nSamples = ( double ) ( cu.lwidth() << g_aucLog2[ cu.lheight() ] );
double threshold = 1 + 1.4 / sqrt( nSamples );
#endif
double lfnstThreshold = 1.01 * threshold;
if( bestCostDct2NoIsp > bestIspCost*lfnstThreshold )
{
endLfnstIdx = lfnstIdx;// skip the remaining LFNST indices
}
if( bestCostDct2NoIsp > bestIspCost*threshold )// skip the second MTS pass
{
skipSecondMtsPass = true;
m_modeCtrl->setSkipSecondMTSPass( true );
break;
}
}
//now we check whether the second pass of SIZE_2Nx2N and the whole Intra SIZE_NxN should be skipped or not
if( !mtsFlag && !tempCS->slice->isIntra() && bestCU && bestCU->predMode != MODE_INTRA )
{
const double thEmtInterFastSkipIntra = 1.4; // Skip checking Intra if "2Nx2N using DCT2" is worse than best Inter mode
if( costSize2Nx2NmtsFirstPass > thEmtInterFastSkipIntra * bestInterCost )
{
skipSecondMtsPass = true;
m_modeCtrl->setSkipSecondMTSPass( true );
break;
}
}
}
} //for emtCuFlag
if( skipOtherLfnst )// skip the other LFNST candidates
{
startLfnstIdx = lfnstIdx;
endLfnstIdx = lfnstIdx;
break;
}
} //for lfnstIdx
} //if (!skipSecondMtsPass && considerMtsSecondPass && trGrpCheck[iGrpIdx])
if( sps.getUseLFNST() && trGrpIdx < 3 )
{
trGrpCheck[ trGrpIdx + 1 ] = false;// decides whether the kernel group trGrpIdx + 1 gets tested; all entries start as true, and the next one is reset to false before the condition below is evaluated
if( bestSelFlag[ trGrpIdx ] && considerMtsSecondPass )
{
double dCostRatio = dct2Cost / trGrpBestCost[ trGrpIdx ];
// The next group (trGrpIdx + 1) is tested only if the best candidate so far is not the plain
// mtsFlag == 0 / lfnstIdx == 0 one and dct2Cost / groupBestCost stays below trGrpStopThreshold[ trGrpIdx ].
// (Per the original note, startMTSIdx[ trGrpIdx ] / endMTSIdx[ trGrpIdx ] correspond to mtsFirstCheckId / mtsLastCheckId in xRecurIntraCodingLumaQT.)
trGrpCheck[ trGrpIdx + 1 ] = ( bestMtsFlag != 0 || bestLfnstIdx != 0 ) && dCostRatio < trGrpStopThreshold[ trGrpIdx ];
}
}
} //trGrpIdx
}
真正选定变换核是在 bool IntraSearch::xRecurIntraCodingLumaQT 中完成的,它为接下来的变换做准备:
我使用AI(All Intra)配置时,最终都会进入下面这段代码来确定变换核。
// Excerpt: selection of tu.mtsIdx from transformIndex.
// NOTE(review): this is a fragment — the enclosing function and the closing brace of the
// outer 'if' are not part of the excerpt.
// Branch taken when the low-frequency non-separable transform (LFNST) is enabled in the SPS.
if (sps.getUseLFNST())
{
if (cu.mtsFlag)
{
if (moreProbMTSIdxFirst) // per the original note, driven by trGrpIdx > 0 in the caller — TODO confirm
{
// Look up the luma intra prediction mode of the PU covering this TU's luma block.
const ChannelType chType = toChannelType(COMPONENT_Y);
const CompArea & area = tu.blocks[COMPONENT_Y];
const PredictionUnit &pu = *cs.getPU(area.pos(), chType);
uint32_t uiIntraMode = pu.intraDir[chType];
// For transformIndex 1 and 2 the two mixed DST7/DCT8 kernels are ordered by the intra
// mode (modes below 34 vs. the rest), so the order is swapped between the two branches.
if (transformIndex == 1)
{
tu.mtsIdx = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
}
else if (transformIndex == 2)
{
tu.mtsIdx = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
}
else
{
// Any other transformIndex maps linearly onto the MTS indices starting at MTS_DST7_DST7.
tu.mtsIdx = MTS_DST7_DST7 + transformIndex;
}
}
else
{
// No direction-dependent reordering: linear mapping starting at MTS_DST7_DST7.
tu.mtsIdx = MTS_DST7_DST7 + transformIndex;
}
}
else
{
// CU-level MTS flag is off: transformIndex is used directly as the MTS index.
tu.mtsIdx = transformIndex;
}
3、总结
这是我写的第一篇博客,写得有点混乱,自己也还是个小白,如果有什么错误,欢迎大家直接指出。希望能和大家一起探讨学习!