此系列是为了记录自己学习VTM10.0的过程,目前正在看编码端。主要的参考文档有JVET-S2001-vH和JVET-S2002-v1。由于本人水平有限,出现的错误恳请大家指正,欢迎与大家一起交流进步。
上一篇博文(VTM10.0代码学习14)留了两个计算RDcost的函数没讲,一个是xIntraCodingLumaISP(),另一个是xRecurIntraCodingLumaQT()。前一个计算开启ISP时亮度的RDcost,后一个计算不开启ISP时亮度的RDcost,接下来就详细说说它们。
1. xIntraCodingLumaISP
int subTuCounter = 0;//TU计数器
const CodingUnit& cu = *cs.getCU(partitioner.currArea().lumaPos(), partitioner.chType);
bool earlySkipISP = false;//是否为提早结束ISP测试
bool splitCbfLuma = false;//为false表示所有TU的cbf都为0,为true表示至少有一个TU的cbf不为0
const PartSplit ispType = CU::getISPType(cu, COMPONENT_Y);//表示ISP是垂直还是水平划分
cs.cost = 0;
partitioner.splitCurrArea(ispType, cs);//进行ISP划分
CUCtx cuCtx;
cuCtx.isDQPCoded = true;
cuCtx.isChromaQpAdjCoded = true;
subTuCounter:TU计数器
earlySkipISP:是否提前结束ISP测试
splitCbfLuma:为false表示所有TU的cbf都为0,为true表示至少有一个TU的cbf不为0
ispType:表示ISP是垂直还是水平划分
splitCurrArea:进行ISP划分
do // subpartitions loop
{
} while (partitioner.nextPart(cs)); // subpartitions loop
循环遍历ISP划分后的TU,对每个TU进行RDcost计算,具体参考1.1小节
partitioner.exitCurrSplit();
const UnitArea& currArea = partitioner.currArea();
const uint32_t currDepth = partitioner.currTrDepth;
if (earlySkipISP)
{
cs.cost = MAX_DOUBLE;
}
else
{
cs.cost = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
// The cost check is necessary here again to avoid superfluous operations if the maximum number of coded subpartitions was reached and yet ISP did not win
if (cs.cost < bestCostSoFar)
{
cs.setDecomp(cu.Y());//设置m_isDecomp
cs.picture->getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y()));
//设置cbf
for (auto& ptu : cs.tus)
{
if (currArea.Y().contains(ptu->Y()))
{
TU::setCbfAtDepth(*ptu, COMPONENT_Y, currDepth, splitCbfLuma ? 1 : 0);
}
}
}
else
{
earlySkipISP = true;
}
}
if分支:如果是提早结束ISP测试的情况,此次测试无效,cs.cost被设为MAX_DOUBLE。else分支:先用cs.fracBits和cs.dist计算整体RDcost,如果小于bestCostSoFar,就设置m_isDecomp、拷贝重建值、设置cbf;否则把earlySkipISP置为true
1.1 do-while循环
uint32_t numSig = 0;
Distortion singleDistTmpLuma = 0;//单个TU的亮度分量的SSE失真
uint64_t singleTmpFracBits = 0;//单个TU的亮度分量花费的比特数
double singleCostTmp = 0;//单个TU的RDcost
TransformUnit& tu = cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType), partitioner.chType);
tu.depth = partitioner.currTrDepth;
singleDistTmpLuma:单个TU的亮度分量的SSE失真
singleTmpFracBits:单个TU的亮度分量花费的比特数
singleCostTmp:单个TU的RDcost
xIntraCodingTUBlock(tu, COMPONENT_Y, singleDistTmpLuma, 0, &numSig);
对亮度进行完整的编解码流程(除了熵编解码),计算SSE失真,具体参考第3大节
if (singleDistTmpLuma == MAX_INT) // all zero CBF skip
{
earlySkipISP = true;
partitioner.exitCurrSplit();
cs.cost = MAX_DOUBLE;
return false;
}
if (m_pcRdCost->calcRdCost(cs.fracBits, cs.dist + singleDistTmpLuma) > bestCostSoFar)
{
// The accumulated cost + distortion is already larger than the best cost so far, so it is not necessary to
// calculate the rate
earlySkipISP = true;
}
else
{
singleTmpFracBits = xGetIntraFracBitsQT(cs, partitioner, true, false, subTuCounter, ispType, &cuCtx);
}
第一个if分支:失真等于MAX_INT(对应代码注释中的all zero CBF skip)时,退出当前划分,把cs.cost设为MAX_DOUBLE,提早结束此函数并返回false
第二个if分支:用先前累计的R和先前累计的D加上当前TU的D计算RDcost,如果已经超过目前的亮度最佳RDcost,就标记提前结束ISP测试(earlySkipISP置为true),不必再计算当前TU的比特数;否则就计算当前TU所花费的比特数
singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
cs.cost += singleCostTmp;
cs.dist += singleDistTmpLuma;
cs.fracBits += singleTmpFracBits;
subTuCounter++;
splitCbfLuma |= TU::getCbfAtDepth(*cs.getTU(partitioner.currArea().lumaPos(), partitioner.chType, subTuCounter - 1), COMPONENT_Y, partitioner.currTrDepth);
int nSubPartitions = m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1];
更新一些变量:把当前TU的RDcost、失真和比特数分别累加到cs.cost、cs.dist和cs.fracBits,TU计数器加1,并根据当前TU的cbf更新splitCbfLuma
nSubPartitions:ISP划分后的TU数量
if (subTuCounter < nSubPartitions)
{
// exit condition if the accumulated cost is already larger than the best cost so far (no impact in RD performance)
if (cs.cost > bestCostSoFar)
{
earlySkipISP = true;
break;
}
else if (subTuCounter < nSubPartitions)
{
// more restrictive exit condition
double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91;
if (subTuCounter < nSubPartitions && cs.cost > bestCostSoFar * threshold)
{
earlySkipISP = true;
break;
}
}
}
检查是否要提前结束ISP测试
2. xRecurIntraCodingLumaQT
int subTuCounter = subTuIdx;//一直为-1
const UnitArea &currArea = partitioner.currArea();
const CodingUnit &cu = *cs.getCU( currArea.lumaPos(), partitioner.chType );
uint32_t currDepth = partitioner.currTrDepth;
const SPS &sps = *cs.sps;
bool bCheckFull = true;//为true表示对当前块进行RDcost计算
bool bCheckSplit = false;//为true表示对当前块进一步划分,递归调用函数xRecurIntraCodingLumaQT()
bCheckFull = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
bCheckSplit = partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
const Slice &slice = *cs.slice;
subTuCounter:一直为-1,不用管
bCheckFull:为true表示对当前块进行RDcost计算
bCheckSplit:为true表示对当前块进一步划分,递归调用函数xRecurIntraCodingLumaQT()
之后会分“直接计算RDcost”和“进一步划分”两种情况细说。
uint32_t numSig = 0;
double dSingleCost = MAX_DOUBLE;//最佳变换的RDcost
Distortion uiSingleDistLuma = 0;//最佳变换的失真
uint64_t singleFracBits = 0;//最佳变换所花费的比特数
bool checkTransformSkip = sps.getTransformSkipEnabledFlag();//是否测试开启变换skip
int bestModeId[ MAX_NUM_COMPONENT ] = {
0, 0, 0 };//最佳变换的MTSIdx
uint8_t nNumTransformCands = cu.mtsFlag ? 4 : 1;
uint8_t numTransformIndexCands = nNumTransformCands;
const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() );
TempCtx ctxBest ( m_CtxCache );
CodingStructure *csSplit = nullptr;
CodingStructure