一、帧内的CU、PU、TU
CU:编码单元,H.265中帧内的CU最大为64x64,最小为8x8,并且只能是方形块
PU:预测单元,H.265中帧内PU有两种类型:
- SIZE_2Nx2N:进行预测时候不需要对当前CU进一步划分,PU尺寸等于CU尺寸
- SIZE_NxN:只有8x8的CU具有该划分方式,即进行预测时将当前CU使用四叉树递归划分为4个4x4的子PU
TU:变换单元,支持32x32、16x16、8x8、4x4
PU和TU的关系:由于PU和TU都是直接由CU划分得到,因此二者大小没有确定的关系,一个PU可以包含多个TU,一个TU也可以跨越多个PU,但是二者的大小都不能超过CU。对于帧内编码,由于相邻PU之间存在依赖关系,即当前PU进行预测时需要参考相邻已经编码的PU,因此一个PU可以包含多个TU,但是一个TU最多只能对应一个PU。
二、x265中帧内块划分流程
x265中,帧内块划分的RDO过程是在compressIntraCU函数中完成的。划分是通过递归执行的,并且H.265中仅支持四叉树划分,以如下图所示为例。
64x64的CU划分流程如下所示:
- 从根64x64CU开始进行划分,通过四叉树划分获得第一个32x32的CU
- 对于第一个32x32的CU,先通过调用checkIntra函数进行帧内预测模式的RDO,并计算RD Cost;将该32x32的CU进行四叉树划分获得4个16x16的CU
- 对于第一个16x16的CU,先通过调用checkIntra函数进行帧内预测模式的RDO,并计算RD Cost;将该16x16的CU进行四叉树划分获得4个8x8的CU
- 对于四个8x8的CU,分别对每一个8x8CU调用checkIntra函数计算RD Cost
- 返回到第三步中的16x16的CU,将其不进行四叉树划分所得的RD Cost和第四步得到的四个8x8 CU的RD Cost累加和进行比较,两者的比较结果决定了该16x16的CU是否划分为4个8x8的CU
- 用同样的方法,比较第二个、第三个和第四个的16x16的CU,并将这四个16x16CU的最优的RD Cost累加起来
- 返回到第二步中的32x32的CU,比较第一个32x32CU不划分时的RD Cost和第六步中获得的四个16x16CU的RD Cost累加和,从而决定是否对该32x32CU进行四叉树划分
- 同理,计算第二个、第三个和第四个32x32CU的最优RD Cost,决定是否对其进行四叉树划分
(注意:这里帧内的CU理论上最大应该是64x64,但是我从x265中的代码理解并没有64x64的帧内CU,希望有大佬可以指正)
帧内预测模式的RDO参考:https://blog.csdn.net/BigDream123/article/details/112383895
代码及注释如下
/*
 * Recursive intra mode / split decision for one CU.
 *
 * For the CU described by cuGeom this function (a) optionally evaluates coding
 * the CU unsplit via checkIntra(), (b) optionally recurses into the four child
 * CUs and accumulates their best results into md.pred[PRED_SPLIT], and
 * (c) lets checkBestMode() pick the winner, which is then copied to the
 * picture-level CTU data and the reconstructed picture.
 *
 * parentCTU  CTU data used to initialise sub-CUs; also consulted for
 *            previously stored intra directions, CU depths and part sizes.
 * cuGeom     geometry of the CU being analysed (depth, log2CUSize, flags,
 *            absPartIdx, numPartitions, childOffset, ...).
 * qp         QP handed to initSubCU(); children reuse it unless a new QP is
 *            derived for them (see the DQP check inside the split loop).
 *
 * Returns md.bestMode->rdCost for this depth.
 * NOTE(review): the tail of the function dereferences md.bestMode
 * unconditionally, so it assumes one of the branches below always sets it.
 */
uint64_t Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
{
uint32_t depth = cuGeom.depth;
ModeDepth& md = m_modeDepth[depth];
md.bestMode = NULL;
// The CU may be split further unless it is flagged as a leaf.
bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
// The CU may be coded unsplit unless splitting is flagged as mandatory.
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
// bAlreadyDecided: intraRefine is not 4, parentCTU already carries a luma intra
// direction for this partition (anything other than ALL_IDX) and the analysis
// type is not HEVC_INFO. bDecidedDepth: the stored CU depth equals this depth.
bool bAlreadyDecided = m_param->intraRefine != 4 && parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX && !(m_param->bAnalysisType == HEVC_INFO);
bool bDecidedDepth = m_param->intraRefine != 4 && parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
int split = 0;
if (m_param->intraRefine && m_param->intraRefine != 4)
{
// With a scale factor set and the stored depth reached, still allow the split
// check when the split is mandatory or when this CU is one size step above the
// minimum CU size.
split = m_param->scaleFactor && bDecidedDepth && (!mightNotSplit ||
((cuGeom.log2CUSize == (uint32_t)(g_log2Size[m_param->minCUSize] + 1))));
// At the minimum CU size, ignore the stored decision if it was not made at this depth.
if (cuGeom.log2CUSize == (uint32_t)(g_log2Size[m_param->minCUSize]) && !bDecidedDepth)
bAlreadyDecided = false;
}
if (bAlreadyDecided)
{
// A stored decision exists: only evaluate the unsplit CU when the stored depth
// is this depth and an unsplit CU is permitted.
if (bDecidedDepth && mightNotSplit)
{
Mode& mode = md.pred[0];
md.bestMode = &mode;
mode.cu.initSubCU(parentCTU, cuGeom, qp);
// Stored intra directions are copied over unless intraRefine is 3, or
// intraRefine is 2 and the stored luma direction is greater than DC_IDX.
bool reuseModes = !((m_param->intraRefine == 3) ||
(m_param->intraRefine == 2 && parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] > DC_IDX));
if (reuseModes)
{
memcpy(mode.cu.m_lumaIntraDir, parentCTU.m_lumaIntraDir + cuGeom.absPartIdx, cuGeom.numPartitions);
memcpy(mode.cu.m_chromaIntraDir, parentCTU.m_chromaIntraDir + cuGeom.absPartIdx, cuGeom.numPartitions);
}
// Evaluate with the partition size stored in parentCTU for this CU.
checkIntra(mode, cuGeom, (PartSize)parentCTU.m_partSize[cuGeom.absPartIdx]);
if (m_bTryLossless)
tryLossless(cuGeom);
// A split flag is only costed when splitting was an option for this CU.
if (mightSplit)
addSplitFlagCost(*md.bestMode, cuGeom.depth);
}
}
else if (cuGeom.log2CUSize != MAX_LOG2_CU_SIZE && mightNotSplit)
{ // The CU is not of the maximum CU size (MAX_LOG2_CU_SIZE) and may stay unsplit: evaluate intra prediction for it
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N);
checkBestMode(md.pred[PRED_INTRA], depth);
if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
{ // 8x8 CU (log2 size 3) with a minimum TU size below 8x8: also evaluate the SIZE_NxN partition
md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN);
checkBestMode(md.pred[PRED_INTRA_NxN], depth);
}
if (m_bTryLossless)
tryLossless(cuGeom);
if (mightSplit)
addSplitFlagCost(*md.bestMode, cuGeom.depth);
}
// stop recursion if we reach the depth of previous analysis decision
// (unless the refine logic above explicitly requested the split check)
mightSplit &= !(bAlreadyDecided && bDecidedDepth) || split;
if (mightSplit)
{ // Splitting is still an option: recurse into the four child CUs
Mode* splitPred = &md.pred[PRED_SPLIT];
splitPred->initCosts();
CUData* splitCU = &splitPred->cu;
splitCU->initSubCU(parentCTU, cuGeom, qp);
uint32_t nextDepth = depth + 1;
ModeDepth& nd = m_modeDepth[nextDepth];
invalidateContexts(nextDepth);
// The first child starts from this depth's current entropy context; each later
// child starts from the contexts of the previous child's best mode (updated at
// the end of the loop body).
Entropy* nextContext = &m_rqt[depth].cur;
int32_t nextQP = qp;
// Running sum of the children's RD costs, used only for the early-out below.
uint64_t curCost = 0;
int skipSplitCheck = 0;
for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
{
// Child geometries are addressed relative to cuGeom via childOffset.
const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
if (childGeom.flags & CUGeom::PRESENT)
{
// Copy the child's part of the depth-0 fencYuv into the next depth's fencYuv.
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
m_rqt[nextDepth].cur.load(*nextContext);
// With DQP enabled and the child depth within maxCuDQPDepth, derive a QP for the child.
if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
if (m_param->bEnableSplitRdSkip)
{
curCost += compressIntraCU(parentCTU, childGeom, nextQP);
// Early out: once the accumulated cost of the children exceeds the RD cost of
// the best mode already found at this depth, abandon the split evaluation
if (m_modeDepth[depth].bestMode && curCost > m_modeDepth[depth].bestMode->rdCost)
{
skipSplitCheck = 1;
break;
}
}
else
compressIntraCU(parentCTU, childGeom, nextQP);
// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
splitPred->addSubCosts(*nd.bestMode);
nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
nextContext = &nd.bestMode->contexts;
}
else
{
/* record the depth of this non-present sub-CU */
splitCU->setEmptyPart(childGeom, subPartIdx);
/* Set depth of non-present CU to 0 to ensure that correct CU is fetched as reference to code deltaQP */
if (bAlreadyDecided)
memset(parentCTU.m_cuDepth + childGeom.absPartIdx, 0, childGeom.numPartitions);
}
}
if (!skipSplitCheck)
{
// All children were evaluated: finalise the split candidate and let it
// compete with the unsplit candidate(s).
nextContext->store(splitPred->contexts);
// The split flag is only costed when not splitting was an alternative;
// otherwise only the mode cost is refreshed.
if (mightNotSplit)
addSplitFlagCost(*splitPred, cuGeom.depth);
else
updateModeCost(*splitPred);
checkDQPForSplitPred(*splitPred, cuGeom);
checkBestMode(*splitPred, depth);
}
}
if (m_param->bEnableRdRefine && depth <= m_slice->m_pps->maxCuDQPDepth)
{
// Cache the best RD cost of this CU.
// NOTE(review): the index is derived from childOffset; presumably this maps
// each CU geometry to a unique cacheCost slot - confirm against the CUGeom table layout.
int cuIdx = (cuGeom.childOffset - 1) / 3;
cacheCost[cuIdx] = md.bestMode->rdCost;
}
if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
{
// For CUs of 16x16 and larger (log2 size >= 4), record the maximum TU depth used
// by the best mode in the picture-level CTU, keyed by this CU's geomRecurId.
CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
int8_t maxTUDepth = -1;
for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]);
ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
}
/* Copy best data to encData CTU and recon */
md.bestMode->cu.copyToPic(depth);
// The recon copy is skipped when the split candidate won.
// NOTE(review): presumably because the recursive calls already copied each child's recon - confirm.
if (md.bestMode != &md.pred[PRED_SPLIT])
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
return md.bestMode->rdCost;
}