H.266/VVC-VTM代码学习20-CU层进行RDO函数xCompressCU

最新推荐文章于 2022-11-04 10:21:01 发布
liaojq2020
最新推荐文章于 2022-11-04 10:21:01 发布
阅读量2.3k
点赞数 5
分类专栏： H.266/VVC代码学习 文章标签：视频处理 视频编码 VVC VTM 调制与编码策略
本文链接：https://blog.csdn.net/qq_43616471/article/details/115765202
版权
H.266/VVC代码学习专栏收录该内容
27 篇文章
订阅专栏
本文详细介绍了H.266/VVC编码标准参考软件VTM中CU（编码单元）层的压缩流程，包括函数xCompressCU的功能、工作原理及RDO（率失真优化）过程。文章通过分析代码展示了如何遍历不同的测试模式，如INTER、INTRA、MERGE、SPLIT等，并进行成本计算，以选取最优编码策略。同时，还探讨了色彩空间转换、子块变换（SBT）以及色度QP调整等特性在编码过程中的应用。
摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >
H.266/VVC专栏传送

上一篇：H.266/VVC-VTM代码学习19-CU层确定测试模式函数initCULevel
下一篇：H.266/VVC-VTM代码学习21-对划分模式进行RDO的函数xCheckModeSplit
前言

VTM是H.266/VVC视频编码标准的参考软件，研究VTM代码有助于研究人员理解VVC编码标准的详细规范与实现细节。
本文是笔者对VTM代码的一点学习记录，成文于笔者刚开始接触VVC期间，期间很多概念和理论框架还很不成熟，若文中存在错误欢迎批评指正，也欢迎广大视频编码学习者沟通交流、共同进步。
VTM代码的下载及编译请参考博文：
【视频编码学习】H.266/VVC参考软件VTM配置运行(VTM-6.0版本)
一、函数作用

函数 xCompressCU 在 VTM10.0 的 EncCu.cpp 中，主要负责遍历 m_ComprCUCtxList 列表并调用相应的函数进行 RDO。通过了解、调整这部分代码，可以调整 encoder 进行 RD 测试的 mode、split、QP 等，从而实现提升编码性能。
建立 m_ComprCUCtxList 列表的过程请查阅 H.266/VVC-VTM代码学习19-CU层确定测试模式函数initCULevel
Intra 模式具体的 RDO 过程请查阅 H.266/VVC-VTM代码学习-帧内预测11-编码端亮度块模式选择estIntraPredLumaQT函数
二、函数详解

/// @brief Run the CU-level rate-distortion search for the current partitioner node.
///
/// Asks the mode controller (m_modeCtrl) to build the list of test modes for
/// this node, then walks that list and dispatches each entry to the matching
/// xCheck* routine. Each routine compares its candidate held in tempCS against
/// the running best held in bestCS. After the loop the winning result is
/// committed: CABAC context, previous QP, HMVP motion, prediction and
/// reconstruction buffers, and the palette predictor.
///
/// @param tempCS          Working coding structure used for each candidate.
/// @param bestCS          Coding structure holding the best candidate so far.
/// @param partitioner     Partitioner describing the current node; its
///                        modeType/treeType/chType are changed temporarily
///                        while split modes are tested and restored afterwards.
/// @param maxCostAllowed  Cost bound copied into every test mode; must be >= 0.
void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& partitioner, double maxCostAllowed )
{
  CHECK(maxCostAllowed < 0, "Wrong value of maxCostAllowed!");
#if ENABLE_SPLIT_PARALLELISM
  CHECK( m_dataId != tempCS->picture->scheduler.getDataId(), "Working in the wrong dataId!" );

  if( m_pcEncCfg->getNumSplitThreads() != 1 && tempCS->picture->scheduler.getSplitJobId() == 0 )
  {
    if( m_modeCtrl->isParallelSplit( *tempCS, partitioner ) )
    {
      m_modeCtrl->setParallelSplit( true );
      xCompressCUParallel( tempCS, bestCS, partitioner );
      return;
    }
  }

#endif
  // Work out which components the palette (PLT) bookkeeping below covers.
  uint32_t compBegin;
  uint32_t numComp;
  bool jointPLT = false;
  if (partitioner.isSepTree( *tempCS ))
  {
    if( !CS::isDualITree(*tempCS) && partitioner.treeType != TREE_D )
    {
      // Not an intra dual tree and tree type is not TREE_D:
      // start at Y and cover every component (1 for 4:0:0, otherwise 3), jointly.
      compBegin = COMPONENT_Y;
      numComp = (tempCS->area.chromaFormat != CHROMA_400)?3: 1;
      jointPLT = true;
    }
    else
    {
      // Intra dual tree, or tree type TREE_D: components follow the current channel type.
      if (isLuma(partitioner.chType))
      {
        // Luma channel: Y only.
        compBegin = COMPONENT_Y;
        numComp = 1;
      }
      else
      {
        // Chroma channel: Cb and Cr.
        compBegin = COMPONENT_Cb;
        numComp = 2;
      }
    }
  }
  else
  {
    // isSepTree() is false: start at Y and cover every component
    // (1 for 4:0:0, otherwise 3), jointly.
    compBegin = COMPONENT_Y;
    numComp = (tempCS->area.chromaFormat != CHROMA_400) ? 3 : 1;
    jointPLT = true;
  }
  SplitSeries splitmode = -1;
  // Palette predictor snapshots.
  //   bestLastPLT* : predictor captured from bestCS whenever a split test changes the best result.
  //   curLastPLT*  : predictor on entry, restored before every test mode and used when there is no partition.
  uint8_t   bestLastPLTSize[MAX_NUM_CHANNEL_TYPE];
  Pel       bestLastPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE];
  uint8_t   curLastPLTSize[MAX_NUM_CHANNEL_TYPE];
  Pel       curLastPLT[MAX_NUM_COMPONENT][MAXPLTPREDSIZE];
  // Snapshot the incoming palette predictor of tempCS for the covered components.
  for (int i = compBegin; i < (compBegin + numComp); i++)
  {
    ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
    bestLastPLTSize[comID] = 0;
    curLastPLTSize[comID] = tempCS->prevPLT.curPLTSize[comID];
    memcpy(curLastPLT[i], tempCS->prevPLT.curPLT[i], tempCS->prevPLT.curPLTSize[comID] * sizeof(Pel));
  }

  Slice&   slice      = *tempCS->slice;
  const PPS &pps      = *tempCS->pps;
  const SPS &sps      = *tempCS->sps;
  // Luma position of the top-left sample of the current area.
  const uint32_t uiLPelX  = tempCS->area.Y().lumaPos().x;
  const uint32_t uiTPelY  = tempCS->area.Y().lumaPos().y;

  // Remember the partitioner state on entry; it is restored after every split test.
  const ModeType modeTypeParent  = partitioner.modeType;
  const TreeType treeTypeParent  = partitioner.treeType;
  const ChannelType chTypeParent = partitioner.chType;
  // Area of bestCS for the current channel type, clipped to the picture.
  const UnitArea currCsArea = clipArea( CS::getArea( *bestCS, bestCS->area, partitioner.chType ), *tempCS->picture );

  // Let the mode controller set up the test modes for this CU level.
  m_modeCtrl->initCULevel( partitioner, *tempCS );
  // At depth 0 of a non-intra slice, reset the SBT save/load state when SBT or inter MTS is enabled.
  if( partitioner.currQtDepth == 0 && partitioner.currMtDepth == 0 && !tempCS->slice->isIntra() && ( sps.getUseSBT() || sps.getUseInterMTS() ) )
  {
    auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl );
    int maxSLSize = sps.getUseSBT() ? tempCS->slice->getSPS()->getMaxTbSize() : MTS_INTER_MAX_CU_SIZE;
    slsSbt->resetSaveloadSbt( maxSLSize );
#if ENABLE_SPLIT_PARALLELISM
    CHECK( tempCS->picture->scheduler.getSplitJobId() != 0, "The SBT search reset need to happen in sequential region." );
    if (m_pcEncCfg->getNumSplitThreads() > 1)
    {
      for (int jId = 1; jId < NUM_RESERVERD_SPLIT_JOBS; jId++)
      {
        auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt *>(m_pcEncLib->getCuEncoder(jId)->m_modeCtrl);
        slsSbt->resetSaveloadSbt(maxSLSize);
      }
    }
#endif
  }
  // Reset the stored SBT costs.
  m_sbtCostSave[0] = m_sbtCostSave[1] = MAX_DOUBLE;
  // Remember the CABAC context at the start of this CU.
  m_CurrCtx->start = m_CABACEstimator->getCtx();
  // if 0, then cu_chroma_qp_offset_flag will be 0, otherwise cu_chroma_qp_offset_flag will be 1.
  m_cuChromaQpOffsetIdxPlus1 = 0;

  // With chroma QP adjustment enabled, derive the offset index from the CU position.
  if( slice.getUseChromaQpAdj() )
  {
    // TODO M0133 : double check encoder decisions with respect to chroma QG detection and actual encode
    int lgMinCuSize = sps.getLog2MinCodingBlockSize() +
      std::max<int>(0, floorLog2(sps.getCTUSize()) - sps.getLog2MinCodingBlockSize() - int(slice.getCuChromaQpOffsetSubdiv() / 2));
    if( partitioner.currQgChromaEnable() )
    {
      m_cuChromaQpOffsetIdxPlus1 = ( ( uiLPelX >> lgMinCuSize ) + ( uiTPelY >> lgMinCuSize ) ) % ( pps.getChromaQpOffsetListLen() + 1 );
    }
  }
  // Nothing to test at this level: finish and return.
  if( !m_modeCtrl->anyMode() )
  {
    m_modeCtrl->finishCULevel( partitioner );
    return;
  }
  // Record the position and size of the current CU in the trace context.
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cux", uiLPelX ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuy", uiTPelY ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuw", tempCS->area.lwidth() ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuh", tempCS->area.lheight() ) );
  DTRACE( g_trace_ctx, D_COMMON, "@(%4d,%4d) [%2dx%2d]\n", tempCS->area.lx(), tempCS->area.ly(), tempCS->area.lwidth(), tempCS->area.lheight() );

  // Reset the affine motion saved by the inter search.
  m_pcInterSearch->resetSavedAffineMotion();

  double bestIntPelCost = MAX_DOUBLE;

  // With colour transform enabled: reset the colour-space costs of both
  // structures and mark the first colour space as selected.
  if (tempCS->slice->getSPS()->getUseColorTrans())
  {
    tempCS->tmpColorSpaceCost = MAX_DOUBLE;
    bestCS->tmpColorSpaceCost = MAX_DOUBLE;
    tempCS->firstColorSpaceSelected = true;
    bestCS->firstColorSpaceSelected = true;
  }
  // With colour transform enabled and no intra dual tree: allow testing both
  // colour spaces and reset the per-colour-space intra costs.
  if (tempCS->slice->getSPS()->getUseColorTrans() && !CS::isDualITree(*tempCS))
  {
    tempCS->firstColorSpaceTestOnly = false;
    bestCS->firstColorSpaceTestOnly = false;
    tempCS->tmpColorSpaceIntraCost[0] = MAX_DOUBLE;
    tempCS->tmpColorSpaceIntraCost[1] = MAX_DOUBLE;
    bestCS->tmpColorSpaceIntraCost[0] = MAX_DOUBLE;
    bestCS->tmpColorSpaceIntraCost[1] = MAX_DOUBLE;

    // Inherit the "first colour space only" restriction from the best parent, if there is one.
    if (tempCS->bestParent && tempCS->bestParent->firstColorSpaceTestOnly)
    {
      tempCS->firstColorSpaceTestOnly = bestCS->firstColorSpaceTestOnly = true;
    }
  }

  // Walk the test modes supplied by the mode controller.
  do
  {
    // Restore the entry palette predictor into tempCS before each test.
    for (int i = compBegin; i < (compBegin + numComp); i++)
    {
      ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
      tempCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID];
      memcpy(tempCS->prevPLT.curPLT[i], curLastPLT[i], curLastPLTSize[comID] * sizeof(Pel));
    }
    EncTestMode currTestMode = m_modeCtrl->currTestMode();
    currTestMode.maxCostAllowed = maxCostAllowed;

    // DQP enabled, separate tree, chroma channel: take the QP from the luma CU
    // that covers the centre of the chroma block.
    if (pps.getUseDQP() && partitioner.isSepTree(*tempCS) && isChroma( partitioner.chType ))
    {
      const Position chromaCentral(tempCS->area.Cb().chromaPos().offset(tempCS->area.Cb().chromaSize().width >> 1, tempCS->area.Cb().chromaSize().height >> 1));
      // Scale the chroma centre position to luma coordinates.
      const Position lumaRefPos(chromaCentral.x << getComponentScaleX(COMPONENT_Cb, tempCS->area.chromaFormat), chromaCentral.y << getComponentScaleY(COMPONENT_Cb, tempCS->area.chromaFormat));
      const CodingStructure* baseCS = bestCS->picture->cs;
      const CodingUnit* colLumaCu = baseCS->getCU(lumaRefPos, CHANNEL_TYPE_LUMA);
      if (colLumaCu)
      {
        currTestMode.qp = colLumaCu->qp;
      }
    }
    // Update lambda when the current QG is enabled and either luma-level delta-QP
    // mapping is on, or perceptual QPA is on without rate control and with DQP.
#if SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU
    if (partitioner.currQgEnable() && (
#if SHARP_LUMA_DELTA_QP
        (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()) ||
#endif
#if ENABLE_QPA_SUB_CTU
        (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && pps.getUseDQP())
#else
        false
#endif
      ))
    {
#if ENABLE_SPLIT_PARALLELISM
      CHECK( tempCS->picture->scheduler.getSplitJobId() > 0, "Changing lambda is only allowed in the master thread!" );
#endif
      // Only a non-negative test-mode QP triggers the lambda update.
      if (currTestMode.qp >= 0)
      {
        updateLambda (&slice, currTestMode.qp,
 #if WCG_EXT && ER_CHROMA_QP_WCG_PPS
                      m_pcEncCfg->getWCGChromaQPControl().isEnabled(),
 #endif
                      CS::isDualITree (*tempCS) || (partitioner.currDepth == 0));
      }
    }
#endif

    // Inter motion estimation.
    if( currTestMode.type == ETM_INTER_ME )
    {
      if( ( currTestMode.opts & ETO_IMV ) != 0 )
      {
        // IMV bits are set. The test is skipped only when the IMV option value is 4
        // and bestIntPelCost exceeds 1.25 * bestCS->cost; otherwise the IMV check runs.
        const bool skipAltHpelIF = ( int( ( currTestMode.opts & ETO_IMV ) >> ETO_IMV_SHIFT ) == 4 ) && ( bestIntPelCost > 1.25 * bestCS->cost );
        if (!skipAltHpelIF)
        {
          tempCS->bestCS = bestCS;
          xCheckRDCostInterIMV(tempCS, bestCS, partitioner, currTestMode, bestIntPelCost);
          tempCS->bestCS = nullptr;
        }
      }
      else
      {
        // No IMV bits set: plain inter check.
        tempCS->bestCS = bestCS;
        xCheckRDCostInter( tempCS, bestCS, partitioner, currTestMode );
        tempCS->bestCS = nullptr;
      }

    }
    // Hash-based inter.
    else if (currTestMode.type == ETM_HASH_INTER)
    {
      xCheckRDCostHashInter( tempCS, bestCS, partitioner, currTestMode );
    }
    // Affine merge.
    else if( currTestMode.type == ETM_AFFINE )
    {
      xCheckRDCostAffineMerge2Nx2N( tempCS, bestCS, partitioner, currTestMode );
    }
#if REUSE_CU_RESULTS
    // Reuse a cached result.
    else if( currTestMode.type == ETM_RECO_CACHED )
    {
      xReuseCachedResult( tempCS, bestCS, partitioner );
    }
#endif
    // Merge / skip.
    else if( currTestMode.type == ETM_MERGE_SKIP )
    {
      xCheckRDCostMerge2Nx2N( tempCS, bestCS, partitioner, currTestMode );
      CodingUnit* cu = bestCS->getCU(partitioner.chType);
      // If the best CU is not skipped, clear its mmvdSkip flag.
      if (cu)
      {
        cu->mmvdSkip = cu->skip == false ? false : cu->mmvdSkip;
      }
    }
    // Geometric merge.
    else if( currTestMode.type == ETM_MERGE_GEO )
    {
      xCheckRDCostMergeGeo2Nx2N( tempCS, bestCS, partitioner, currTestMode );
    }
    // Intra.
    else if( currTestMode.type == ETM_INTRA )
    {
      if (slice.getSPS()->getUseColorTrans() && !CS::isDualITree(*tempCS))
      {
        // Colour transform enabled and no intra dual tree: up to two intra passes,
        // one per colour space. The first pass passes `true` as the last argument
        // when the RGB format flag is set, `false` otherwise.
        bool skipSecColorSpace = false;
        skipSecColorSpace = xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, (m_pcEncCfg->getRGBFormatFlag() ? true : false));
        // Lossless cost mode on a lossless slice without the RGB format flag: skip the second colour space.
        if ((m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless()) && !m_pcEncCfg->getRGBFormatFlag())
        {
          skipSecColorSpace = true;
        }
        // Second pass with the opposite last argument, unless skipped or restricted to the first colour space.
        if (!skipSecColorSpace && !tempCS->firstColorSpaceTestOnly)
        {
          xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, (m_pcEncCfg->getRGBFormatFlag() ? false : true));
        }
        if (!tempCS->firstColorSpaceTestOnly)
        {
          // Both colour spaces produced an intra cost: compare them.
          if (tempCS->tmpColorSpaceIntraCost[0] != MAX_DOUBLE && tempCS->tmpColorSpaceIntraCost[1] != MAX_DOUBLE)
          {
            // Ratio is 1.1 when the RGB format flag is set, 1.0 otherwise.
            double skipCostRatio = m_pcEncCfg->getRGBFormatFlag() ? 1.1 : 1.0;
            // Second colour space clearly worse: restrict later tests to the first colour space.
            if (tempCS->tmpColorSpaceIntraCost[1] > (skipCostRatio*tempCS->tmpColorSpaceIntraCost[0]))
            {
              tempCS->firstColorSpaceTestOnly = bestCS->firstColorSpaceTestOnly = true;
            }
          }
        }
        else
        {
          // Restricted to the first colour space: the second must not have been tested.
          CHECK(tempCS->tmpColorSpaceIntraCost[1] != MAX_DOUBLE, "the RD test of the second color space should be skipped");
        }
      }
      else
      {
        // Colour transform disabled, or intra dual tree: single intra pass.
        xCheckRDCostIntra(tempCS, bestCS, partitioner, currTestMode, false);
      }
    }
    // Palette.
    else if (currTestMode.type == ETM_PALETTE)
    {
      xCheckPLT( tempCS, bestCS, partitioner, currTestMode );
    }
    // Intra block copy.
    else if (currTestMode.type == ETM_IBC)
    {
      xCheckRDCostIBCMode(tempCS, bestCS, partitioner, currTestMode);
    }
    // Intra block copy merge.
    else if (currTestMode.type == ETM_IBC_MERGE)
    {
      xCheckRDCostIBCModeMerge2Nx2N(tempCS, bestCS, partitioner, currTestMode);
    }
    // Any split mode.
    else if( isModeSplit( currTestMode ) )
    {
      // Remember the split series of the current best CU, if one exists.
      if (bestCS->cus.size() != 0)
      {
        splitmode = bestCS->cus[0]->splitSeries;
      }
      assert( partitioner.modeType == tempCS->modeType );
      // Mode-constraint signalling for this split: LDT_MODE_TYPE_INHERIT / _INFER / _SIGNAL.
      int signalModeConsVal = tempCS->signalModeCons( getPartSplit( currTestMode ), partitioner, modeTypeParent );
      // LDT_MODE_TYPE_SIGNAL needs two RDO rounds (inter, then intra); the others need one.
      int numRoundRdo = signalModeConsVal == LDT_MODE_TYPE_SIGNAL ? 2 : 1;
      bool skipInterPass = false;
      for( int i = 0; i < numRoundRdo; i++ )
      {
        //change cons modes
        if( signalModeConsVal == LDT_MODE_TYPE_SIGNAL )
        {
          // Round 0 tests MODE_TYPE_INTER, round 1 tests MODE_TYPE_INTRA.
          CHECK( numRoundRdo != 2, "numRoundRdo shall be 2 - [LDT_MODE_TYPE_SIGNAL]" );
          tempCS->modeType = partitioner.modeType = (i == 0) ? MODE_TYPE_INTER : MODE_TYPE_INTRA;
        }
        else if( signalModeConsVal == LDT_MODE_TYPE_INFER )
        {
          // Inferred: MODE_TYPE_INTRA.
          CHECK( numRoundRdo != 1, "numRoundRdo shall be 1 - [LDT_MODE_TYPE_INFER]" );
          tempCS->modeType = partitioner.modeType = MODE_TYPE_INTRA;
        }
        else if( signalModeConsVal == LDT_MODE_TYPE_INHERIT )
        {
          // Inherited: same mode type as on entry.
          CHECK( numRoundRdo != 1, "numRoundRdo shall be 1 - [LDT_MODE_TYPE_INHERIT]" );
          tempCS->modeType = partitioner.modeType = modeTypeParent;
        }

        //for lite intra encoding fast algorithm, set the status to save inter coding info
        if( modeTypeParent == MODE_TYPE_ALL && tempCS->modeType == MODE_TYPE_INTER )
        {
          // Entry type ALL and testing INTER: start saving CU costs and reset the CU counter.
          m_pcIntraSearch->setSaveCuCostInSCIPU( true );
          m_pcIntraSearch->setNumCuInSCIPU( 0 );
        }
        else if( modeTypeParent == MODE_TYPE_ALL && tempCS->modeType != MODE_TYPE_INTER )
        {
          // Entry type ALL and not testing INTER: stop saving; reset the counter only when still ALL.
          m_pcIntraSearch->setSaveCuCostInSCIPU( false );
          if( tempCS->modeType == MODE_TYPE_ALL )
          {
            m_pcIntraSearch->setNumCuInSCIPU( 0 );
          }
        }

        // Evaluate the split.
        xCheckModeSplit( tempCS, bestCS, partitioner, currTestMode, modeTypeParent, skipInterPass );
        //recover cons modes
        tempCS->modeType = partitioner.modeType = modeTypeParent;
        tempCS->treeType = partitioner.treeType = treeTypeParent;
        partitioner.chType = chTypeParent;
        if( modeTypeParent == MODE_TYPE_ALL )
        {
          m_pcIntraSearch->setSaveCuCostInSCIPU( false );
          if( numRoundRdo == 2 && tempCS->modeType == MODE_TYPE_INTRA )
          {
            m_pcIntraSearch->initCuAreaCostInSCIPU();
          }
        }
        if( skipInterPass )
        {
          break;
        }
      }
      // If the best CU's split series changed, capture its palette predictor.
      // bestCS->cus may still be empty here, so guard before indexing cus[0].
      if (!bestCS->cus.empty() && splitmode != bestCS->cus[0]->splitSeries)
      {
        splitmode = bestCS->cus[0]->splitSeries;
        const CodingUnit&     cu = *bestCS->cus.front();
        cu.cs->prevPLT = bestCS->prevPLT;
        for (int i = compBegin; i < (compBegin + numComp); i++)
        {
          ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
          bestLastPLTSize[comID] = bestCS->cus[0]->cs->prevPLT.curPLTSize[comID];
          memcpy(bestLastPLT[i], bestCS->cus[0]->cs->prevPLT.curPLT[i], bestCS->cus[0]->cs->prevPLT.curPLTSize[comID] * sizeof(Pel));
        }
      }
    }
    else
    {
      THROW( "Don't know how to handle mode: type = " << currTestMode.type << ", options = " << currTestMode.opts );
    }
  } while( m_modeCtrl->nextMode( *tempCS, partitioner ) );


  //
  // Finishing CU
#if ENABLE_SPLIT_PARALLELISM
  if( bestCS->cus.empty() )
  {
    CHECK( bestCS->cost != MAX_DOUBLE, "Cost should be maximal if no encoding found" );
    CHECK( bestCS->picture->scheduler.getSplitJobId() == 0, "Should always get a result in serial case" );

    m_modeCtrl->finishCULevel( partitioner );
    return;
  }

#endif
  if( tempCS->cost == MAX_DOUBLE && bestCS->cost == MAX_DOUBLE )
  {
    //although some coding modes were planned to be tried in RDO, no coding mode actually finished encoding due to early termination
    //thus tempCS->cost and bestCS->cost are both MAX_DOUBLE; in this case, skip the following process for normal case
    m_modeCtrl->finishCULevel( partitioner );
    return;
  }

  // set context states
  m_CABACEstimator->getCtx() = m_CurrCtx->best;

  // Everything below dereferences bestCS->cus.back()/front(), so verify up front
  // that a result exists instead of only after those accesses.
  CHECK( bestCS->cus.empty()                                   , "No possible encoding found" );

  // QP from last processed CU for further processing
  //copy the qp of the last non-chroma CU
  int numCUInThisNode = (int)bestCS->cus.size();
  if( numCUInThisNode > 1 && bestCS->cus.back()->chType == CHANNEL_TYPE_CHROMA && !CS::isDualITree( *bestCS ) )
  {
    CHECK( bestCS->cus[numCUInThisNode-2]->chType != CHANNEL_TYPE_LUMA, "wrong chType" );
    bestCS->prevQP[partitioner.chType] = bestCS->cus[numCUInThisNode-2]->qp;
  }
  else
  {
    bestCS->prevQP[partitioner.chType] = bestCS->cus.back()->qp;
  }
  // A single inter or IBC luma CU covering the whole area: store its motion in the HMVP list.
  if ((!slice.isIntra() || slice.getSPS()->getIBCFlag())
    && partitioner.chType == CHANNEL_TYPE_LUMA
    && bestCS->cus.size() == 1 && (bestCS->cus.back()->predMode == MODE_INTER || bestCS->cus.back()->predMode == MODE_IBC)
    && bestCS->area.Y() == (*bestCS->cus.back()).Y()
    )
  {
    const CodingUnit&     cu = *bestCS->cus.front();

    bool isIbcSmallBlk = CU::isIBC(cu) && (cu.lwidth() * cu.lheight() <= 16);
    CU::saveMotionInHMVP( cu, isIbcSmallBlk );
  }
  // Copy the best prediction and reconstruction into the picture buffers.
  bestCS->picture->getPredBuf(currCsArea).copyFrom(bestCS->getPredBuf(currCsArea));
  bestCS->picture->getRecoBuf( currCsArea ).copyFrom( bestCS->getRecoBuf( currCsArea ) );
  m_modeCtrl->finishCULevel( partitioner );
  if( m_pcIntraSearch->getSaveCuCostInSCIPU() && bestCS->cus.size() == 1 )
  {
    m_pcIntraSearch->saveCuAreaCostInSCIPU( Area( partitioner.currArea().lumaPos(), partitioner.currArea().lumaSize() ), bestCS->cost );
  }

#if ENABLE_SPLIT_PARALLELISM
  if( tempCS->picture->scheduler.getSplitJobId() == 0 && m_pcEncCfg->getNumSplitThreads() != 1 )
  {
    tempCS->picture->finishParallelPart( currCsArea );
  }

#endif
  // Commit the palette predictor that matches the chosen result.
  if (bestCS->cus.size() == 1) // no partition
  {
    CHECK(bestCS->cus[0]->tileIdx != bestCS->pps->getTileIdx(bestCS->area.lumaPos()), "Wrong tile index!");
    if (bestCS->cus[0]->predMode == MODE_PLT)
    {
      // Palette CU: restore the entry predictor, then reorder it using the CU's own palette.
      for (int i = compBegin; i < (compBegin + numComp); i++)
      {
        ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
        bestCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID];
        memcpy(bestCS->prevPLT.curPLT[i], curLastPLT[i], curLastPLTSize[comID] * sizeof(Pel));
      }
      bestCS->reorderPrevPLT(bestCS->prevPLT, bestCS->cus[0]->curPLTSize, bestCS->cus[0]->curPLT, bestCS->cus[0]->reuseflag, compBegin, numComp, jointPLT);
    }
    else
    {
      // Non-palette CU: keep the entry predictor unchanged.
      for (int i = compBegin; i<(compBegin + numComp); i++)
      {
        ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
        bestCS->prevPLT.curPLTSize[comID] = curLastPLTSize[comID];
        memcpy(bestCS->prevPLT.curPLT[i], curLastPLT[i], bestCS->prevPLT.curPLTSize[comID] * sizeof(Pel));
      }
    }
  }
  else
  {
    // More than one CU: use the predictor captured after the winning split test.
    for (int i = compBegin; i<(compBegin + numComp); i++)
    {
      ComponentID comID = jointPLT ? (ComponentID)compBegin : ((i > 0) ? COMPONENT_Cb : COMPONENT_Y);
      bestCS->prevPLT.curPLTSize[comID] = bestLastPLTSize[comID];
      memcpy(bestCS->prevPLT.curPLT[i], bestLastPLT[i], bestCS->prevPLT.curPLTSize[comID] * sizeof(Pel));
    }
  }
  const CodingUnit&     cu = *bestCS->cus.front();
  cu.cs->prevPLT = bestCS->prevPLT;
  // Assert if Best prediction mode is NONE
  // Selected mode's RD-cost must be not MAX_DOUBLE.
  CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" );
  CHECK( bestCS->cost             == MAX_DOUBLE                , "No possible encoding found" );
}