HM16.7量化部分学习记录

量化部分与其他函数

主函数入口为TAppEncoder中的encmain.cpp。然后进入:

cTAppEncTop.encode()

然后进入:

m_cTEncTop.encode()

然后进入:

m_cGOPEncoder.compressGOP()//编码图像组

再然后进入:

m_cSliceEncoder->compressSlice() //编码片

再然后进入:

m_pcCuEncoder->compressCtu() //编码Ctu(注意:compressSlice中调用的是compressCtu,encodeCtu是在encodeSlice中调用的)

依次进入:

xCompressCu() //编码Cu
xCheckRDCostIntra
estIntraPredLumaQT
xRecurIntraCodingLumaQT
xIntraCodingTUBlock
transformNxN()

量化函数及其结构

先总说其结构:
TComTrQuant.h —— transformNxN —— xQuant —— xRateDistOptQuant
除了沿着上面的路径找到transformNxN,也可以直接在解决方案资源管理器中搜索transformNxN,单击打开,会打开头文件TComTrQuant.h。

transformNxN

在这里插入图片描述
在打开的头文件里面会看到量化函数的声明

  /// Transforms and quantises the residual of one colour component of a TU.
  Void transformNxN(       TComTU         & rTu,
                     const ComponentID      compID,      //!< colour component (enum; Y, U or V per the author's note)
                           Pel           *  pcResidual,  //!< residual samples
                     const UInt             uiStride,    //!< row stride of pcResidual
                           TCoeff        *  rpcCoeff,    //!< coefficients after transform and quantisation
#if ADAPTIVE_QP_SELECTION
                           TCoeff        *  rpcArlCoeff, //!< coefficients after transform and adaptive quantisation
#endif
                           TCoeff         & uiAbsSum,    //!< output: accumulated absolute sum
                     const QpParam        & cQP          //!< quantisation parameters
                    );

接下来,转到函数定义

/**
 * Transforms and quantises the residual of one colour component of a TU, then sets the
 * coded block flag (CBF) for the partitions the TU covers.
 *
 * \param rTu         TU being processed
 * \param compID      colour component (enum)
 * \param pcResidual  residual samples, addressed as pcResidual[y * uiStride + x]
 * \param uiStride    row stride of pcResidual
 * \param rpcCoeff    output coefficients
 * \param pcArlCoeff  only with ADAPTIVE_QP_SELECTION; forwarded unchanged to xQuant
 * \param uiAbsSum    output: sum of absolute coefficient values; a non-zero value sets the CBF
 * \param cQP         quantisation parameters
 */
Void TComTrQuant::transformNxN(       TComTU        & rTu,
                                const ComponentID     compID,
                                      Pel          *  pcResidual,
                                const UInt            uiStride,
                                      TCoeff       *  rpcCoeff,
#if ADAPTIVE_QP_SELECTION
                                      TCoeff       *  pcArlCoeff,
#endif
                                      TCoeff        & uiAbsSum,
                                const QpParam       & cQP
                              )
{
  const TComRectangle &rect = rTu.getRect(compID);            // rectangle of this component inside the TU
  const UInt uiWidth        = rect.width;                     // TU width
  const UInt uiHeight       = rect.height;                    // TU height
  TComDataCU* pcCU          = rTu.getCU();                    // CU that owns the TU
  const UInt uiAbsPartIdx   = rTu.GetAbsPartIdxTU();          // start address of the TU (per the author's note)
  const UInt uiOrgTrDepth   = rTu.GetTransformDepthRel();     // used below as the CBF bit position

  uiAbsSum=0;

  // rdpcmNxN receives rdpcmMode; presumably it updates it when RDPCM is chosen -- TODO confirm against its definition.
  RDPCMMode rdpcmMode = RDPCM_OFF;
  rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode );

  if (rdpcmMode == RDPCM_OFF) // RDPCM not used: regular path
  {
    uiAbsSum = 0; // reset the accumulator before it is recomputed below
    //transform and quantise
    if(pcCU->getCUTransquantBypass(uiAbsPartIdx)) // transform/quantisation bypassed: copy the residual straight into rpcCoeff
    {
      const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID); // write the block in reverse order when set
      const UInt uiSizeMinus1   = (uiWidth * uiHeight) - 1;                    // largest coefficient index

      for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
      {
        for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
        {
          const Pel currentSample = pcResidual[(y * uiStride) + x];

          rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample; // copy the residual
          uiAbsSum += TCoeff(abs(currentSample)); // accumulate the absolute sum
        }
      }
    }
    else
    {
#if DEBUG_TRANSFORM_AND_QUANTISE
      std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n";
      printBlock(pcResidual, uiWidth, uiHeight, uiStride);
#endif

      assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );

      if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) // transform-skip mode
      {
        xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID );
      }
      else // regular (non-skip) mode
      {
        const Int channelBitDepth=pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
        // forward transform into m_plTempCoeff
        xT( channelBitDepth, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight, pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)) );
      }

#if DEBUG_TRANSFORM_AND_QUANTISE
      std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between transform and quantiser\n";
      printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
#endif
      // quantisation: m_plTempCoeff -> rpcCoeff
      xQuant( rTu, m_plTempCoeff, rpcCoeff,

#if ADAPTIVE_QP_SELECTION
              pcArlCoeff,
#endif
              uiAbsSum, compID, cQP );

#if DEBUG_TRANSFORM_AND_QUANTISE
      std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of quantiser\n";
      printBlock(rpcCoeff, uiWidth, uiHeight, uiWidth);
#endif
    }
  }

  // set the CBF: bit uiOrgTrDepth is set when any non-zero coefficient remains
  pcCU->setCbfPartRange((((uiAbsSum > 0) ? 1 : 0) << uiOrgTrDepth), compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
}

QpParam 类是储存参数的,把它的定义也贴在这里

// Stores a quantisation parameter together with its quotient and remainder modulo 6.
struct QpParam
{
  Int Qp;//full QP value; the constructor sets Qp = baseQp, so Qp == 6*per + rem
  Int per;//per = Qp/6
  Int rem;//rem = Qp%6

  // Builds the parameter from a luma QP, the channel type, the bit-depth offset,
  // a chroma QP offset and the chroma format.
  QpParam(const Int           qpy,
          const ChannelType   chType,
          const Int           qpBdOffset,
          const Int           chromaQPOffset,
          const ChromaFormat  chFmt );

  // Builds the parameter for one colour component of a CU.
  QpParam(const TComDataCU   &cu, const ComponentID compID);

}; // END STRUCT DEFINITION QpParam
// Derives Qp/per/rem from the luma QP.
// Luma:   baseQp = qpy + qpBdOffset.
// Chroma: qpy + chromaQPOffset is clipped to [-qpBdOffset, chromaQPMappingTableSize - 1];
//         non-negative values are then passed through getScaledChromaQP before qpBdOffset is added.
QpParam::QpParam(const Int           qpy,
                 const ChannelType   chType,
                 const Int           qpBdOffset,
                 const Int           chromaQPOffset,
                 const ChromaFormat  chFmt )
{
  Int baseQp;//base QP

  if (isLuma(chType)) //luma component
  {
    baseQp = qpy + qpBdOffset;
  }
  else //chroma component
  {
    baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset );

    if(baseQp < 0)
    {
      // qpBdOffset is supplied by the caller, so changing that argument changes the resulting QP.
      // Author's note: the luma offset is changed through functions on the CU, while the
      // chroma offset is assembled in the overloaded constructor that takes a TComDataCU.
      baseQp = baseQp + qpBdOffset;
    }
    else
    {
      baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset;
    }
  }

  Qp =baseQp;
  per=baseQp/6;
  rem=baseQp%6;
}

// Builds the QpParam of one colour component of a CU.
// For chroma components the offset is the sum of the PPS QP offset, the slice chroma QP delta
// and the entry of the PPS range-extension chroma QP offset list selected by cu.getChromaQpAdj(0).
// For luma the offset stays 0.
QpParam::QpParam(const TComDataCU &cu, const ComponentID compID)
{
  Int chromaQpOffset = 0;

  if (isChroma(compID)) //chroma component
  {
    chromaQpOffset += cu.getSlice()->getPPS()->getQpOffset(compID);
    chromaQpOffset += cu.getSlice()->getSliceChromaQpDelta(compID);

    // offset[] is indexed by Int(compID)-1
    chromaQpOffset += cu.getSlice()->getPPS()->getPpsRangeExtension().getChromaQpOffsetListEntry(cu.getChromaQpAdj(0)).u.offset[Int(compID)-1];
  }

  // Delegate to the five-argument constructor; the chroma offset computed above is passed as chromaQPOffset.
  *this = QpParam(cu.getQP( 0 ),
                  toChannelType(compID),
                  cu.getSlice()->getSPS()->getQpBDOffset(toChannelType(compID)),
                  chromaQpOffset,
                  cu.getPic()->getChromaFormat());
}


以上即为QpParam的定义及其两个构造函数。

改变量化参数

在这里很重要的点:改变亮度分量的QpOffset

QpParam类中有改变色度的QpOffset的操作,改变亮度分量的QpOffset在TEncCu类当中的xComputeQP函数中进行:根据pcCU->getCUPelX()、pcCU->getCUPelY()返回的当前CU的位置来设置iQpOffset,总QP=iBaseQp+iQpOffset,iBaseQp为配置文件当中设置的初始QP值。

/**
 * Computes the QP for a CU.
 *
 * Starts from the slice QP. When adaptive QP is enabled, an offset derived from the
 * activity of the adaptation unit covering the CU is added. The result is clipped to
 * [-QpBDOffset(luma), MAX_QP].
 *
 * \param pcCU     CU whose QP is computed
 * \param uiDepth  CU depth; selects the adaptive-QP layer (capped at getMaxAQDepth()-1)
 * \return         clipped QP (iBaseQp + iQpOffset)
 */
Int TEncCu::xComputeQP( TComDataCU* pcCU, UInt uiDepth )
{
  Int iBaseQp = pcCU->getSlice()->getSliceQp(); // slice QP (author's note: the QP configured in the cfg file)
  Int iQpOffset = 0;
  if ( m_pcEncCfg->getUseAdaptiveQP() ) // adaptive QP enabled
  {
    TEncPic* pcEPic = dynamic_cast<TEncPic*>( pcCU->getPic() );
    UInt uiAQDepth = min( uiDepth, pcEPic->getMaxAQDepth()-1 );
    TEncPicQPAdaptationLayer* pcAQLayer = pcEPic->getAQLayer( uiAQDepth );
    UInt uiAQUPosX = pcCU->getCUPelX() / pcAQLayer->getAQPartWidth();  // column of the adaptation unit containing the CU
    UInt uiAQUPosY = pcCU->getCUPelY() / pcAQLayer->getAQPartHeight(); // row of the adaptation unit containing the CU
    UInt uiAQUStride = pcAQLayer->getAQPartStride();
    TEncQPAdaptationUnit* acAQU = pcAQLayer->getQPAdaptationUnit();

    Double dMaxQScale = pow(2.0, m_pcEncCfg->getQPAdaptationRange()/6.0);
    Double dAvgAct = pcAQLayer->getAvgActivity();
    Double dCUAct = acAQU[uiAQUPosY * uiAQUStride + uiAQUPosX].getActivity();
    // Normalised activity: ratio of the unit's activity to the layer average, limited by dMaxQScale.
    Double dNormAct = (dMaxQScale*dCUAct + dAvgAct) / (dCUAct + dMaxQScale*dAvgAct);
    Double dQpOffset = log(dNormAct) / log(2.0) * 6.0; // 6 * log2(dNormAct)
    iQpOffset = Int(floor( dQpOffset + 0.49999 ));     // round to the nearest integer
  }

  return Clip3(-pcCU->getSlice()->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQp+iQpOffset );
}

iMaxQP与iMinQp的控制

在xComputeQP函数中改变iQpOffset之后,编码出来的视频并没有改变,发现是后面的xCompressCu中的iMaxQP与iMinQp的控制限制了量化参数之前的改变。


  Int iBaseQP = xComputeQP( rpcBestCU, uiDepth );// 获得当前Cu的Qp,传入当前CU和深度,计算对当前CU的QP;如果不是对每个CU自适应的改变QP,则直接用之前slice算出的QP
  cout << "xCompressCu:" << iBaseQP << endl;
  Int iMinQP;
  Int iMaxQP;
  Bool isAddLowestQP = false;//  是否增加最小的量化步长

  const UInt numberValidComponents = rpcBestCU->getPic()->getNumberValidComponents(); // 获取成分数量,如果色度格式是CHROMA_400,数量为1,反之为3(最大)
  // 计算最小/最大的量化步长
  cout << "uiDepth:" << uiDepth << "          getMaxCuDQPDepth:" << pps.getMaxCuDQPDepth() << endl;
  if( uiDepth <= pps.getMaxCuDQPDepth() )// uiDepth表示CU深度,pps中获得的是配置文件中MaxCuDQPDepth
  {
    Int idQP = m_pcEncCfg->getMaxDeltaQP();// 配置文件中MaxDeltaQP
    iMinQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP-idQP );
    iMaxQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP+idQP );
	//cout << "idQP:" << idQP << "      iMinQP:" << iMinQP << "     iMaxQP:" << iMaxQP << endl;
  }
  else
  {
	 // cout << rpcTempCU->getQP()<<endl;
    iMinQP = rpcTempCU->getQP(0);
    iMaxQP = rpcTempCU->getQP(0);
	//cout << "1      iMinQP:" << iMinQP << "     iMaxQP:" << iMaxQP << endl;
  }

  if ( m_pcEncCfg->getUseRateCtrl() )//是否使用码率控制
  {
    iMinQP = m_pcRateCtrl->getRCQP();
    iMaxQP = m_pcRateCtrl->getRCQP();
	//cout << "2      iMinQP:" << iMinQP << "     iMaxQP:" << iMaxQP << endl;
  }

在[iMinQp,iMaxQp]的范围内遍历QP,选取最优的Qp(没有看懂这里,而且也不明白最终选择的最优Qp到底是什么,代码太长了不贴完)

  if ( !bBoundary )// 如果不在边界
  {
    for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)// 在之前确定的QP范围中枚举QP/遍历每一个QP,执行下面的步骤,选取最优QP
    {
      const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP);

      if (bIsLosslessMode)
      {
        iQP = lowestQP;
      }

      m_cuChromaQpOffsetIdxPlus1 = 0;
      if (pcSlice->getUseChromaQpAdj())
      {

经过和学长的讨论,发现后面如何在[iMinQp,iMaxQp]之间挑选Qp,不明白也没有关系,因为他是RDO一块的内容。是在iBaseQp的附近进行优化的,之前改iBaseQp的思路是正确的,编码后的视频Qp没有改变是由于在下面这段代码中。
(1)当CU深度(深度0代表大小为64*64)<=配置文件中MaxCuDQPDepth(CU量化最大深度)时,就使idQp= 配置文件中MaxDeltaQP(iMaxQP =iBaseQP+idQP,iMinQP =iBaseQP-idQP)。
(2)之前运行编码未改变,是因为配置文件中这两个参数都为0。当uiDepth 为0时,iMinQp=iMaxQp=iBaseQp,虽然iBaseQp在xComputeQp中改变了,但是继续分下去都会进入else语句,此时iMaxQp=iMinQp=getQp(0)(getQp(0)还不知道是哪里的Qp)。
(3)总结:只要不进入else语句,就不会使最终使用的Qp为getQp(0)。函数getQp(uiIdx)返回的是m_phQP[uiIdx],m_phQP是一个矩阵,这个矩阵的值由setQP设置,查找setQp的引用,发现setQp将m_iQp这个变量设置为m_phQP中的值。继续查找m_iQp引用发现m_iQp=int(m_fQp),继续查找m_fQp引用发现m_fQp是有初始值定义的,语句为("QP,q", m_fQP, 30.0, "Qp value, if value is float, QP is switched once during encoding")这句英文对于初学者而言也较难理解,但是有大佬做的文档关于浮点Qp可以作为参考。

  if( uiDepth <= pps.getMaxCuDQPDepth() )// uiDepth表示CU深度,pps中获得的是配置文件中MaxCuDQPDepth
  {
    Int idQP = m_pcEncCfg->getMaxDeltaQP();// 配置文件中MaxDeltaQP
    iMinQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP-idQP );
    iMaxQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP+idQP );
	cout << "idQP:" << idQP << "      iMinQP:" << iMinQP << "     iMaxQP:" << iMaxQP << endl;
  }
  else
  {
	 // cout << rpcTempCU->getQP()<<endl;
    iMinQP = rpcTempCU->getQP(0);
    iMaxQP = rpcTempCU->getQP(0);
	cout << "1      iMinQP:" << iMinQP << "     iMaxQP:" << iMaxQP << endl;
  }

戏剧的一幕来了,偶然间将前文所提到的两个参数改回0,发现else语句中iMinQP与iMaxQP的值发生改变,没有继续限制iQpOffset的改变,即getQP(0)的值发生了改变。但最后编码的视频没有改变。说明之前分析的原因错误,回到原点。
跟踪数据流发现,rpcTempCU里面的m_phQP保持在之前xComputeQP返回的值,直到经过xCheckRDCostIntra函数,会还原为配置文件中的QP值。在xCheckRDCostIntra函数内部,Qp值发生突变的地方位于m_pcPredSearch->estIntraPredLumaQT函数,进入函数内部有一个模块:

 //===== set QP and clear Cbf =====
  if ( pps.getUseDQP() == true)//是否允许在CU层改变QP大小,原本是true我给改掉了,这个命令获取到的参数来自于pps中的cu_qp_delta_enabled_flag
  {
    pcCU->setQPSubParts( pcCU->getQP(0), 0, uiDepth );
  }
  else
  {
    pcCU->setQPSubParts( pcCU->getSlice()->getSliceQp(), 0, uiDepth );
  }

由代码可知,如果 pps.getUseDQP() == true,就把使用的QP值改为getQP(0)(xComputeQp返回的值),否则用slice的Qp。pps.getUseDQP() 返回的是pps中的cu_qp_delta_enabled_flag,书上说这个语法元素指定是否允许在CU层调整Qp。
它的定义代码如下:


Void TEncTop::xInitPPS()
{
  m_cPPS.setConstrainedIntraPred( m_bUseConstrainedIntraPred );
  Bool bUseDQP = (getMaxCuDQPDepth() > 0)? true : false;

  if((getMaxDeltaQP() != 0 )|| getUseAdaptiveQP())
  {
    bUseDQP = true;
  }

  if (m_costMode==COST_SEQUENCE_LEVEL_LOSSLESS || m_costMode==COST_LOSSLESS_CODING)
  {
    bUseDQP=false;
  }


  if ( m_RCEnableRateControl )
  {
    m_cPPS.setUseDQP(true);
    m_cPPS.setMaxCuDQPDepth( 0 );
  }
  else if(bUseDQP)
  {
    m_cPPS.setUseDQP(true);
    m_cPPS.setMaxCuDQPDepth( m_iMaxCuDQPDepth );
  }
  else
  {
    m_cPPS.setUseDQP(false);
    m_cPPS.setMaxCuDQPDepth( 0 );
  }

根据代码Bool bUseDQP = (getMaxCuDQPDepth() > 0)? true : false;我把配置文件中的MaxCuDQPDepth改成了3(把这里填成3,码流分析出来diff_cu_qp_delta_depth就是3,书上说表示给定QG的大小),至此,所有的m_phQP都变成了xComputeQP的返回值,但是最后编码出来的视频和我将配置文件里的Qp改成相同值编码出的视频相比质量更好。

模仿码率控制改变CU的量化参数

上文的引用部分是开启了多QP优化功能 ,使CU层能够改变Qp。但没有必要使用这个功能,直接在代码里面改就可以。 m_cPPS.setUseDQP(true);这个语句就是将pps参数改变,使之能够允许在CU层改变QP。

Void TEncTop::xInitPPS()
{
  m_cPPS.setConstrainedIntraPred( m_bUseConstrainedIntraPred );
  Bool bUseDQP = (getMaxCuDQPDepth() > 0)? true : false;

  if((getMaxDeltaQP() != 0 )|| getUseAdaptiveQP())
  {
    bUseDQP = true;
  }
  if (m_costMode==COST_SEQUENCE_LEVEL_LOSSLESS || m_costMode==COST_LOSSLESS_CODING)
  {
    bUseDQP=false;
  }
  if ( m_RCEnableRateControl )
  {
    m_cPPS.setUseDQP(true);
    m_cPPS.setMaxCuDQPDepth( 0 );
  }
  else if(bUseDQP)
  {
    m_cPPS.setUseDQP(true);
    m_cPPS.setMaxCuDQPDepth( m_iMaxCuDQPDepth );
  }
  else
  {
    m_cPPS.setUseDQP(false);
    m_cPPS.setMaxCuDQPDepth( 0 );
  }
  //my code 设置CU层允许改变Qp*****************
  if (true)
  {
	  m_cPPS.setUseDQP(true);
  }
  //my***********

经过码流分析,发现编码的QP值的确是xComputeQP的值,但是CU的划分更加细小了,所以也导致编码出来的视频质量更好。

为探求原因,寻求老师帮助,从老师那里了解到RDO是以CTU(64*64)为单位进行下面的划分的,故尝试控制编码CTU的参数,参考rateControl的代码,进行代码更改。因为内容简单,不想定义一个类,故直接借用TEncRateCtrl这个类,添加了两个函数与一个数据成员,都是模仿TEncRateCtrl这个类控制CTU的QP参数改的。

// Rate-control class as quoted by the article. The author reuses it as a convenient place to
// carry a per-CTU QP: setMyCtuQP/getMyCtuQP and the member m_CtuQP are the author's additions.
class TEncRateCtrl
{
public:
  TEncRateCtrl();
  ~TEncRateCtrl();

public:
  Void init( Int totalFrames, Int targetBitrate, Int frameRate, Int GOPSize, Int picWidth, Int picHeight, Int LCUWidth, Int LCUHeight, Int keepHierBits, Bool useLCUSeparateModel, GOPEntry GOPList[MAX_GOP] );
  Void destroy();
  Void initRCPic( Int frameLevel );
  Void initRCGOP( Int numberOfPictures );
  Void destroyRCGOP();

public:
  // --- author's addition: accessors for the custom per-CTU QP ---
  Void       setMyCtuQP(Int QP) { m_CtuQP = QP; }
  Int        getMyCtuQP()         { return m_CtuQP; }
  // --- end of author's addition ---
  Void       setRCQP ( Int QP ) { m_RCQP = QP;   }
  Int        getRCQP ()         { return m_RCQP; }
  // The three getters below assert that the corresponding pointer is non-NULL before returning it.
  TEncRCSeq* getRCSeq()          { assert ( m_encRCSeq != NULL ); return m_encRCSeq; }
  TEncRCGOP* getRCGOP()          { assert ( m_encRCGOP != NULL ); return m_encRCGOP; }
  TEncRCPic* getRCPic()          { assert ( m_encRCPic != NULL ); return m_encRCPic; }
  list<TEncRCPic*>& getPicList() { return m_listRCPictures; }
#if U0132_TARGET_BITS_SATURATION
  Bool       getCpbSaturationEnabled()  { return m_CpbSaturationEnabled;  }
  UInt       getCpbState()              { return m_cpbState;       } // NOTE(review): m_cpbState is declared Int but returned as UInt
  UInt       getCpbSize()               { return m_cpbSize;        }
  UInt       getBufferingRate()         { return m_bufferingRate;  }
  Int        updateCpbState(Int actualBits);
  Void       initHrdParam(const TComHRD* pcHrd, Int iFrameRate, Double fInitialCpbFullness);
#endif

private:
  TEncRCSeq* m_encRCSeq;
  TEncRCGOP* m_encRCGOP;
  TEncRCPic* m_encRCPic;
  list<TEncRCPic*> m_listRCPictures;
  // author's addition: QP to apply to the current CTU
  Int        m_CtuQP;
  // end of author's addition
  Int        m_RCQP;
#if U0132_TARGET_BITS_SATURATION
  Bool       m_CpbSaturationEnabled;    // Enable target bits saturation to avoid CPB overflow and underflow
  Int        m_cpbState;                // CPB State
  UInt       m_cpbSize;                 // CPB size
  UInt       m_bufferingRate;           // Buffering rate
#endif
};

#endif

在compressSlice函数中,在rateControl起作用的位置前,模仿其控制Ctu量化参数的方法,加了如下代码:

	// --- author's addition: choose the QP for this CTU ---
	if (ctuRsAddr <10) // CTUs with raster-scan address below 10 get a fixed QP of 37
	{
		m_pcRateCtrl->setMyCtuQP(37);
	}
	else // all other CTUs keep the slice QP
	{
		m_pcRateCtrl->setMyCtuQP(pcSlice->getSliceQp());
	}
	// --- end of author's addition ---
    if ( m_pcCfg->getUseRateCtrl() )
    {
      Int estQP        = pcSlice->getSliceQp();
      Double estLambda = -1.0;
      Double bpp       = -1.0;

现在仅仅是将TEncRateCtrl中的数据m_CtuQP的值改为了我们想要的Qp,但还未应用。接下来在rateControl参数的应用位置,模仿做同样的应用。在xCompressCu函数中:

  //my code
  if (true)//每一次递归进来都使用我们设置的Qp
  {
	  iMinQP = m_pcRateCtrl->getMyCtuQP();
	  iMaxQP = m_pcRateCtrl->getMyCtuQP();
	  cout << "1      iMinQP:" << iMinQP << "     iMaxQP:" << iMaxQP << endl;
  }
  //
  if ( m_pcEncCfg->getUseRateCtrl() )//是否使用码率控制
  {
    iMinQP = m_pcRateCtrl->getRCQP();
    iMaxQP = m_pcRateCtrl->getRCQP();
	cout << "2      iMinQP:" << iMinQP << "     iMaxQP:" << iMaxQP << endl;
  }
  

通过码流分析,发现在cfg文件中Qp设置32、在代码中改为45编码出来的图像质量,仍然高于在cfg文件中设置45编码出的图像质量。又回到了原点。

为验证更改代码正确性,将cfg文件中Qp设置45而代码中也改为45编码出的图像 与 cfg文件中Qp设置45而代码中不改编码出的图像 比较,发现完全一样。至此,说明代码更改没有问题。

码流分析显示,cfg文件中Qp设置32而代码中改为45这种情况下编码出的图像的最终CU划分得更细致。非常幸运,我找到原因,在RDO中会根据cfg中QP参数修改λ,从而会根据配置文件里面的cfg定一个基础的质量,后面在CU层更改量化参数,由于λ的改变,会导致CU划分更细致,而TU不会超过CU。

改变λ

λ的值由Qp设置,在initEncSlice函数里面有根据Qp的值以及帧的类型设置λ,这里我不是很明白,但是通过调试看数据的变化,选取了必要的I帧的λ设置代码复制对每个CTU进行编码的代码中去,所以最终代码如下:

 Double oldLambda = m_pcRdCost->getLambda();
	//my******************
	if (ctuRsAddr <100)//如果Ctu的序号小于100
	{
		Int  iQp_ctu = 37;
		Int    SHIFT_QP1 = 12;
		Int    bitdepth_luma_qp_scale1 = 0;
		Double qp_temp1 = (Double)iQp_ctu + bitdepth_luma_qp_scale1 - SHIFT_QP1;
		Double dLambda = 0.57*pow(2.0, qp_temp1 / 3.0);
		m_pcRateCtrl->setMyCtuQP(iQp_ctu);
		pcSlice->setSliceQp(iQp_ctu);
		setUpLambda(pcSlice, dLambda, iQp_ctu);
		//m_pcRdCost->setLambda(dLambda, pcSlice->getSPS()->getBitDepths());
		cout << "setLambda:" << dLambda<<endl;
	}
	else{
		m_pcRateCtrl->setMyCtuQP(pcSlice->getSliceQp());
	}
	//my*****************

到目前为止,将cfg中QP设置为37,代码中CTUQP也设置为37与图像不经过CTU修改QP的代码编码出的图像(称为图像1)完全一致。但将cfg参数改为32,CTU QP设置为37,编码出的图像(称为2)与前文图像1的CU划分不完全一样,但图像质量人眼效果看起来类似。

xQuant

接下来转到函数 xQuant(···)的定义

// Quantises the coefficients of one TU component.
//   pSrc     : input coefficients
//   pDes     : output, quantised coefficients
//   pArlDes  : (ADAPTIVE_QP_SELECTION only) output used by adaptive QP selection
//   uiAbsSum : accumulates the quantised magnitudes (set to 0 when selective RDOQ zeroes the block)
//   cQP      : quantisation parameters; cQP.per and cQP.rem are used below
// When RDOQ applies, the work is delegated to xRateDistOptQuant; otherwise a plain
// scale / add offset / shift quantiser runs, optionally followed by sign-bit hiding.
Void TComTrQuant::xQuant(       TComTU       &rTu,
                                TCoeff      * pSrc,
                                TCoeff      * pDes,
#if ADAPTIVE_QP_SELECTION
								TCoeff      *pArlDes,// coefficients for adaptive QP selection
#endif
                                TCoeff       &uiAbsSum,
                          const ComponentID   compID,
                          const QpParam      &cQP )
{
  const TComRectangle &rect = rTu.getRect(compID);
  const UInt uiWidth        = rect.width;
  const UInt uiHeight       = rect.height;
  TComDataCU* pcCU          = rTu.getCU();
  const UInt uiAbsPartIdx   = rTu.GetAbsPartIdxTU();
  const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));

  TCoeff* piCoef    = pSrc;// input coefficients
  TCoeff* piQCoef   = pDes;// output (quantised) coefficients
  // destination for adaptive QP selection
#if ADAPTIVE_QP_SELECTION
  TCoeff* piArlCCoef = pArlDes;
#endif

  const Bool useTransformSkip      = pcCU->getTransformSkip(uiAbsPartIdx, compID);
  const Int  maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID));
  // transform-skip blocks use their own RDOQ switch
  Bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ;
  if ( useRDOQ && (isLuma(compID) || RDOQ_CHROMA) )// RDOQ path
  {
#if T0196_SELECTIVE_RDOQ
    if ( !m_useSelectiveRDOQ || xNeedRDOQ( rTu, piCoef, compID, cQP ) )
    {
#endif
#if ADAPTIVE_QP_SELECTION
      xRateDistOptQuant( rTu, piCoef, pDes, pArlDes, uiAbsSum, compID, cQP );// rate-distortion optimised quantisation (the key call)
#else
      xRateDistOptQuant( rTu, piCoef, pDes, uiAbsSum, compID, cQP );// rate-distortion optimised quantisation (the key call)
#endif
#if T0196_SELECTIVE_RDOQ
    }
    else
    {
      // selective RDOQ decided RDOQ is not needed: output an all-zero block
      memset( pDes, 0, sizeof( TCoeff ) * uiWidth *uiHeight );
      uiAbsSum = 0;
    }
#endif
  }
  else
  {
    TUEntropyCodingParameters codingParameters;
    getTUEntropyCodingParameters(codingParameters, rTu, compID);

    // clipping range of the quantised coefficients
    const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
    const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;

    // per-coefficient quantisation remainder; passed to signBitHidingHDQ below
    TCoeff deltaU[MAX_TU_SIZE * MAX_TU_SIZE];

    const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);

    Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
    assert(scalingListType < SCALING_LIST_NUM);
    Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2);

    const Bool enableScalingLists             = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
    const Int  defaultQuantisationCoefficient = g_quantScales[cQP.rem];

    /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
     * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
     * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
     * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
     */

    // Represents scaling through forward transform
    Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange);
    if (useTransformSkip && pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
    {
      iTransformShift = std::max<Int>(0, iTransformShift);
    }

    const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
    // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset

#if ADAPTIVE_QP_SELECTION
    Int iQBitsC = MAX_INT;
    Int iAddC   = MAX_INT;

    if (m_bUseAdaptQpSelect)
    {
      iQBitsC = iQBits - ARL_C_PRECISION;
      iAddC   = 1 << (iQBitsC-1);
    }
#endif

    // rounding offset: 171/512 for I slices, 85/512 otherwise, scaled to iQBits
    const Int iAdd   = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
    const Int qBits8 = iQBits - 8;

    for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ )
    {
      const TCoeff iLevel   = piCoef[uiBlockPos];
      const TCoeff iSign    = (iLevel < 0 ? -1: 1);

      // scaled magnitude, using the scaling-list entry when scaling lists are enabled
      const Int64  tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);

#if ADAPTIVE_QP_SELECTION
      if( m_bUseAdaptQpSelect )
      {
        piArlCCoef[uiBlockPos] = (TCoeff)((tmpLevel + iAddC ) >> iQBitsC);
      }
#endif

      const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
      // difference between the scaled magnitude and the reconstructed level, kept at iQBits-8 fewer bits
      deltaU[uiBlockPos] = (TCoeff)((tmpLevel - (quantisedMagnitude<<iQBits) )>> qBits8);

      uiAbsSum += quantisedMagnitude;
      const TCoeff quantisedCoefficient = quantisedMagnitude * iSign;

      piQCoef[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
    } // for n

    if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
    {
      if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested
      {
        signBitHidingHDQ( piQCoef, piCoef, deltaU, codingParameters, maxLog2TrDynamicRange ) ;
      }
    }
  } //if RDOQ
  //return;
}


xRateDistOptQuant

接下来重要的函数是xRateDistOptQuant(···),转到其定义,这个函数很长。

Void TComTrQuant::xRateDistOptQuant                 (       TComTU       &rTu,
                                                            TCoeff      * plSrcCoeff,
                                                            TCoeff      * piDstCoeff,
#if ADAPTIVE_QP_SELECTION
                                                            TCoeff      * piArlDstCoeff,
#endif
                                                            TCoeff       &uiAbsSum,
                                                      const ComponentID   compID,
                                                      const QpParam      &cQP  )
{
	// 带率失真优化的量化函数
  const TComRectangle  & rect             = rTu.getRect(compID);
  const UInt             uiWidth          = rect.width;
  const UInt             uiHeight         = rect.height;
        TComDataCU    *  pcCU             = rTu.getCU();
  const UInt             uiAbsPartIdx     = rTu.GetAbsPartIdxTU();
  const ChannelType      channelType      = toChannelType(compID);
  const UInt             uiLog2TrSize     = rTu.GetEquivalentLog2TrSize(compID);

  const Bool             extendedPrecision = pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
  const Int              maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID));
  const Int              channelBitDepth = rTu.getCU()->getSlice()->getSPS()->getBitDepth(channelType);

  /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
   * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
   * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
   * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
   */

  // Represents scaling through forward transform
  Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange);
  if ((pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && extendedPrecision)
  {
    iTransformShift = std::max<Int>(0, iTransformShift);
  }

  const Bool bUseGolombRiceParameterAdaptation = pcCU->getSlice()->getSPS()->getSpsRangeExtension().getPersistentRiceAdaptationEnabledFlag();
  const UInt initialGolombRiceParameter        = m_pcEstBitsSbac->golombRiceAdaptationStatistics[rTu.getGolombRiceStatisticsIndex(compID)] / RExt__GOLOMB_RICE_INCREMENT_DIVISOR;
        UInt uiGoRiceParam                     = initialGolombRiceParameter;
  Double     d64BlockUncodedCost               = 0;
  const UInt uiLog2BlockWidth                  = g_aucConvertToBit[ uiWidth  ] + 2;
  const UInt uiLog2BlockHeight                 = g_aucConvertToBit[ uiHeight ] + 2;
  const UInt uiMaxNumCoeff                     = uiWidth * uiHeight;
  assert(compID<MAX_NUM_COMPONENT);

  Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
  assert(scalingListType < SCALING_LIST_NUM);

#if ADAPTIVE_QP_SELECTION
  memset(piArlDstCoeff, 0, sizeof(TCoeff) *  uiMaxNumCoeff);
#endif

  Double pdCostCoeff [ MAX_TU_SIZE * MAX_TU_SIZE ];
  Double pdCostSig   [ MAX_TU_SIZE * MAX_TU_SIZE ];
  Double pdCostCoeff0[ MAX_TU_SIZE * MAX_TU_SIZE ];
  memset( pdCostCoeff, 0, sizeof(Double) *  uiMaxNumCoeff );
  memset( pdCostSig,   0, sizeof(Double) *  uiMaxNumCoeff );
  Int rateIncUp   [ MAX_TU_SIZE * MAX_TU_SIZE ];
  Int rateIncDown [ MAX_TU_SIZE * MAX_TU_SIZE ];
  Int sigRateDelta[ MAX_TU_SIZE * MAX_TU_SIZE ];
  TCoeff deltaU   [ MAX_TU_SIZE * MAX_TU_SIZE ];
  memset( rateIncUp,    0, sizeof(Int   ) *  uiMaxNumCoeff );
  memset( rateIncDown,  0, sizeof(Int   ) *  uiMaxNumCoeff );
  memset( sigRateDelta, 0, sizeof(Int   ) *  uiMaxNumCoeff );
  memset( deltaU,       0, sizeof(TCoeff) *  uiMaxNumCoeff );

  const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
  const Double *const pdErrScale = getErrScaleCoeff(scalingListType, (uiLog2TrSize-2), cQP.rem);
  const Int    *const piQCoef    = getQuantCoeff(scalingListType, cQP.rem, (uiLog2TrSize-2));

  const Bool   enableScalingLists             = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
  const Int    defaultQuantisationCoefficient = g_quantScales[cQP.rem];
  const Double defaultErrorScale              = getErrScaleCoeffNoScalingList(scalingListType, (uiLog2TrSize-2), cQP.rem);

  const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
  const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;

#if ADAPTIVE_QP_SELECTION
  Int iQBitsC = iQBits - ARL_C_PRECISION;
  Int iAddC =  1 << (iQBitsC-1);
#endif

  TUEntropyCodingParameters codingParameters;
  getTUEntropyCodingParameters(codingParameters, rTu, compID);
  const UInt uiCGSize = (1 << MLS_CG_SIZE);

  Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
  UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
  Int iCGLastScanPos = -1;

  UInt    uiCtxSet            = 0;
  Int     c1                  = 1;
  Int     c2                  = 0;
  Double  d64BaseCost         = 0;
  Int     iLastScanPos        = -1;

  UInt    c1Idx     = 0;
  UInt    c2Idx     = 0;
  Int     baseLevel;

  memset( pdCostCoeffGroupSig,   0, sizeof(Double) * MLS_GRP_NUM );
  memset( uiSigCoeffGroupFlag,   0, sizeof(UInt) * MLS_GRP_NUM );

  UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
  Int iScanPos;
  coeffGroupRDStats rdStats;

  const UInt significanceMapContextOffset = getSignificanceMapContextOffset(compID);

  for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
  {
    UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
    UInt uiCGPosY   = uiCGBlkPos / codingParameters.widthInGroups;
    UInt uiCGPosX   = uiCGBlkPos - (uiCGPosY * codingParameters.widthInGroups);

    memset( &rdStats, 0, sizeof (coeffGroupRDStats));

    const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups);

    for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
    {// 这里是实际进行量化的地方
      iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
      //===== quantization =====
      UInt    uiBlkPos          = codingParameters.scan[iScanPos];
      // set coeff

      const Int    quantisationCoefficient = (enableScalingLists) ? piQCoef   [uiBlkPos] : defaultQuantisationCoefficient;
      const Double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale;

      const Int64  tmpLevel                = Int64(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;

      const Intermediate_Int lLevelDouble  = (Intermediate_Int)min<Int64>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));

#if ADAPTIVE_QP_SELECTION
      if( m_bUseAdaptQpSelect )
      {
        piArlDstCoeff[uiBlkPos]   = (TCoeff)(( lLevelDouble + iAddC) >> iQBitsC );
      }
#endif
      const UInt uiMaxAbsLevel  = std::min<UInt>(UInt(entropyCodingMaximum), UInt((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));

      const Double dErr         = Double( lLevelDouble );
      pdCostCoeff0[ iScanPos ]  = dErr * dErr * errorScale;
      d64BlockUncodedCost      += pdCostCoeff0[ iScanPos ];
      piDstCoeff[ uiBlkPos ]    = uiMaxAbsLevel;

      if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
      {
        iLastScanPos            = iScanPos;
        uiCtxSet                = getContextSetIndex(compID, (iScanPos >> MLS_CG_SIZE), 0);
        iCGLastScanPos          = iCGScanPos;
      }

      if ( iLastScanPos >= 0 )
      {
        //===== coefficient level estimation =====
        UInt  uiLevel;
        UInt  uiOneCtx         = (NUM_ONE_FLAG_CTX_PER_SET * uiCtxSet) + c1;
        UInt  uiAbsCtx         = (NUM_ABS_FLAG_CTX_PER_SET * uiCtxSet) + c2;

        if( iScanPos == iLastScanPos )
        {
          uiLevel              = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
                                                  lLevelDouble, uiMaxAbsLevel, significanceMapContextOffset, uiOneCtx, uiAbsCtx, uiGoRiceParam,
                                                  c1Idx, c2Idx, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange
                                                  );
        }
        else
        {
          UShort uiCtxSig      = significanceMapContextOffset + getSigCtxInc( patternSigCtx, codingParameters, iScanPos, uiLog2BlockWidth, uiLog2BlockHeight, channelType );

          uiLevel              = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
                                                  lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
                                                  c1Idx, c2Idx, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange
                                                  );

          sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
        }

        deltaU[ uiBlkPos ]        = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));

        if( uiLevel > 0 )
        {
          Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange );
          rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
          rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
        }
        else // uiLevel == 0
        {
          rateIncUp   [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
        }
        piDstCoeff[ uiBlkPos ] = uiLevel;
        d64BaseCost           += pdCostCoeff [ iScanPos ];

        baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
        if( uiLevel >= baseLevel )
        {
          if (uiLevel > 3*(1<<uiGoRiceParam))
          {
            uiGoRiceParam = bUseGolombRiceParameterAdaptation ? (uiGoRiceParam + 1) : (std::min<UInt>((uiGoRiceParam + 1), 4));
          }
        }
        if ( uiLevel >= 1)
        {
          c1Idx ++;
        }

        //===== update bin model =====
        if( uiLevel > 1 )
        {
          c1 = 0;
          c2 += (c2 < 2);
          c2Idx ++;
        }
        else if( (c1 < 3) && (c1 > 0) && uiLevel)
        {
          c1++;
        }

        //===== context set update =====
        if( ( iScanPos % uiCGSize == 0 ) && ( iScanPos > 0 ) )
        {
          uiCtxSet          = getContextSetIndex(compID, ((iScanPos - 1) >> MLS_CG_SIZE), (c1 == 0)); //(iScanPos - 1) because we do this **before** entering the final group
          c1                = 1;
          c2                = 0;
          c1Idx             = 0;
          c2Idx             = 0;
          uiGoRiceParam     = initialGolombRiceParameter;
        }
      }
      else
      {
        d64BaseCost    += pdCostCoeff0[ iScanPos ];
      }
      rdStats.d64SigCost += pdCostSig[ iScanPos ];
      if (iScanPosinCG == 0 )
      {
        rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
      }
      if (piDstCoeff[ uiBlkPos ] )
      {
        uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
        rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
        rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
        if ( iScanPosinCG != 0 )
        {
          rdStats.iNNZbeforePos0++;
        }
      }
    } //end for (iScanPosinCG)

    if (iCGLastScanPos >= 0)
    {
      if( iCGScanPos )
      {
        if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
        {
          UInt  uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
          d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
          pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
        }
        else
        {
          if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
          {
            if ( rdStats.iNNZbeforePos0 == 0 )
            {
              d64BaseCost -= rdStats.d64SigCost_0;
              rdStats.d64SigCost -= rdStats.d64SigCost_0;
            }
            // rd-cost if SigCoeffGroupFlag = 0, initialization
            Double d64CostZeroCG = d64BaseCost;

            // add SigCoeffGroupFlag cost to total cost
            UInt  uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );

            if (iCGScanPos < iCGLastScanPos)
            {
              d64BaseCost  += xGetRateSigCoeffGroup(1, uiCtxSig);
              d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
              pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
            }

            // try to convert the current coeff group from non-zero to all-zero
            d64CostZeroCG += rdStats.d64UncodedDist;  // distortion for resetting non-zero levels to zero levels
            d64CostZeroCG -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
            d64CostZeroCG -= rdStats.d64SigCost;     // sig cost for all coeffs, including zero levels and non-zerl levels

            // if we can save cost, change this block to all-zero block
            if ( d64CostZeroCG < d64BaseCost )
            {
              uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
              d64BaseCost = d64CostZeroCG;
              if (iCGScanPos < iCGLastScanPos)
              {
                pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
              }
              // reset coeffs to 0 in this block
              for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
              {
                iScanPos      = iCGScanPos*uiCGSize + iScanPosinCG;
                UInt uiBlkPos = codingParameters.scan[ iScanPos ];

                if (piDstCoeff[ uiBlkPos ])
                {
                  piDstCoeff [ uiBlkPos ] = 0;
                  pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
                  pdCostSig  [ iScanPos ] = 0;
                }
              }
            } // end if ( d64CostAllZeros < d64BaseCost )
          }
        } // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
      }
      else
      {
        uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
      }
    }
  } //end for (iCGScanPos)

  //===== estimate last position =====
  if ( iLastScanPos < 0 )
  {
    return;
  }

  Double  d64BestCost         = 0;
  Int     ui16CtxCbf          = 0;
  Int     iBestLastIdxP1      = 0;
  if( !pcCU->isIntra( uiAbsPartIdx ) && isLuma(compID) && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
  {
    ui16CtxCbf   = 0;
    d64BestCost  = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
    d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
  }
  else
  {
    ui16CtxCbf   = pcCU->getCtxQtCbf( rTu, channelType );
    ui16CtxCbf  += getCBFContextOffset(compID);
    d64BestCost  = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
    d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
  }


  Bool bFoundLast = false;
  for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
  {
    UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];

    d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
    if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
    {
      for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
      {
        iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;

        if (iScanPos > iLastScanPos)
        {
          continue;
        }
        UInt   uiBlkPos     = codingParameters.scan[iScanPos];

        if( piDstCoeff[ uiBlkPos ] )
        {
          UInt   uiPosY       = uiBlkPos >> uiLog2BlockWidth;
          UInt   uiPosX       = uiBlkPos - ( uiPosY << uiLog2BlockWidth );

          Double d64CostLast= codingParameters.scanType == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, compID ) : xGetRateLast( uiPosX, uiPosY, compID );
          Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];

          if( totalCost < d64BestCost )
          {
            iBestLastIdxP1  = iScanPos + 1;
            d64BestCost     = totalCost;
          }
          if( piDstCoeff[ uiBlkPos ] > 1 )
          {
            bFoundLast = true;
            break;
          }
          d64BaseCost      -= pdCostCoeff[ iScanPos ];
          d64BaseCost      += pdCostCoeff0[ iScanPos ];
        }
        else
        {
          d64BaseCost      -= pdCostSig[ iScanPos ];
        }
      } //end for
      if (bFoundLast)
      {
        break;
      }
    } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
  } // end for


  for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
  {
    Int blkPos = codingParameters.scan[ scanPos ];
    TCoeff level = piDstCoeff[ blkPos ];
    uiAbsSum += level;
    piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
  }

  //===== clean uncoded coefficients =====
  for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
  {
    piDstCoeff[ codingParameters.scan[ scanPos ] ] = 0;
  }


  if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
  {
    const Double inverseQuantScale = Double(g_invQuantScales[cQP.rem]);
    Int64 rdFactor = (Int64)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per))
                             / m_dLambda / 16 / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth - 8)))
                             + 0.5);

    Int lastCG = -1;
    Int absSum = 0 ;
    Int n ;

    for( Int subSet = (uiWidth*uiHeight-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
    {
      Int  subPos     = subSet << MLS_CG_SIZE;
      Int  firstNZPosInCG=uiCGSize , lastNZPosInCG=-1 ;
      absSum = 0 ;

      for(n = uiCGSize-1; n >= 0; --n )
      {
        if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
        {
          lastNZPosInCG = n;
          break;
        }
      }

      for(n = 0; n <uiCGSize; n++ )
      {
        if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
        {
          firstNZPosInCG = n;
          break;
        }
      }

      for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
      {
        absSum += Int(piDstCoeff[ codingParameters.scan[ n + subPos ]]);
      }

      if(lastNZPosInCG>=0 && lastCG==-1)
      {
        lastCG = 1;
      }

      if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
      {
        UInt signbit = (piDstCoeff[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1);
        if( signbit!=(absSum&0x1) )  // hide but need tune
        {
          // calculate the cost
          Int64 minCostInc = std::numeric_limits<Int64>::max(), curCost = std::numeric_limits<Int64>::max();
          Int minPos = -1, finalChange = 0, curChange = 0;

          for( n = (lastCG==1?lastNZPosInCG:uiCGSize-1) ; n >= 0; --n )
          {
            UInt uiBlkPos   = codingParameters.scan[ n + subPos ];
            if(piDstCoeff[ uiBlkPos ] != 0 )
            {
              Int64 costUp   = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
              Int64 costDown = rdFactor * (   deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
                               -   ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);

              if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
              {
                costDown -= (4<<15);
              }

              if(costUp<costDown)
              {
                curCost = costUp;
                curChange =  1;
              }
              else
              {
                curChange = -1;
                if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
                {
                  curCost = std::numeric_limits<Int64>::max();
                }
                else
                {
                  curCost = costDown;
                }
              }
            }
            else
            {
              curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
              curChange = 1 ;

              if(n<firstNZPosInCG)
              {
                UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
                if(thissignbit != signbit )
                {
                  curCost = std::numeric_limits<Int64>::max();
                }
              }
            }

            if( curCost<minCostInc)
            {
              minCostInc = curCost;
              finalChange = curChange;
              minPos = uiBlkPos;
            }
          }

          if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
          {
            finalChange = -1;
          }

          if(plSrcCoeff[minPos]>=0)
          {
            piDstCoeff[minPos] += finalChange ;
          }
          else
          {
            piDstCoeff[minPos] -= finalChange ;
          }
        }
      }

      if(lastCG==1)
      {
        lastCG=0 ;
      }
    }
  }
}

xQuant()中还有一个重要函数getQuantCoeff(),用于获取量化矩阵中的系数。

该函数的具体实现可以在HM16.7的源码里面翻到。

CU的划分

因为每一次划分CU之后,经过RDO过程,都会经历一次量化,所以为了确定图像中某个位置的量化参数,还要弄明白一帧中的CU具体划分过程。

CU划分原理

参考自CTU、CU、PU、TU简介
传统的视频编码都是基于宏块实现的,对于4:2:0采样格式的视频,一个宏块包含一个16×16大小的亮度块和两个8×8大小的色度块。考虑到高清视频、超高清视频的自身特性,H.265/HEVC标准中引入了树形编码单元CTU,其尺寸由编码器指定,且可大于宏块尺寸。同一位置处的一个亮度CTB和两个色度CTB,再加上相应的语法元素形成一个CTU。

在H.265/HEVC中,一幅图像可以被划分为若干个互不重叠的CTU,在CTU内部,采用基于四叉树的循环分层结构。同一层次上的编码单元具有相同的分割深度。一个CTU可能只包含一个CU(没有进行划分),也可能被划分为多个CU。
在这里插入图片描述
编码单元是否继续被划分取决于分割标志位 Split flag。

①编码单元的大小可以大于传统的宏块大小(16×16)。对于平坦区域,用一个较大的编码单元编码可以减少所用的比特数,提高编码效率。这一点在高清视频应用领域体现得尤为明显。
②通过合理地选择CTU大小和最大层次深度,编码器的编码结构可以根据不同的图片内容、图片大小以及应用需求获得较大程度的优化。
③所有的单元类型都统称为编码单元,消除了宏块与亚宏块之分,并且编码单元的结构可以根据CTU大小、最大编码深度以及一系列划分标志 Split flag简单地表示出来。

CU经过PU(预测单元)与TU(变换单元)才能到达量化过程。
在这里插入图片描述

变换单元是独立完成变换和量化的基本单元,其尺寸也是灵活变化的。H.265/HEVC突破了原有的变换尺寸限制,可支持大小为4×4~32×32的编码变换,以变换单元(TU)为基本单元进行变换和量化。它的大小依赖于CU模式,在一个CU内,允许TU跨越多个PU,以四叉树的形式递归划分。对于一个2N×2N的CU,有一个标志位决定其是否划分为4个N×N的TU,是否可以进一步划分由SPS中的TU的最大划分深度决定。根据预测残差的局部变化特性,TU可以自适应地选择最优的模式。大块的TU模式能够将能量更好地集中,小块的TU模式能够保存更多的图像细节。这种灵活的分割结构,可以使变换后的残差能量得到充分压缩,以进一步提高编码增益。

RDO

// Mode-decision step: when the candidate CU (rpcTempCU) has a lower total cost than the
// current best CU (rpcBestCU), the two CU pointers and their per-depth prediction and
// reconstruction buffers are exchanged, so that the "best" slots hold the candidate's data.
// NOTE(review): the enclosing function signature is not part of this excerpt; this looks
// like the body of HM's TEncCu::xCheckBestMode - confirm against the HM source.
{
  // Nothing happens unless the candidate is strictly cheaper than the current best.
  if( rpcTempCU->getTotalCost() < rpcBestCU->getTotalCost() )
  {
    TComYuv* pcYuv; // scratch pointer reused for both buffer swaps below
    // Swap the CU data pointers: the candidate becomes the best, the previous best
    // takes the candidate's place.
    TComDataCU* pcCU = rpcBestCU;
    rpcBestCU = rpcTempCU;
    rpcTempCU = pcCU;

    // Swap the best/temp prediction buffers of this depth.
    pcYuv = m_ppcPredYuvBest[uiDepth];
    m_ppcPredYuvBest[uiDepth] = m_ppcPredYuvTemp[uiDepth];
    m_ppcPredYuvTemp[uiDepth] = pcYuv;

    // Swap the best/temp reconstruction buffers of this depth.
    pcYuv = m_ppcRecoYuvBest[uiDepth];
    m_ppcRecoYuvBest[uiDepth] = m_ppcRecoYuvTemp[uiDepth];
    m_ppcRecoYuvTemp[uiDepth] = pcYuv;

    // Clear the scratch pointers; neither is read again in this block.
    pcYuv = NULL;
    pcCU  = NULL;

    // store temp best CI for next CU coding
    // NOTE(review): presumably this copies the CI_TEMP_BEST coder state into CI_NEXT_BEST;
    // the copy direction of store() is not visible here - confirm.
    m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_NEXT_BEST]);


#if DEBUG_STRING
    // Debug-only section (compiled when DEBUG_STRING is set). After the DEBUG_STRING_SWAP
    // macro, a one-line summary (intra/inter, partition size, CU position, width) is appended
    // to sParent when the structure-debug mask selects this prediction mode and bAddSizeInfo is set.
    DEBUG_STRING_SWAP(sParent, sTest)
    const PredMode predMode=rpcBestCU->getPredictionMode(0);
    if ((DebugOptionList::DebugString_Structure.getInt()&DebugStringGetPredModeMask(predMode)) && bAddSizeInfo)
    {
      std::stringstream ss(stringstream::out);
      ss <<"###: " << (predMode==MODE_INTRA?"Intra   ":"Inter   ") << partSizeToString[rpcBestCU->getPartitionSize(0)] << " CU at " << rpcBestCU->getCUPelX() << ", " << rpcBestCU->getCUPelY() << " width=" << UInt(rpcBestCU->getWidth(0)) << std::endl;
      sParent+=ss.str();
    }
#endif
  }
}

getRefQP函数:获取QG的预测QP值

QG(Quantization Group,量化组)是指将一幅图像划分成的大小固定的正方形像素块,其大小由PPS指定,介于最小CU和最大CU之间(含两者)。QG主要在解码CU的时候用得着,编码的时候用不着。deltaQP是在编码时记录下来的,对应的函数是codeDeltaQP。

/** Compute the reference (predicted) QP for the partition at uiCurrAbsIdxInCtu.
 *
 * The left and above neighbours are looked up with getQpMinCuLeft / getQpMinCuAbove.
 * For each side, the neighbour's QP is used when a CU is returned; otherwise the value
 * of getLastCodedQP( uiCurrAbsIdxInCtu ) is used instead.
 *
 * \param uiCurrAbsIdxInCtu partition index, added to m_absZIdxInCtu for the neighbour lookups
 * \returns the rounded average of the two QPs: (left + above + 1) >> 1
 */
SChar TComDataCU::getRefQP( UInt uiCurrAbsIdxInCtu ) const
{
  // Both neighbour lookups use the same index.
  const UInt absZIdx = m_absZIdxInCtu + uiCurrAbsIdxInCtu;

  // Left neighbour; leftPartIdx is passed to the lookup and then used to read that CU's QP.
  UInt leftPartIdx = MAX_UINT;
  const TComDataCU* const leftCU = getQpMinCuLeft( leftPartIdx, absZIdx );

  // Above neighbour, handled the same way.
  UInt abovePartIdx = MAX_UINT;
  const TComDataCU* const aboveCU = getQpMinCuAbove( abovePartIdx, absZIdx );

  int qpLeft;
  if ( leftCU )
  {
    qpLeft = leftCU->getQP( leftPartIdx );
  }
  else
  {
    qpLeft = getLastCodedQP( uiCurrAbsIdxInCtu ); // no left neighbour returned
  }

  int qpAbove;
  if ( aboveCU )
  {
    qpAbove = aboveCU->getQP( abovePartIdx );
  }
  else
  {
    qpAbove = getLastCodedQP( uiCurrAbsIdxInCtu ); // no above neighbour returned
  }

  // Average of the two QPs, with +1 so the shift rounds instead of truncating.
  return ( ( qpLeft + qpAbove + 1 ) >> 1 );
}

一个QG的亮度QP(QPY )是由预测QP(predQP)和预测误差(deltaQP)相加得到:
$QP_Y = predQP + deltaQP$
当前QG的predQP基本上来自于上方和左方的QG的QPY(与上面getRefQP代码中的"+1"取整一致):$qPY_{PRED} = (qPY_A + qPY_B + 1) >> 1$
其中qPY_A代表左方的QG的QPY,qPY_B代表上方的QG的QPY(与H.265标准中的定义一致)。细节见HEVC获取参考QP

  • 5
    点赞
  • 19
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值