HM16.7量化部分学习记录
量化部分与其他函数
主函数入口为TAppEncoder中的encmain.cpp。然后进入:
cTAppEncTop.encode()
然后进入:
m_cTEncTop.encode()
然后进入:
m_cGOPEncoder.compressGOP()//编码图像组
再然后进入:
m_cSliceEncoder->compressSlice() //编码片
再然后进入:
m_pcCuEncoder->encodeCtu() //编码Ctu
依次进入:
xCompressCu() //编码Cu
xCheckRDCostIntra
estIntraPredLumaQT
estIntraPredLumaQT
xIntraCodingTUBlock
transformNxN()
量化函数及其结构
先总说其结构:
TComTrQuant.h —— transformNxN —— xQuant —— xRateDistOptQuant
除了沿着上面的路径找到transformNxN ,也可以直接在解决方案资源管理器这个位置搜transformNxN,单击打开。会打开头文件TComTrQuant.h
transformNxN
在打开的头文件里面会看到量化函数的声明
// Declaration of the forward transform + quantisation entry point for one TU.
Void transformNxN( TComTU & rTu,
const ComponentID compID,//!< component ID (enum): the Y, U or V plane
Pel * pcResidual,//!< residual samples (input)
const UInt uiStride,//!< stride of the residual buffer
TCoeff * rpcCoeff,//!< output: transformed and quantised coefficients
#if ADAPTIVE_QP_SELECTION
TCoeff * rpcArlCoeff,//!< output: transformed coefficients after adaptive quantisation
#endif
TCoeff & uiAbsSum,//!< output: sum of absolute values of the quantised coefficients
const QpParam & cQP//!< quantisation parameters for this component
);
接下来,转到函数定义
// Forward transform + quantisation of one TU.
// Tries RDPCM first; otherwise either bypasses transform/quantisation
// (lossless path), or runs transform-skip / the normal forward transform
// followed by xQuant.  Finally sets the CBF for the covered sub-parts.
Void TComTrQuant::transformNxN( TComTU & rTu,
const ComponentID compID, //!< component ID (enum): the Y, U or V plane
Pel * pcResidual,//!< residual samples (Pel = pixel type)
const UInt uiStride,//!< stride of the residual buffer (UInt = unsigned int)
TCoeff * rpcCoeff,//!< output: transformed and quantised coefficients (TCoeff = transform-coefficient type)
#if ADAPTIVE_QP_SELECTION
TCoeff * pcArlCoeff,//!< output: transformed coefficients after adaptive quantisation
#endif
TCoeff & uiAbsSum,//!< output: sum of absolute values of the quantised coefficients
const QpParam & cQP //!< quantisation parameters for this component
)
{
const TComRectangle &rect = rTu.getRect(compID); //TU rectangle
const UInt uiWidth = rect.width;//TU width
const UInt uiHeight = rect.height; //TU height
TComDataCU* pcCU = rTu.getCU();//current CU
const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();//TU start address
const UInt uiOrgTrDepth = rTu.GetTransformDepthRel();
uiAbsSum=0;
RDPCMMode rdpcmMode = RDPCM_OFF;
rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode );
if (rdpcmMode == RDPCM_OFF)//RDPCM was not applied
{
uiAbsSum = 0;//!< recompute the sum of absolute values below
//transform and quantise
if(pcCU->getCUTransquantBypass(uiAbsPartIdx))//!< transquant bypass: copy the residual pcResidual straight into rpcCoeff
{
const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID); //residual rotation flag
const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1; //largest coefficient index
for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
{
for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
{
const Pel currentSample = pcResidual[(y * uiStride) + x];
rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample; //copy residual
uiAbsSum += TCoeff(abs(currentSample)); //accumulate the absolute sum
}
}
}
else
{
#if DEBUG_TRANSFORM_AND_QUANTISE
std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n";
printBlock(pcResidual, uiWidth, uiHeight, uiStride);
#endif
assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)//transform-skip mode
{
xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID );
}
else//normal (non-skip) transform path
{
const Int channelBitDepth=pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
//forward transform
xT( channelBitDepth, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight, pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)) );
}
#if DEBUG_TRANSFORM_AND_QUANTISE
std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between transform and quantiser\n";
printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
#endif
//quantisation
xQuant( rTu, m_plTempCoeff, rpcCoeff,
#if ADAPTIVE_QP_SELECTION
pcArlCoeff,
#endif
uiAbsSum, compID, cQP );
#if DEBUG_TRANSFORM_AND_QUANTISE
std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of quantiser\n";
printBlock(rpcCoeff, uiWidth, uiHeight, uiWidth);
#endif
}
}
//set the CBF for the covered sub-parts (non-zero iff any coefficient survived)
pcCU->setCbfPartRange((((uiAbsSum > 0) ? 1 : 0) << uiOrgTrDepth), compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
}
QpParam 类是储存参数的,把它的定义也贴在这里
// Holds the quantisation parameters for one component of a CU.
struct QpParam
{
Int Qp;// full QP value (Qp = per*6 + rem)
Int per;//per = QP/6
Int rem;//rem = QP%6
// Construct from an explicit luma QP plus bit-depth/chroma offsets.
QpParam(const Int qpy,
const ChannelType chType,
const Int qpBdOffset,
const Int chromaQPOffset,
const ChromaFormat chFmt );
// Construct for one component of a CU, gathering offsets from PPS/slice.
QpParam(const TComDataCU &cu, const ComponentID compID);
}; // END STRUCT DEFINITION QpParam
// Construct QpParam from an explicit luma QP plus offsets.
// Luma: baseQp = qpy + qpBdOffset.  Chroma: qpy + chromaQPOffset is clipped,
// then (when non-negative) mapped through the chroma QP scaling table.
QpParam::QpParam(const Int qpy,
const ChannelType chType,
const Int qpBdOffset,
const Int chromaQPOffset,
const ChromaFormat chFmt )
{
Int baseQp;//base QP before the per/rem split
if (isLuma(chType)) //luma component
{
baseQp = qpy + qpBdOffset;
}
else //chroma component
{
baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset );
if(baseQp < 0)
{
baseQp = baseQp + qpBdOffset;// qpBdOffset is supplied by the caller; changing it changes the effective QP (the luma offset comes from the CU, the chroma offset from the overloaded constructor below)
}
else
{
baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset;// map through the chroma QP table for this chroma format
}
}
Qp =baseQp;
per=baseQp/6;
rem=baseQp%6;
}
// Construct QpParam for a component of a CU.  For chroma, the total offset is
// accumulated from the PPS component offset, the slice-level chroma QP delta
// and the PPS chroma-QP-offset list entry; then delegates to the explicit
// constructor above with the CU's QP and the SPS bit-depth offset.
QpParam::QpParam(const TComDataCU &cu, const ComponentID compID)
{
Int chromaQpOffset = 0;
if (isChroma(compID)) //chroma component only
{
chromaQpOffset += cu.getSlice()->getPPS()->getQpOffset(compID);
chromaQpOffset += cu.getSlice()->getSliceChromaQpDelta(compID);
chromaQpOffset += cu.getSlice()->getPPS()->getPpsRangeExtension().getChromaQpOffsetListEntry(cu.getChromaQpAdj(0)).u.offset[Int(compID)-1];
}
*this = QpParam(cu.getQP( 0 ),// delegate to the explicit constructor
toChannelType(compID),
cu.getSlice()->getSPS()->getQpBDOffset(toChannelType(compID)),
chromaQpOffset,
cu.getPic()->getChromaFormat());
}
QpParam 类是储存参数的,把它的定义也贴在这里
改变量化参数
在这里很重要的点:改变亮度分量的QpOffset
QpParam类中有改变色度的QpOffset的操作,改变亮度分量的QpOffset在TEncCu类当中的xComputeQP函数中进行,根据pcCU->getCUPelX()
和pcCU->getCUPelY()
返回的当前CU的位置,通过这个位置来设置iQpOffset,总QP=iBaseQp+iQpOffset,iBaseQp为配置文件当中设置的初始QP值。
// Compute the QP for the given CU: the slice QP plus an activity-based offset
// when adaptive QP is enabled; the result is clipped to the valid QP range.
// Fix: the original transcription was missing the closing ')' of the if.
Int TEncCu::xComputeQP( TComDataCU* pcCU, UInt uiDepth )
{
Int iBaseQp = pcCU->getSlice()->getSliceQp();// base QP configured in the cfg file
Int iQpOffset = 0;
if ( m_pcEncCfg->getUseAdaptiveQP() ) // adaptive QP enabled
{
TEncPic* pcEPic = dynamic_cast<TEncPic*>( pcCU->getPic() );
UInt uiAQDepth = min( uiDepth, pcEPic->getMaxAQDepth()-1 );
TEncPicQPAdaptationLayer* pcAQLayer = pcEPic->getAQLayer( uiAQDepth );
UInt uiAQUPosX = pcCU->getCUPelX() / pcAQLayer->getAQPartWidth();// x position of the current CU
UInt uiAQUPosY = pcCU->getCUPelY() / pcAQLayer->getAQPartHeight();// y position of the current CU
UInt uiAQUStride = pcAQLayer->getAQPartStride();
TEncQPAdaptationUnit* acAQU = pcAQLayer->getQPAdaptationUnit();
Double dMaxQScale = pow(2.0, m_pcEncCfg->getQPAdaptationRange()/6.0);
Double dAvgAct = pcAQLayer->getAvgActivity();
Double dCUAct = acAQU[uiAQUPosY * uiAQUStride + uiAQUPosX].getActivity();
Double dNormAct = (dMaxQScale*dCUAct + dAvgAct) / (dCUAct + dMaxQScale*dAvgAct);
Double dQpOffset = log(dNormAct) / log(2.0) * 6.0;// activity ratio mapped to a QP delta (6 per doubling)
iQpOffset = Int(floor( dQpOffset + 0.49999 ));// round to nearest integer
}
// clip to [-QpBDOffset, MAX_QP]
return Clip3(-pcCU->getSlice()->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQp+iQpOffset );
}
iMaxQP与iMinQp的控制
在xComputeQP函数值改变iQpOffset之后,编码出来的视频并没有改变,发现是后面的xCompressCu中的iMaxQP与iMinQp的控制限制了量化参数之前的改变。
Int iBaseQP = xComputeQP( rpcBestCU, uiDepth );// 获得当前Cu的Qp,传入当前CU和深度,计算对当前CU的QP;如果不是对每个CU自适应的改变QP,则直接用之前slice算出的QP
cout << "xCompressCu:" << iBaseQP << endl;
Int iMinQP;
Int iMaxQP;
Bool isAddLowestQP = false;// 是否增加最小的量化步长
const UInt numberValidComponents = rpcBestCU->getPic()->getNumberValidComponents(); // 获取成分数量,如果色度格式是CHROMA_400,数量为1,反之为3(最大)
// 计算最小/最大的量化步长
cout << "uiDepth:" << uiDepth << " getMaxCuDQPDepth:" << pps.getMaxCuDQPDepth() << endl;
if( uiDepth <= pps.getMaxCuDQPDepth() )// uiDepth表示CU深度,pps中获得的是配置文件中MaxCuDQPDepth
{
Int idQP = m_pcEncCfg->getMaxDeltaQP();// 配置文件中MaxDeltaQP
iMinQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP-idQP );
iMaxQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP+idQP );
//cout << "idQP:" << idQP << " iMinQP:" << iMinQP << " iMaxQP:" << iMaxQP << endl;
}
else
{
// cout << rpcTempCU->getQP()<<endl;
iMinQP = rpcTempCU->getQP(0);
iMaxQP = rpcTempCU->getQP(0);
//cout << "1 iMinQP:" << iMinQP << " iMaxQP:" << iMaxQP << endl;
}
if ( m_pcEncCfg->getUseRateCtrl() )//是否使用码率控制
{
iMinQP = m_pcRateCtrl->getRCQP();
iMaxQP = m_pcRateCtrl->getRCQP();
//cout << "2 iMinQP:" << iMinQP << " iMaxQP:" << iMaxQP << endl;
}
在[iMinQp,iMaxQp]的范围内遍历QP,选取最优的Qp(没有看懂这里,而且也不明白最终选择的最优Qp到底是什么,代码太长了不贴完)
if ( !bBoundary )// 如果不在边界
{
for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)// 在之前确定的QP范围中枚举QP/遍历每一个QP,执行下面的步骤,选取最优QP
{
const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP);
if (bIsLosslessMode)
{
iQP = lowestQP;
}
m_cuChromaQpOffsetIdxPlus1 = 0;
if (pcSlice->getUseChromaQpAdj())
{
经过和学长的讨论,发现后面如何在[iMinQp,iMaxQp]之间挑选Qp,不明白也没有关系,因为他是RDO一块的内容。是在iBaseQp的附近进行优化的,之前改iBaseQp的思路是正确的,编码后的视频Qp没有改变是由于在下面这段代码中。
(1)当CU深度(深度0代表大小为64*64)<=配置文件中MaxCuDQPDepth(CU量化最大深度)时,就使idQp= 配置文件中MaxDeltaQP(iMaxQP =iBaseQP+idQP,iMinQP =iBaseQP-idQP)。
(2)之前运行编码未改变,是因为配置文件中这两个参数都为0。当uiDepth 为0时,iMinQp=iMaxQp=iBaseQp,虽然iBaseQp在xComputeQp中改变了,但是继续分下去都会进入else语句,此时iMaxQp=iMinQp=getQp(0)(getQp(0)还不知道是哪里的Qp)。
(3)总结:只要不进入else语句,就不会使最终使用的Qp为getQp(0)。函数getQp(uiIdx)返回的是m_phQP[uiIdx],m_phQP是一个矩阵,这个矩阵的值由setQP设置,查找setQp的引用,发现setQp将m_iQp这个变量设置为m_phQP中的值。继续查找m_iQp引用发现m_iQp=int(m_fQp)
,继续查找m_fQp引用发现m_fQp是有初始值定义的,语句为("QP,q", m_fQP, 30.0, "Qp value, if value is float, QP is switched once during encoding")
这句英文对于初学者而言也较难理解,但是有大佬做的文档关于浮点Qp可以作为参考。
if( uiDepth <= pps.getMaxCuDQPDepth() )// uiDepth表示CU深度,pps中获得的是配置文件中MaxCuDQPDepth
{
Int idQP = m_pcEncCfg->getMaxDeltaQP();// 配置文件中MaxDeltaQP
iMinQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP-idQP );
iMaxQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP+idQP );
cout << "idQP:" << idQP << " iMinQP:" << iMinQP << " iMaxQP:" << iMaxQP << endl;
}
else
{
// cout << rpcTempCU->getQP()<<endl;
iMinQP = rpcTempCU->getQP(0);
iMaxQP = rpcTempCU->getQP(0);
cout << "1 iMinQP:" << iMinQP << " iMaxQP:" << iMaxQP << endl;
}
戏剧性的一幕来了,偶然间将前文所提到的两个参数改回0,发现else语句中iMinQP与iMaxQP的值发生改变,没有继续限制iQpOffset的改变,即getQP(0)的值发生了改变。但最后编码的视频没有改变。说明之前分析的原因错误,回到原点。
跟踪数据流发现,rpc->temp里面的m_phQP保持在之前xComputeQp返回的值,直到经过xCheckRDCostIntra函数,会还原在配置文件中的QP值。在xCheckRDCostIntra函数内部,Qp值发生突变的地方位于m_pcPredSearch->estIntraPredLumaQT函数,进入函数内部有一个模块:
//===== set QP and clear Cbf =====
if ( pps.getUseDQP() == true)//是否允许在CU层改变QP大小,原本是true我给改掉了,这个命令获取到的参数来自于pps中的cu_qp_delta_enabled_flag
{
pcCU->setQPSubParts( pcCU->getQP(0), 0, uiDepth );
}
else
{
pcCU->setQPSubParts( pcCU->getSlice()->getSliceQp(), 0, uiDepth );
}
由代码可知,如果 pps.getUseDQP() == true,就把使用的QP值改为getQP(0)(xComputeQp返回的值),否则用slice的Qp。pps.getUseDQP() 返回的是pps中的cu_qp_delta_enabled_flag,书上说这个语法元素指定是否允许在CU层调整Qp。
它的定义代码如下:
Void TEncTop::xInitPPS()
{
m_cPPS.setConstrainedIntraPred( m_bUseConstrainedIntraPred );
Bool bUseDQP = (getMaxCuDQPDepth() > 0)? true : false;
if((getMaxDeltaQP() != 0 )|| getUseAdaptiveQP())
{
bUseDQP = true;
}
if (m_costMode==COST_SEQUENCE_LEVEL_LOSSLESS || m_costMode==COST_LOSSLESS_CODING)
{
bUseDQP=false;
}
if ( m_RCEnableRateControl )
{
m_cPPS.setUseDQP(true);
m_cPPS.setMaxCuDQPDepth( 0 );
}
else if(bUseDQP)
{
m_cPPS.setUseDQP(true);
m_cPPS.setMaxCuDQPDepth( m_iMaxCuDQPDepth );
}
else
{
m_cPPS.setUseDQP(false);
m_cPPS.setMaxCuDQPDepth( 0 );
}
根据代码
Bool bUseDQP = (getMaxCuDQPDepth() > 0)? true : false;
我把配置文件中的MaxCuDQPDepth改成了3(把这里填成3,码流分析出来diff_cu_qp_delta_depth就是3,书上说表示给定QG的大小),至此,所有的m_phQP都变成了xComputeQP的返回值,但是最后编码出来的视频和我将配置文件里的Qp改成相同值编码出的视频相比质量更好。
模仿码率控制改变CU的量化参数
上文的引用部分是开启了多QP优化功能 ,使CU层能够改变Qp。但没有必要使用这个功能,直接在代码里面改就可以。 m_cPPS.setUseDQP(true);
这个语句就是将pps参数改变,使之能够允许在CU层改变QP。
Void TEncTop::xInitPPS()
{
m_cPPS.setConstrainedIntraPred( m_bUseConstrainedIntraPred );
Bool bUseDQP = (getMaxCuDQPDepth() > 0)? true : false;
if((getMaxDeltaQP() != 0 )|| getUseAdaptiveQP())
{
bUseDQP = true;
}
if (m_costMode==COST_SEQUENCE_LEVEL_LOSSLESS || m_costMode==COST_LOSSLESS_CODING)
{
bUseDQP=false;
}
if ( m_RCEnableRateControl )
{
m_cPPS.setUseDQP(true);
m_cPPS.setMaxCuDQPDepth( 0 );
}
else if(bUseDQP)
{
m_cPPS.setUseDQP(true);
m_cPPS.setMaxCuDQPDepth( m_iMaxCuDQPDepth );
}
else
{
m_cPPS.setUseDQP(false);
m_cPPS.setMaxCuDQPDepth( 0 );
}
//my code 设置CU层允许改变Qp*****************
if (true)
{
m_cPPS.setUseDQP(true);
}
//my***********
经过码流分析,发现编码的QP值的确是xComputeQP的值,但是CU的划分更加细小了,所以也导致编码出来的视频质量更好。
为探求原因,寻求老师帮助,从老师那里获取到RDO是以CTU(64*64)为单位进行下面的划分的,故尝试控制编码CTU的参数,参考rateControl的代码,进行代码更改。因为内容简单,不想定义一个类,故直接借用TEncRateCtrl这个类,添加了两个函数与一个data,都是模仿TEncRateCtrl这个类控制CTU的QP参数改的。
// Rate-control front-end of the encoder.  Extended here ("my code") with a
// per-CTU QP override (m_CtuQP + setter/getter) that mirrors the existing
// rate-control QP plumbing (m_RCQP).
class TEncRateCtrl
{
public:
TEncRateCtrl();
~TEncRateCtrl();
public:
Void init( Int totalFrames, Int targetBitrate, Int frameRate, Int GOPSize, Int picWidth, Int picHeight, Int LCUWidth, Int LCUHeight, Int keepHierBits, Bool useLCUSeparateModel, GOPEntry GOPList[MAX_GOP] );
Void destroy();
Void initRCPic( Int frameLevel );
Void initRCGOP( Int numberOfPictures );
Void destroyRCGOP();
public:
//my code: manually forced per-CTU QP, modelled on setRCQP/getRCQP below
Void setMyCtuQP(Int QP) { m_CtuQP = QP; }
Int getMyCtuQP() { return m_CtuQP; }
//my code
Void setRCQP ( Int QP ) { m_RCQP = QP; }
Int getRCQP () { return m_RCQP; }
TEncRCSeq* getRCSeq() { assert ( m_encRCSeq != NULL ); return m_encRCSeq; }
TEncRCGOP* getRCGOP() { assert ( m_encRCGOP != NULL ); return m_encRCGOP; }
TEncRCPic* getRCPic() { assert ( m_encRCPic != NULL ); return m_encRCPic; }
list<TEncRCPic*>& getPicList() { return m_listRCPictures; }
#if U0132_TARGET_BITS_SATURATION
Bool getCpbSaturationEnabled() { return m_CpbSaturationEnabled; }
UInt getCpbState() { return m_cpbState; }
UInt getCpbSize() { return m_cpbSize; }
UInt getBufferingRate() { return m_bufferingRate; }
Int updateCpbState(Int actualBits);
Void initHrdParam(const TComHRD* pcHrd, Int iFrameRate, Double fInitialCpbFullness);
#endif
private:
TEncRCSeq* m_encRCSeq;
TEncRCGOP* m_encRCGOP;
TEncRCPic* m_encRCPic;
list<TEncRCPic*> m_listRCPictures;
//my code: storage for the forced per-CTU QP
Int m_CtuQP;
//my
Int m_RCQP;
#if U0132_TARGET_BITS_SATURATION
Bool m_CpbSaturationEnabled; // Enable target bits saturation to avoid CPB overflow and underflow
Int m_cpbState; // CPB State
UInt m_cpbSize; // CPB size
UInt m_bufferingRate; // Buffering rate
#endif
};
#endif
在CompressSlice函数中 ,在rateControl起作用的位置前,模仿其控制Ctu量化参数的方法,加了如下代码:
//my code*************
// Force QP 37 for the first 10 CTUs of the slice; all later CTUs keep the
// slice QP.  Fix: the original transcription was missing the ';'.
if (ctuRsAddr < 10)//if the CTU raster-scan index is below 10
{
m_pcRateCtrl->setMyCtuQP(37);
}
else
{
m_pcRateCtrl->setMyCtuQP(pcSlice->getSliceQp());
}
//my*****************
if ( m_pcCfg->getUseRateCtrl() )
{
Int estQP = pcSlice->getSliceQp();
Double estLambda = -1.0;
Double bpp = -1.0;
现在仅仅是将TEncRateCtrl中的数据m_CtuQP的值改为了我们想要的Qp,但还未应用。接下来在rateControl参数的应用位置,模仿做同样的应用。在xCompressCu函数中:
//my code
// Override the QP search range with the forced per-CTU QP on every
// recursion into xCompressCu (iMinQP == iMaxQP pins the QP).
if (true)//always apply our own CTU QP
{
iMinQP = m_pcRateCtrl->getMyCtuQP();
iMaxQP = m_pcRateCtrl->getMyCtuQP();
cout << "1 iMinQP:" << iMinQP << " iMaxQP:" << iMaxQP << endl;
}
//
if ( m_pcEncCfg->getUseRateCtrl() )//rate control enabled? (would overwrite the values set above)
{
iMinQP = m_pcRateCtrl->getRCQP();
iMaxQP = m_pcRateCtrl->getRCQP();
cout << "2 iMinQP:" << iMinQP << " iMaxQP:" << iMaxQP << endl;
}
通过码流分析,发现在cfg文件中Qp设置32在代码中改为45编码出来的图像质量仍然高于在cfg文中设置45编码出的图像质量。又回到了原点。
为验证更改代码正确性,将cfg文件中Qp设置45而代码中也改为45编码出的图像 与 cfg文件中Qp设置45而代码中不改编码出的图像 比较,发现完全一样。至此,说明代码更改没有问题。
码流分析显示,cfg文件中Qp设置32而代码中改为45这种情况下编码出的图像的最终CU划分得更细致。非常幸运,我找到原因,在RDO中会根据cfg中QP参数修改λ,从而会根据配置文件里面的cfg定一个基础的质量,后面在CU层更改量化参数,由于λ的改变,会导致CU划分更细致,而TU不会超过CU。
改变λ
λ的值由Qp设置,在initEncSlice函数里面有根据Qp的值以及帧的类型设置λ,这里我不是很明白,但是通过调试看数据的变化,选取了必要的I帧的λ设置代码复制对每个CTU进行编码的代码中去,所以最终代码如下:
Double oldLambda = m_pcRdCost->getLambda(); // remember the slice lambda so it can be restored later
//my******************
// For the first 100 CTUs: force QP 37 and recompute lambda from it using the
// I-frame formula copied from initEncSlice (lambda = 0.57 * 2^((QP-12)/3)),
// then install both via setUpLambda.  Other CTUs keep the slice QP.
if (ctuRsAddr <100)//if the CTU raster-scan index is below 100
{
Int iQp_ctu = 37;
Int SHIFT_QP1 = 12;
Int bitdepth_luma_qp_scale1 = 0; // assumes 8-bit luma (no high-bit-depth scale) — TODO confirm
Double qp_temp1 = (Double)iQp_ctu + bitdepth_luma_qp_scale1 - SHIFT_QP1;
Double dLambda = 0.57*pow(2.0, qp_temp1 / 3.0);
m_pcRateCtrl->setMyCtuQP(iQp_ctu);
pcSlice->setSliceQp(iQp_ctu);
setUpLambda(pcSlice, dLambda, iQp_ctu);
//m_pcRdCost->setLambda(dLambda, pcSlice->getSPS()->getBitDepths());
cout << "setLambda:" << dLambda<<endl;
}
else{
m_pcRateCtrl->setMyCtuQP(pcSlice->getSliceQp());
}
//my*****************
到目前为止,将cfg中QP设置为37,代码中CTUQP也设置为37与图像不经过CTU修改QP的代码编码出的图像(称为图像1)完全一致。但将cfg参数改为32,CTU QP设置为37,编码出的图像(称为2)与前文图像1的CU划分不完全一样,但图像质量人眼效果看起来类似。
xQuant
接下来转到函数 xQuant(···)的定义
// Quantise the transform coefficients of one TU.
// Either runs rate-distortion-optimised quantisation (RDOQ) or the plain
// scalar quantiser with optional scaling lists and sign-bit hiding.
Void TComTrQuant::xQuant( TComTU &rTu,
TCoeff * pSrc,
TCoeff * pDes,
#if ADAPTIVE_QP_SELECTION
TCoeff *pArlDes,//output coefficients for adaptive QP selection
#endif
TCoeff &uiAbsSum,
const ComponentID compID,
const QpParam &cQP )
{
const TComRectangle &rect = rTu.getRect(compID);
const UInt uiWidth = rect.width;
const UInt uiHeight = rect.height;
TComDataCU* pcCU = rTu.getCU();
const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
TCoeff* piCoef = pSrc;//source (transformed) coefficients
TCoeff* piQCoef = pDes;//destination (quantised) coefficients
// adaptive QP selection output buffer
#if ADAPTIVE_QP_SELECTION
TCoeff* piArlCCoef = pArlDes;
#endif
const Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID);
const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID));
//RDOQ flag depends on whether the transform was skipped
Bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ;
if ( useRDOQ && (isLuma(compID) || RDOQ_CHROMA) )//RDOQ path
{
#if T0196_SELECTIVE_RDOQ
if ( !m_useSelectiveRDOQ || xNeedRDOQ( rTu, piCoef, compID, cQP ) )
{
#endif
#if ADAPTIVE_QP_SELECTION
xRateDistOptQuant( rTu, piCoef, pDes, pArlDes, uiAbsSum, compID, cQP );// RDOQ quantisation - the key call
#else
xRateDistOptQuant( rTu, piCoef, pDes, uiAbsSum, compID, cQP );// RDOQ quantisation - the key call
#endif
#if T0196_SELECTIVE_RDOQ
}
else
{
// selective RDOQ decided the block is all-zero: clear the output
memset( pDes, 0, sizeof( TCoeff ) * uiWidth *uiHeight );
uiAbsSum = 0;
}
#endif
}
else
{
// plain (non-RDOQ) scalar quantisation
TUEntropyCodingParameters codingParameters;
getTUEntropyCodingParameters(codingParameters, rTu, compID);
const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;
TCoeff deltaU[MAX_TU_SIZE * MAX_TU_SIZE];
const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
assert(scalingListType < SCALING_LIST_NUM);
Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2);
const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
*/
// Represents scaling through forward transform
Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange);
if (useTransformSkip && pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
{
iTransformShift = std::max<Int>(0, iTransformShift);
}
const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
#if ADAPTIVE_QP_SELECTION
Int iQBitsC = MAX_INT;
Int iAddC = MAX_INT;
if (m_bUseAdaptQpSelect)
{
iQBitsC = iQBits - ARL_C_PRECISION;
iAddC = 1 << (iQBitsC-1);
}
#endif
// rounding offset: 1/3 for I-slices, 1/6 otherwise (171/512 vs 85/512)
const Int iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
const Int qBits8 = iQBits - 8;
for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ )
{
const TCoeff iLevel = piCoef[uiBlockPos];
const TCoeff iSign = (iLevel < 0 ? -1: 1);
const Int64 tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);
#if ADAPTIVE_QP_SELECTION
if( m_bUseAdaptQpSelect )
{
piArlCCoef[uiBlockPos] = (TCoeff)((tmpLevel + iAddC ) >> iQBitsC);
}
#endif
const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
deltaU[uiBlockPos] = (TCoeff)((tmpLevel - (quantisedMagnitude<<iQBits) )>> qBits8);// quantisation residue, used by sign-bit hiding below
uiAbsSum += quantisedMagnitude;
const TCoeff quantisedCoefficient = quantisedMagnitude * iSign;
piQCoef[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
} // for n
if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
{
if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested
{
signBitHidingHDQ( piQCoef, piCoef, deltaU, codingParameters, maxLog2TrDynamicRange ) ;
}
}
} //if RDOQ
//return;
}
xRateDistOptQuant
接下来重要的函数是xRateDistOptQuant(···),转到其定义,这个函数很长。
Void TComTrQuant::xRateDistOptQuant ( TComTU &rTu,
TCoeff * plSrcCoeff,
TCoeff * piDstCoeff,
#if ADAPTIVE_QP_SELECTION
TCoeff * piArlDstCoeff,
#endif
TCoeff &uiAbsSum,
const ComponentID compID,
const QpParam &cQP )
{
// 带率失真优化的量化函数
const TComRectangle & rect = rTu.getRect(compID);
const UInt uiWidth = rect.width;
const UInt uiHeight = rect.height;
TComDataCU * pcCU = rTu.getCU();
const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
const ChannelType channelType = toChannelType(compID);
const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
const Bool extendedPrecision = pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID));
const Int channelBitDepth = rTu.getCU()->getSlice()->getSPS()->getBitDepth(channelType);
/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
*/
// Represents scaling through forward transform
Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange);
if ((pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && extendedPrecision)
{
iTransformShift = std::max<Int>(0, iTransformShift);
}
const Bool bUseGolombRiceParameterAdaptation = pcCU->getSlice()->getSPS()->getSpsRangeExtension().getPersistentRiceAdaptationEnabledFlag();
const UInt initialGolombRiceParameter = m_pcEstBitsSbac->golombRiceAdaptationStatistics[rTu.getGolombRiceStatisticsIndex(compID)] / RExt__GOLOMB_RICE_INCREMENT_DIVISOR;
UInt uiGoRiceParam = initialGolombRiceParameter;
Double d64BlockUncodedCost = 0;
const UInt uiLog2BlockWidth = g_aucConvertToBit[ uiWidth ] + 2;
const UInt uiLog2BlockHeight = g_aucConvertToBit[ uiHeight ] + 2;
const UInt uiMaxNumCoeff = uiWidth * uiHeight;
assert(compID<MAX_NUM_COMPONENT);
Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
assert(scalingListType < SCALING_LIST_NUM);
#if ADAPTIVE_QP_SELECTION
memset(piArlDstCoeff, 0, sizeof(TCoeff) * uiMaxNumCoeff);
#endif
Double pdCostCoeff [ MAX_TU_SIZE * MAX_TU_SIZE ];
Double pdCostSig [ MAX_TU_SIZE * MAX_TU_SIZE ];
Double pdCostCoeff0[ MAX_TU_SIZE * MAX_TU_SIZE ];
memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
Int rateIncUp [ MAX_TU_SIZE * MAX_TU_SIZE ];
Int rateIncDown [ MAX_TU_SIZE * MAX_TU_SIZE ];
Int sigRateDelta[ MAX_TU_SIZE * MAX_TU_SIZE ];
TCoeff deltaU [ MAX_TU_SIZE * MAX_TU_SIZE ];
memset( rateIncUp, 0, sizeof(Int ) * uiMaxNumCoeff );
memset( rateIncDown, 0, sizeof(Int ) * uiMaxNumCoeff );
memset( sigRateDelta, 0, sizeof(Int ) * uiMaxNumCoeff );
memset( deltaU, 0, sizeof(TCoeff) * uiMaxNumCoeff );
const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
const Double *const pdErrScale = getErrScaleCoeff(scalingListType, (uiLog2TrSize-2), cQP.rem);
const Int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem, (uiLog2TrSize-2));
const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
const Double defaultErrorScale = getErrScaleCoeffNoScalingList(scalingListType, (uiLog2TrSize-2), cQP.rem);
const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;
#if ADAPTIVE_QP_SELECTION
Int iQBitsC = iQBits - ARL_C_PRECISION;
Int iAddC = 1 << (iQBitsC-1);
#endif
TUEntropyCodingParameters codingParameters;
getTUEntropyCodingParameters(codingParameters, rTu, compID);
const UInt uiCGSize = (1 << MLS_CG_SIZE);
Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
Int iCGLastScanPos = -1;
UInt uiCtxSet = 0;
Int c1 = 1;
Int c2 = 0;
Double d64BaseCost = 0;
Int iLastScanPos = -1;
UInt c1Idx = 0;
UInt c2Idx = 0;
Int baseLevel;
memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
Int iScanPos;
coeffGroupRDStats rdStats;
const UInt significanceMapContextOffset = getSignificanceMapContextOffset(compID);
for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
{
UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
UInt uiCGPosY = uiCGBlkPos / codingParameters.widthInGroups;
UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * codingParameters.widthInGroups);
memset( &rdStats, 0, sizeof (coeffGroupRDStats));
const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups);
for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
{// 这里是实际进行量化的地方
iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
//===== quantization =====
UInt uiBlkPos = codingParameters.scan[iScanPos];
// set coeff
const Int quantisationCoefficient = (enableScalingLists) ? piQCoef [uiBlkPos] : defaultQuantisationCoefficient;
const Double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale;
const Int64 tmpLevel = Int64(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
const Intermediate_Int lLevelDouble = (Intermediate_Int)min<Int64>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));
#if ADAPTIVE_QP_SELECTION
if( m_bUseAdaptQpSelect )
{
piArlDstCoeff[uiBlkPos] = (TCoeff)(( lLevelDouble + iAddC) >> iQBitsC );
}
#endif
const UInt uiMaxAbsLevel = std::min<UInt>(UInt(entropyCodingMaximum), UInt((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
const Double dErr = Double( lLevelDouble );
pdCostCoeff0[ iScanPos ] = dErr * dErr * errorScale;
d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
{
iLastScanPos = iScanPos;
uiCtxSet = getContextSetIndex(compID, (iScanPos >> MLS_CG_SIZE), 0);
iCGLastScanPos = iCGScanPos;
}
if ( iLastScanPos >= 0 )
{
//===== coefficient level estimation =====
UInt uiLevel;
UInt uiOneCtx = (NUM_ONE_FLAG_CTX_PER_SET * uiCtxSet) + c1;
UInt uiAbsCtx = (NUM_ABS_FLAG_CTX_PER_SET * uiCtxSet) + c2;
if( iScanPos == iLastScanPos )
{
uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
lLevelDouble, uiMaxAbsLevel, significanceMapContextOffset, uiOneCtx, uiAbsCtx, uiGoRiceParam,
c1Idx, c2Idx, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange
);
}
else
{
UShort uiCtxSig = significanceMapContextOffset + getSigCtxInc( patternSigCtx, codingParameters, iScanPos, uiLog2BlockWidth, uiLog2BlockHeight, channelType );
uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
c1Idx, c2Idx, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange
);
sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
}
deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
if( uiLevel > 0 )
{
Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange );
rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
}
else // uiLevel == 0
{
rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
}
piDstCoeff[ uiBlkPos ] = uiLevel;
d64BaseCost += pdCostCoeff [ iScanPos ];
baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
if( uiLevel >= baseLevel )
{
if (uiLevel > 3*(1<<uiGoRiceParam))
{
uiGoRiceParam = bUseGolombRiceParameterAdaptation ? (uiGoRiceParam + 1) : (std::min<UInt>((uiGoRiceParam + 1), 4));
}
}
if ( uiLevel >= 1)
{
c1Idx ++;
}
//===== update bin model =====
if( uiLevel > 1 )
{
c1 = 0;
c2 += (c2 < 2);
c2Idx ++;
}
else if( (c1 < 3) && (c1 > 0) && uiLevel)
{
c1++;
}
//===== context set update =====
if( ( iScanPos % uiCGSize == 0 ) && ( iScanPos > 0 ) )
{
uiCtxSet = getContextSetIndex(compID, ((iScanPos - 1) >> MLS_CG_SIZE), (c1 == 0)); //(iScanPos - 1) because we do this **before** entering the final group
c1 = 1;
c2 = 0;
c1Idx = 0;
c2Idx = 0;
uiGoRiceParam = initialGolombRiceParameter;
}
}
else
{
d64BaseCost += pdCostCoeff0[ iScanPos ];
}
rdStats.d64SigCost += pdCostSig[ iScanPos ];
if (iScanPosinCG == 0 )
{
rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
}
if (piDstCoeff[ uiBlkPos ] )
{
uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
if ( iScanPosinCG != 0 )
{
rdStats.iNNZbeforePos0++;
}
}
} //end for (iScanPosinCG)
if (iCGLastScanPos >= 0)
{
if( iCGScanPos )
{
if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
{
UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
}
else
{
if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
{
if ( rdStats.iNNZbeforePos0 == 0 )
{
d64BaseCost -= rdStats.d64SigCost_0;
rdStats.d64SigCost -= rdStats.d64SigCost_0;
}
// rd-cost if SigCoeffGroupFlag = 0, initialization
Double d64CostZeroCG = d64BaseCost;
// add SigCoeffGroupFlag cost to total cost
UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
if (iCGScanPos < iCGLastScanPos)
{
d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
}
// try to convert the current coeff group from non-zero to all-zero
d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
// if we can save cost, change this block to all-zero block
if ( d64CostZeroCG < d64BaseCost )
{
uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
d64BaseCost = d64CostZeroCG;
if (iCGScanPos < iCGLastScanPos)
{
pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
}
// reset coeffs to 0 in this block
for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
{
iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
UInt uiBlkPos = codingParameters.scan[ iScanPos ];
if (piDstCoeff[ uiBlkPos ])
{
piDstCoeff [ uiBlkPos ] = 0;
pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
pdCostSig [ iScanPos ] = 0;
}
}
} // end if ( d64CostAllZeros < d64BaseCost )
}
} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
}
else
{
uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
}
}
} //end for (iCGScanPos)
//===== estimate last position =====
if ( iLastScanPos < 0 )
{
return;
}
Double d64BestCost = 0;
Int ui16CtxCbf = 0;
Int iBestLastIdxP1 = 0;
if( !pcCU->isIntra( uiAbsPartIdx ) && isLuma(compID) && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
{
ui16CtxCbf = 0;
d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
}
else
{
ui16CtxCbf = pcCU->getCtxQtCbf( rTu, channelType );
ui16CtxCbf += getCBFContextOffset(compID);
d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
}
Bool bFoundLast = false;
for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
{
UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
{
for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
{
iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
if (iScanPos > iLastScanPos)
{
continue;
}
UInt uiBlkPos = codingParameters.scan[iScanPos];
if( piDstCoeff[ uiBlkPos ] )
{
UInt uiPosY = uiBlkPos >> uiLog2BlockWidth;
UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
Double d64CostLast= codingParameters.scanType == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, compID ) : xGetRateLast( uiPosX, uiPosY, compID );
Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
if( totalCost < d64BestCost )
{
iBestLastIdxP1 = iScanPos + 1;
d64BestCost = totalCost;
}
if( piDstCoeff[ uiBlkPos ] > 1 )
{
bFoundLast = true;
break;
}
d64BaseCost -= pdCostCoeff[ iScanPos ];
d64BaseCost += pdCostCoeff0[ iScanPos ];
}
else
{
d64BaseCost -= pdCostSig[ iScanPos ];
}
} //end for
if (bFoundLast)
{
break;
}
} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
} // end for
for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
{
Int blkPos = codingParameters.scan[ scanPos ];
TCoeff level = piDstCoeff[ blkPos ];
uiAbsSum += level;
piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
}
//===== clean uncoded coefficients =====
for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
{
piDstCoeff[ codingParameters.scan[ scanPos ] ] = 0;
}
if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
{
const Double inverseQuantScale = Double(g_invQuantScales[cQP.rem]);
Int64 rdFactor = (Int64)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per))
/ m_dLambda / 16 / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth - 8)))
+ 0.5);
Int lastCG = -1;
Int absSum = 0 ;
Int n ;
for( Int subSet = (uiWidth*uiHeight-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
{
Int subPos = subSet << MLS_CG_SIZE;
Int firstNZPosInCG=uiCGSize , lastNZPosInCG=-1 ;
absSum = 0 ;
for(n = uiCGSize-1; n >= 0; --n )
{
if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
{
lastNZPosInCG = n;
break;
}
}
for(n = 0; n <uiCGSize; n++ )
{
if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
{
firstNZPosInCG = n;
break;
}
}
for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
{
absSum += Int(piDstCoeff[ codingParameters.scan[ n + subPos ]]);
}
if(lastNZPosInCG>=0 && lastCG==-1)
{
lastCG = 1;
}
if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
{
UInt signbit = (piDstCoeff[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1);
if( signbit!=(absSum&0x1) ) // hide but need tune
{
// calculate the cost
Int64 minCostInc = std::numeric_limits<Int64>::max(), curCost = std::numeric_limits<Int64>::max();
Int minPos = -1, finalChange = 0, curChange = 0;
for( n = (lastCG==1?lastNZPosInCG:uiCGSize-1) ; n >= 0; --n )
{
UInt uiBlkPos = codingParameters.scan[ n + subPos ];
if(piDstCoeff[ uiBlkPos ] != 0 )
{
Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
- ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
{
costDown -= (4<<15);
}
if(costUp<costDown)
{
curCost = costUp;
curChange = 1;
}
else
{
curChange = -1;
if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
{
curCost = std::numeric_limits<Int64>::max();
}
else
{
curCost = costDown;
}
}
}
else
{
curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
curChange = 1 ;
if(n<firstNZPosInCG)
{
UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
if(thissignbit != signbit )
{
curCost = std::numeric_limits<Int64>::max();
}
}
}
if( curCost<minCostInc)
{
minCostInc = curCost;
finalChange = curChange;
minPos = uiBlkPos;
}
}
if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
{
finalChange = -1;
}
if(plSrcCoeff[minPos]>=0)
{
piDstCoeff[minPos] += finalChange ;
}
else
{
piDstCoeff[minPos] -= finalChange ;
}
}
}
if(lastCG==1)
{
lastCG=0 ;
}
}
}
}
xQuant()中还有一个重要函数getQuantCoeff(),用于获取量化矩阵中的系数。
该函数的定义可以在HM16.7的源码中查找到。
CU的划分
因为每一次划分CU之后,经过RDO过程,都会经历一次量化,所以为了确定图像中某个位置的量化参数,还要弄明白一帧中的CU具体划分过程。
CU划分原理
参考自CTU、CU、PU、TU简介
传统的视频编码都是基于宏块实现的,对于4:2:0采样格式的视频,每个宏块包含一个16×16大小的亮度块和两个8×8大小的色度块。考虑到高清视频、超高清视频的自身特性,H.265/HEVC标准中引入了树形编码单元CTU,其尺寸由编码器指定,且可大于宏块尺寸。同一位置处的一个亮度CTB和两个色度CTB,再加上相应的语法元素形成一个CTU。
在H.265/HEVC中,一幅图像可以被划分为若干个互不重叠的CTU,在CTU内部,采用基于四叉树的循环分层结构。同一层次上的编码单元具有相同的分割深度。一个CTU可能只包含一个CU(没有进行划分),也可能被划分为多个CU。
编码单元是否继续被划分取决于分割标志位 Split flag。
①编码单元的大小可以大于传统的宏块大小(16×16)。对于平坦区域,用一个较大的编码单元编码可以减少所用的比特数,提高编码效率。这一点在高清视频应用领域体现得尤为明显。
②通过合理地选择CTU大小和最大层次深度,编码器的编码结构可以根据不同的图片内容、图片大小以及应用需求获得较大程度的优化
③所有的单元类型都统称为编码单元,消除了宏块与亚宏块之分,并且编码单元的结构可以根据CTU大小、最大编码深度以及一系列划分标志 Split flag简单地表示出来。
CU经过PU(预测单元)与TU(变换单元)才能到达量化过程。
变换单元是独立完成变换和量化的基本单元,其尺寸也是灵活变化的。H.265/HEVC突破了原有的变换尺寸限制,可支持大小为4×4~32×32的编码变换,以变换单元(TU)为基本单元进行变换和量化。它的大小依赖于CU模式,在一个CU内,允许TU跨越多个PU,以四叉树的形式递归划分。对于一个2N×2N的CU,有一个标志位决定其是否划分为4个N×N的TU,是否可以进一步划分由SPS中的TU的最大划分深度决定。根据预测残差的局部变化特性,TU可以自适应地选择最优的模式。大块的TU模式能够将能量更好地集中,小块的TU模式能够保存更多的图像细节。 这种灵活的分割结构,可以使变换后的残差能量得到充分压缩,以进一步提高编码增益。
RDO
{
if( rpcTempCU->getTotalCost() < rpcBestCU->getTotalCost() )
{
TComYuv* pcYuv;
// Change Information data
TComDataCU* pcCU = rpcBestCU;
rpcBestCU = rpcTempCU;
rpcTempCU = pcCU;
// Change Prediction data
pcYuv = m_ppcPredYuvBest[uiDepth];
m_ppcPredYuvBest[uiDepth] = m_ppcPredYuvTemp[uiDepth];
m_ppcPredYuvTemp[uiDepth] = pcYuv;
// Change Reconstruction data
pcYuv = m_ppcRecoYuvBest[uiDepth];
m_ppcRecoYuvBest[uiDepth] = m_ppcRecoYuvTemp[uiDepth];
m_ppcRecoYuvTemp[uiDepth] = pcYuv;
pcYuv = NULL;
pcCU = NULL;
// store temp best CI for next CU coding
m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_NEXT_BEST]);
#if DEBUG_STRING
DEBUG_STRING_SWAP(sParent, sTest)
const PredMode predMode=rpcBestCU->getPredictionMode(0);
if ((DebugOptionList::DebugString_Structure.getInt()&DebugStringGetPredModeMask(predMode)) && bAddSizeInfo)
{
std::stringstream ss(stringstream::out);
ss <<"###: " << (predMode==MODE_INTRA?"Intra ":"Inter ") << partSizeToString[rpcBestCU->getPartitionSize(0)] << " CU at " << rpcBestCU->getCUPelX() << ", " << rpcBestCU->getCUPelY() << " width=" << UInt(rpcBestCU->getWidth(0)) << std::endl;
sParent+=ss.str();
}
#endif
}
}
getRefQP函数:获取QG的预测QP值
QG是指将一幅图像分成大小固定的正方形像素块,其大小由PPS指定,介于最小CU和最大CU之间。预测QP主要在解码CU的时候用得着,编码的时候用不着;deltaQP是编码时记录下来的,对应的函数是codeDeltaQP。
SChar TComDataCU::getRefQP( UInt uiCurrAbsIdxInCtu ) const
{
UInt lPartIdx = MAX_UINT;
UInt aPartIdx = MAX_UINT;
const TComDataCU* cULeft = getQpMinCuLeft ( lPartIdx, m_absZIdxInCtu + uiCurrAbsIdxInCtu );//获取左边QG的QP_Y
const TComDataCU* cUAbove = getQpMinCuAbove(aPartIdx, m_absZIdxInCtu + uiCurrAbsIdxInCtu); // 获取上边QG的QP_Y
return (((cULeft? cULeft->getQP( lPartIdx ): getLastCodedQP( uiCurrAbsIdxInCtu )) + (cUAbove? cUAbove->getQP( aPartIdx ): getLastCodedQP( uiCurrAbsIdxInCtu )) + 1) >> 1);//返回上面两个QP_Y的和的一半
}
一个QG的亮度QP(QPY )是由预测QP(predQP)和预测误差(deltaQP)相加得到:
$$QP_Y = predQP + deltaQP$$
当前QG的predQP基本上来自于上方和左方的QG的QPY:
$$qPY_{PRED} = ( qPY_A + qPY_B + 1 ) >> 1$$
其中qPY_A代表左方QG的QPY,qPY_B代表上方QG的QPY(与上面getRefQP代码中cULeft、cUAbove的取法一致,不可用时用上一个已编码的QP代替)。细节见HEVC获取参考QP。