HEVC的帧内预测的架构分为三个步骤:
①构建参考像素数组;②生成预测像素;③后处理操作。
HEVC标准对这三个步骤进行了精心设计,以求达到较高的编码效率,同时降低编码端和解码端的运算要求。HEVC标准中多种预定义的预测模式构成一个模式集合,可以对视频和静态图像中的多种内容进行预测建模。HEVC的角度预测对包含方向性纹理的物体提供了更高的预测精度,此外平面(Planar)模式和DC模式可以高效地表示图像的平滑区域。
接下来记录一下我对HEVC中帧内预测部分代码的理解,不正确的地方望指教。
编码的入口函数是encmain.cpp文件中的main函数,调用encode函数进行编码
/**
 * Top-level encoding loop of the encoder application.
 *
 * Opens the output bitstream file, sets up the encoder library, then repeatedly
 * reads one input picture, passes it to m_cTEncTop.encode() and writes out any
 * access units that call produced. The loop ends once the requested number of
 * frames has been received or the input file reports end of file. All buffers
 * and library objects created here are released before returning.
 *
 * Terminates the process with EXIT_FAILURE if the bitstream file cannot be
 * opened for writing.
 */
Void TAppEncTop::encode()
{
fstream bitstreamFile(m_bitstreamFileName.c_str(), fstream::binary | fstream::out); // open the bitstream file for binary output
if (!bitstreamFile) // the stream could not be opened: report the error and terminate
{
fprintf(stderr, "\nfailed to open bitstream file `%s' for writing\n", m_bitstreamFileName.c_str());
exit(EXIT_FAILURE);
}
TComPicYuv* pcPicYuvOrg = new TComPicYuv; // buffer for the original input picture (destroyed and deleted after the loop)
TComPicYuv* pcPicYuvRec = NULL;
// initialize internal class & member variables
xInitLibCfg(); // presumably configures the encoder parameters (helper not shown here)
xCreateLib(); // presumably opens the input/reconstruction video files and creates the encoder objects (helper not shown here)
xInitLib(m_isField); // presumably initialises parameter sets and the encoder sub-modules (helper not shown here)
printChromaFormat(); // presumably prints the input and output chroma formats
// main encoder loop: set up the loop state
Int iNumEncoded = 0; // written by m_cTEncTop.encode(); a value > 0 means there is output to write
Bool bEos = false; // set to true when the loop should stop
const InputColourSpaceConversion ipCSC = m_inputColourSpaceConvert;
const InputColourSpaceConversion snrCSC = (!m_snrInternalColourSpace) ? m_inputColourSpaceConvert : IPCOLOURSPACE_UNCHANGED;
list<AccessUnit> outputAccessUnits; ///< list of access units to write out. is populated by the encoding process
TComPicYuv cPicYuvTrueOrg;
// allocate original YUV buffer (field mode uses m_iSourceHeightOrg, frame mode uses m_iSourceHeight)
if( m_isField )
{
pcPicYuvOrg->create ( m_iSourceWidth, m_iSourceHeightOrg, m_chromaFormatIDC, m_uiMaxCUWidth, m_uiMaxCUHeight, m_uiMaxTotalCUDepth, true );
cPicYuvTrueOrg.create(m_iSourceWidth, m_iSourceHeightOrg, m_chromaFormatIDC, m_uiMaxCUWidth, m_uiMaxCUHeight, m_uiMaxTotalCUDepth, true);
}
else
{
pcPicYuvOrg->create ( m_iSourceWidth, m_iSourceHeight, m_chromaFormatIDC, m_uiMaxCUWidth, m_uiMaxCUHeight, m_uiMaxTotalCUDepth, true );
cPicYuvTrueOrg.create(m_iSourceWidth, m_iSourceHeight, m_chromaFormatIDC, m_uiMaxCUWidth, m_uiMaxCUHeight, m_uiMaxTotalCUDepth, true );
}
while ( !bEos ) // keep reading and encoding pictures until bEos is set
{
// get buffers
xGetBuffer(pcPicYuvRec);
// read input YUV file
m_cTVideoIOYuvInputFile.read( pcPicYuvOrg, &cPicYuvTrueOrg, ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range );
// increase number of received frames
m_iFrameRcvd++;
// stop after m_framesToBeEncoded reads in frame mode, or after half that many reads in field mode
bEos = (m_isField && (m_iFrameRcvd == (m_framesToBeEncoded >> 1) )) || ( !m_isField && (m_iFrameRcvd == m_framesToBeEncoded) );
Bool flush = 0;
// if end of file (which is only detected on a read failure) flush the encoder of any queued pictures
if (m_cTVideoIOYuvInputFile.isEof())
{
flush = true;
bEos = true;
// the read above failed, so undo the increment and tell the encoder the actual frame count
m_iFrameRcvd--;
m_cTEncTop.setFramesToBeEncoded(m_iFrameRcvd);
}
// call encoding function for one frame; null picture pointers are passed when flushing
if ( m_isField )
{
m_cTEncTop.encode( bEos, flush ? 0 : pcPicYuvOrg, flush ? 0 : &cPicYuvTrueOrg, snrCSC, m_cListPicYuvRec, outputAccessUnits, iNumEncoded, m_isTopFieldFirst );
}
else
{
m_cTEncTop.encode( bEos, flush ? 0 : pcPicYuvOrg, flush ? 0 : &cPicYuvTrueOrg, snrCSC, m_cListPicYuvRec, outputAccessUnits, iNumEncoded );
}
// write bitstream to file if necessary (only when iNumEncoded > 0)
if ( iNumEncoded > 0 )
{
xWriteOutput(bitstreamFile, iNumEncoded, outputAccessUnits);
outputAccessUnits.clear();
}
// temporally skip frames
if( m_temporalSubsampleRatio > 1 )
{
m_cTVideoIOYuvInputFile.skipFrames(m_temporalSubsampleRatio-1, m_iSourceWidth - m_aiPad[0], m_iSourceHeight - m_aiPad[1], m_InputChromaFormatIDC);
}
}
m_cTEncTop.printSummary(m_isField); // presumably prints the encoding result statistics
// delete original YUV buffer
pcPicYuvOrg->destroy();
delete pcPicYuvOrg;
pcPicYuvOrg = NULL;
// delete used buffers in encoder class
m_cTEncTop.deletePicBuffer();
cPicYuvTrueOrg.destroy();
// delete buffers & classes
xDeleteBuffer();
xDestroyLib();
printRateSummary(); // presumably prints the overall bitrate information
return;
}
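TAppEncTop::encode函数每次循环读入一帧,并调用下面的TEncTop::encode函数;TEncTop::encode在m_iNumPicRcvd不为0且已接收的帧数达到m_iGOPSize时(或flush为真、m_iPOCLast为0、m_iGOPSize为0时)才调用compressGOP统一处理,否则将iNumEncoded置0后直接返回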
/**
 * Queue one input picture and, once enough pictures are pending, compress them as a GOP.
 *
 * \param flush               when true, pending pictures are compressed even if fewer than m_iGOPSize were received
 * \param pcPicYuvOrg         original picture to queue; NULL means no new picture is supplied
 * \param pcPicYuvTrueOrg     second original picture, copied into the picture's "true original" buffer;
 *                            dereferenced whenever pcPicYuvOrg is non-NULL
 * \param snrCSC              colour space conversion forwarded to compressGOP
 * \param rcListPicYuvRecOut  reconstructed picture list forwarded to compressGOP
 * \param accessUnitsOut      access unit list forwarded to compressGOP
 * \param iNumEncoded         output: 0 when nothing was compressed, otherwise the number of
 *                            received pictures handed to compressGOP by this call
 */
Void TEncTop::encode( Bool flush, TComPicYuv* pcPicYuvOrg, TComPicYuv* pcPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, TComList<TComPicYuv*>& rcListPicYuvRecOut, std::list<AccessUnit>& accessUnitsOut, Int& iNumEncoded )
{
  if ( NULL != pcPicYuvOrg )
  {
    // Obtain a picture buffer and copy both original pictures into it.
    TComPic* pcNewPic = NULL;
    xGetNewPicBuffer( pcNewPic );
    pcPicYuvOrg->copyToPic( pcNewPic->getPicYuvOrg() );
    pcPicYuvTrueOrg->copyToPic( pcNewPic->getPicYuvTrueOrg() );

    // With adaptive QP enabled, run the pre-analyser on the new picture.
    if ( getUseAdaptiveQP() )
    {
      TEncPic* const pcEncPic = dynamic_cast<TEncPic*>( pcNewPic );
      m_cPreanalyzer.xPreanalyze( pcEncPic );
    }
  }

  // Nothing is compressed while no picture is pending, or while the GOP is
  // still filling up (unless flushing, m_iPOCLast is 0, or m_iGOPSize is 0).
  const Bool bNothingPending  = ( m_iNumPicRcvd == 0 );
  const Bool bGopStillFilling = !flush && ( m_iPOCLast != 0 ) && ( m_iNumPicRcvd != m_iGOPSize ) && ( m_iGOPSize != 0 );
  if ( bNothingPending || bGopStillFilling )
  {
    iNumEncoded = 0;
    return;
  }

  // Rate control, when enabled, is set up for this GOP before compression...
  if ( m_RCEnableRateControl )
  {
    m_cRateCtrl.initRCGOP( m_iNumPicRcvd );
  }

  // compress GOP
  m_cGOPEncoder.compressGOP( m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, accessUnitsOut, false, false, snrCSC, m_printFrameMSE );

  // ...and torn down again afterwards.
  if ( m_RCEnableRateControl )
  {
    m_cRateCtrl.destroyRCGOP();
  }

  // Report the pictures handled, reset the pending counter, update the running total.
  iNumEncoded        = m_iNumPicRcvd;
  m_iNumPicRcvd      = 0;
  m_uiNumAllPicCoded += iNumEncoded;
}
/**------------------------------------------------
 Separate interlaced frame into two fields

 Copies every second row of the frame at org into dstField: rows 0, 2, 4, ...
 when isTop is true, rows 1, 3, 5, ... otherwise. height>>1 rows of width
 samples are copied. Destination rows are spaced stride samples apart; source
 rows are read stride*2 samples apart.

 \param org       first sample of the interlaced frame
 \param dstField  first sample of the destination field buffer
 \param stride    distance in samples between consecutive rows (used for both buffers)
 \param width     number of samples copied per row
 \param height    number of rows in the interlaced frame
 \param isTop     true to extract the top field, false for the bottom field
 -------------------------------------------------**/
Void separateFields(Pel* org, Pel* dstField, UInt stride, UInt width, UInt height, Bool isTop)
{
  // The bottom field starts on the second row of the frame.
  if (!isTop)
  {
    org += stride;
  }

  // Unsigned indices match the unsigned bounds, avoiding signed/unsigned comparisons.
  const UInt fieldHeight = height >> 1;
  for (UInt y = 0; y < fieldHeight; y++)
  {
    for (UInt x = 0; x < width; x++)
    {
      dstField[x] = org[x];
    }
    dstField += stride;   // next row of the field
    org += stride*2;      // skip the row that belongs to the other field
  }
}
TEncTop::encode函数调用下面函数处理GOP
// compress GOP
m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, accessUnitsOut, false, false, snrCSC, m_printFrameMSE);
TEncGOP::compressGOP函数的处理流程是遍历GOP中的每一帧,处理每一帧的Slice
m_pcSliceEncoder->compressSlice ( pcPic, false, false );
TEncSlice::compressSlice函数是对Slice中的每一个CTU(常用配置下为64x64,具体大小由编码参数决定)进行处理
// Excerpt of the per-CTU loop: visits every address in [startCtuTsAddr, boundingCtuTsAddr).
// Lines consisting of "..." mark code omitted from this excerpt; ctuRsAddr is set in the omitted part.
for( UInt ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ++ctuTsAddr )
{
...
// initialize CTU encoder: fetch the CTU at ctuRsAddr from the picture and initialise it
TComDataCU* pCtu = pcPic->getCtu( ctuRsAddr );
pCtu->initCtu( pcPic, ctuRsAddr );
...
// run CTU trial encoder
m_pcCuEncoder->compressCtu( pCtu );
...
// accumulate this CTU's bits, RD cost and distortion into the picture-level totals
m_uiPicTotalBits += pCtu->getTotalBits();
m_dPicRdCost += pCtu->getTotalCost();
m_uiPicDist += pCtu->getTotalDistortion();
}
TEncCu::compressCtu函数就是调用xCompressCU函数处理CU,其中最优的CU划分存储在m_ppcBestCU[0]变量中
xCompressCU( m_ppcBestCU[0], m_ppcTempCU[0], 0 DEBUG_STRING_PASS_INTO(sDebug) );
对于帧内预测,TEncCu::xCompressCU函数的处理流程是:先判断当前CU是否超出图像的右边界或下边界(bBoundary),如果没有超出,则对当前CU进行帧内预测的处理;然后判断当前CU是否可以继续划分,若可以,则划分成4个子CU,并对每个子CU递归调用xCompressCU函数进行处理
#if AMP_ENC_SPEEDUP
//对当前CU计算最好代价
//对当前CU的子块继续递归调用xCompressCU
Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, const UInt uiDepth DEBUG_STRING_FN_DECLARE(sDebug_), PartSize eParentPartSize )
#else
Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, const UInt uiDepth )
#endif
{
TComPic* pcPic = rpcBestCU->getPic(); //获取当前CU的图像
DEBUG_STRING_NEW(sDebug)
const TComPPS &pps=*(rpcTempCU->getSlice()->getPPS()); //获取图像参数集
const TComSPS &sps=*(rpcTempCU->getSlice()->getSPS()); //获取序列参数集
// These are only used if getFastDeltaQp() is true
const UInt fastDeltaQPCuMaxSize = Clip3(sps.getMaxCUHeight()>>sps.getLog2DiffMaxMinCodingBlockSize(), sps.getMaxCUHeight(), 32u);
// get Original YUV data from picture 从图像中获取原始YUV数据
m_ppcOrigYuv[uiDepth]->copyFromPicYuv( pcPic->getPicYuvOrg(), rpcBestCU->getCtuRsAddr(), rpcBestCU->getZorderIdxInCtu() );
// variable for Cbf fast mode PU decision
Bool doNotBlockPu = true; //快速cbf标识
Bool earlyDetectionSkipMode = false; //early skip早期跳出标识
const UInt uiLPelX = rpcBestCU->getCUPelX(); //最左端点x坐标
const UInt uiRPelX = uiLPelX + rpcBestCU->getWidth(0) - 1; //最右端点x坐标
const UInt uiTPelY = rpcBestCU->getCUPelY(); //最上端点y坐标
const UInt uiBPelY = uiTPelY + rpcBestCU->getHeight(0) - 1; //最下端点y坐标
const UInt uiWidth = rpcBestCU->getWidth(0); //当前CU块宽度
//传入当前CU和深度,计算对当前CU的QP;如果不是对每个CU自适应的改变QP,则直接用之前slice算出的QP
Int iBaseQP = xComputeQP( rpcBestCU, uiDepth );
Int iMinQP;
Int iMaxQP;
Bool isAddLowestQP = false;
//获取成分数量,如果色度格式是CHROMA_400,数量为1,反之为3(最大)
const UInt numberValidComponents = rpcBestCU->getPic()->getNumberValidComponents();
if( uiDepth <= pps.getMaxCuDQPDepth() )
{
Int idQP = m_pcEncCfg->getMaxDeltaQP();
iMinQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP-idQP );
iMaxQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP+idQP );
}
else
{
iMinQP = rpcTempCU->getQP(0);
iMaxQP = rpcTempCU->getQP(0);
}
if ( m_pcEncCfg->getUseRateCtrl() )
{
iMinQP = m_pcRateCtrl->getRCQP();
iMaxQP = m_pcRateCtrl->getRCQP();
}
// transquant-bypass (TQB) processing loop variable initialisation ---
//根据当前的深度,是否使用码率控制,是否使用TQB(TransquantBypass模式),调整QP最大和最小的范围(iMinQP-iMaxQP)
const Int lowestQP = iMinQP; // For TQB, use this QP which is the lowest non TQB QP tested (rather than QP'=0) - that way delta QPs are smaller, and TQB can be tested at all CU levels.
if ( (pps.getTransquantBypassEnableFlag()) )
{
isAddLowestQP = true; // mark that the first iteration is to cost TQB mode.
iMinQP = iMinQP - 1; // increase loop variable range by 1, to allow testing of TQB mode along with other QPs
if ( m_pcEncCfg->getCUTransquantBypassFlagForceValue() )
{
iMaxQP = iMinQP;
}
}
TComSlice * pcSlice = rpcTempCU->getPic()->getSlice(rpcTempCU->getPic()->getCurrSliceIdx()); //获取当前所在slice
// 当前CU块的右边界在整个图像的最右边 或者 下边界在整个图像最下边 则为TRUE(即在边界)
const Bool bBoundary = !( uiRPelX < sps.getPicWidthInLumaSamples() && uiBPelY < sps.getPicHeightInLumaSamples() );
if ( !bBoundary ) //如果不在边界
{
for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++) //在之前确定的QP范围中枚举QP
{
const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP);
if (bIsLosslessMode)
{
iQP = lowestQP;
}
m_cuChromaQpOffsetIdxPlus1 = 0;
if (pcSlice->getUseChromaQpAdj())
{
/* Pre-estimation of chroma QP based on input block activity may be performed
* here, using for example m_ppcOrigYuv[uiDepth] */
/* To exercise the current code, the index used for adjustment is based on
* block position
*/
/*
/* 如果是TransquantBypass模式(这里用bIsLosslessMode布尔型标识)且如果当前枚举到最小QP,将其改为lowestQP
* 如果是自适应改变QP,设置相关的对最小编码块大小取Log的值、色度QP偏移量索引
*/
Int lgMinCuSize = sps.getLog2MinCodingBlockSize() +
std::max<Int>(0, sps.getLog2DiffMaxMinCodingBlockSize()-Int(pps.getPpsRangeExtension().getDiffCuChromaQpOffsetDepth()));
m_cuChromaQpOffsetIdxPlus1 = ((uiLPelX >> lgMinCuSize) + (uiTPelY >> lgMinCuSize)) % (pps.getPpsRangeExtension().getChromaQpOffsetListLen() + 1);
}
//使用CTU四叉树子层的deltaQP初始化预测数据,根据深度设置CU的宽度和高度,对QP赋值
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
// do inter modes, SKIP and 2Nx2N 做帧间预测,SKIP和2N*2N
if( rpcBestCU->getSlice()->getSliceType() != I_SLICE )
{
// 2Nx2N
if(m_pcEncCfg->getUseEarlySkipDetection()) //使用early skip早期跳出模式
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2Nx2N DEBUG_STRING_PASS_INTO(sDebug) ); //尝试用普通模式进行预测
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); //rpcBestCU保存性能最优的预测方