HEVC中的mv预测技术主要分为两种:AMVP mode 和Merge mode,这里就主要来聊聊Merge mode。
由于很多时候当前的编码CU与相邻已经完成编码的CU具有相同的mv,因此从相邻的若干个PU中获取最佳的mv就是Merge mode要做的主要事情:
- 依次检查相邻PU的motion information(包括reference index和mv)是否可用,可用时将其加入MergeCandidateList,作为motion information的候选。
- 逐个复用MergeCandidateList中的motion information,计算残差。
- 如果模式选择最终选定Merge mode为预测模式,则将Merge index和相应的残差写入码流。
这次要讨论的getInterMergeCandidates()函数主要就是建立MergeCandidateList的过程。在x265的源码中,不同的preset对应的MergeCandidate个数是不一样的。从ultrafast到medium的maxNumMergeCand为2,slow和slower为3,veryslow为4,placebo为5。不同的candidate个数会对编码计算复杂度和压缩效率产生影响,需要根据应用场景进行调整。
为了获得更好的压缩效果,在获取MergeCandidateList的过程中,需要考虑以下几点:
- 加入MergeCandidateList的motion information的次序会对编码产生影响。放在candidate list中靠前位置的motion information应该是最有可能被选中的候选,这样在对merge index进行变长编码时占用的bits最少。大量试验表明,周边PU的搜索顺序应为left(A1)->above(B1)->above right(B0)->left bottom(A0)->above left(B2)->temporal->填充candidate。
- Candidate的质量也会有影响,体现在有效的Candidate的个数。在getInterMergeCandidates()函数中,有pruning的过程来筛选掉重复的Candidate。pruning的算法为B1与A1做pruning,B0与B1做pruning,A0与A1做pruning,B2与A1和B1做pruning。这样可以减少candidate list中重复的motion information,将有效的放在list前面。
在3D-HEVC中MergeCandidateList的获取过程中就加入了disparity motion vector作为candidate之一,也是提高了candidate的质量,从而提高编码效果。
下面是x265中getInterMergeCandidates()函数的源代码和注释,供大家参考。
/* Construct list of merging candidates, returns count */
uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*candMvField)[2], uint8_t* candDir) const
{
uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;//计算当前CU的绝对位置
const bool isInterB = m_slice->isInterB();
//最大的MergeCandidate个数,默认为2,其中slow和slower模式为3,veryslow模式为4,placebo模式为5
const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand;
//assert(m_slice->m_maxNumMergeCand == 2);
for (uint32_t i = 0; i < maxNumMergeCand; ++i)//初始化Mergecandidates的mv和refidx为0
{
candMvField[i][0].mv = 0;
candMvField[i][1].mv = 0;
candMvField[i][0].refIdx = REF_NOT_VALID;
candMvField[i][1].refIdx = REF_NOT_VALID;
}
/* calculate the location of upper-left corner pixel and size of the current PU */
int xP, yP, nPSW, nPSH;
int cuSize = 1 << m_log2CUSize[0];//获取CU的大小
int partMode = m_partSize[0];//获取四叉树划分模式
int tmp = partTable[partMode][puIdx][0];
nPSW = ((tmp >> 4) * cuSize) >> 2;//PU的宽度
nPSH = ((tmp & 0xF) * cuSize) >> 2;//PU的高度
tmp = partTable[partMode][puIdx][1];
xP = ((tmp >> 4) * cuSize) >> 2;//PU top-left像素x坐标
yP = ((tmp & 0xF) * cuSize) >> 2;//PU top-left像素y坐标
if(xP == 16 || yP == 16)
int a = 0;
if(nPSW == 8 && nPSH == 8)
int b = 0;
uint32_t count = 0;
uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
PartSize curPS = (PartSize)m_partSize[absPartIdx];
//merge candidates的PU搜索顺序:left(A1)->above->above right->left bottom->above left->temporal->填充candidate
//按照统计规律,上述PU的排序按照与当前PU具备最佳mv的概率从高到低排列,被选中的candidate的index值越小,写入码流后占用的bit越小
//因此需要将选中概率大的PU放在candidate list的靠前位置
// left
uint32_t leftPartIdx = 0;
const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB);//获取left PU
//isavailableA1:A1是否可用,判断条件为:
//1. A1存在
//2. A1与当前coding PU不属于同一merge域(与当前coding PU不共用同一套merge info)
//3. 当前PU不能具备下列条件:index为1,划分方式为Nx2N,nLx2N,nrx2N
//4. A1为inter mode(这样才具备mv)
bool isAvailableA1 = cuLeft &&
cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) &&
!(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) &&
cuLeft->isInter(leftPartIdx);
if (isAvailableA1)
{
// get Inter Dir
candDir[count] = cuLeft->m_interDir[leftPartIdx];//复用inter direction
// get Mv from Left
cuLeft->getMvField(cuLeft, leftPartIdx, 0, candMvField[count][0]);//P或者B的list0,复用mv
if (isInterB)
cuLeft->getMvField(cuLeft, leftPartIdx, 1, candMvField[count][1]);//B的list1,复用mv
if (++count == maxNumMergeCand)//MergeCand个数
return maxNumMergeCand;
}
deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
// above
uint32_t abovePartIdx = 0;
const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT);//获取B1
bool isAvailableB1 = cuAbove &&
cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) &&
!(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) &&
cuAbove->isInter(abovePartIdx);
if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx)))//与A1的motion info做互比,舍去相同的
{
// get Inter Dir
candDir[count] = cuAbove->m_interDir[abovePartIdx];
// get Mv from Left
cuAbove->getMvField(cuAbove, abovePartIdx, 0, candMvField[count][0]);
if (isInterB)
cuAbove->getMvField(cuAbove, abovePartIdx, 1, candMvField[count][1]);
if (++count == maxNumMergeCand)
return maxNumMergeCand;
}
// above right
uint32_t aboveRightPartIdx = 0;
const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT);
bool isAvailableB0 = cuAboveRight &&
cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) &&
cuAboveRight->isInter(aboveRightPartIdx);
if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx)))//与B1做互比
{
// get Inter Dir
candDir[count] = cuAboveRight->m_interDir[aboveRightPartIdx];
// get Mv from Left
cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, candMvField[count][0]);
if (isInterB)
cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, candMvField[count][1]);
if (++count == maxNumMergeCand)
return maxNumMergeCand;
}
// left bottom
uint32_t leftBottomPartIdx = 0;
const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB);
bool isAvailableA0 = cuLeftBottom &&
cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) &&
cuLeftBottom->isInter(leftBottomPartIdx);
if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx)))//与A1互比
{
// get Inter Dir
candDir[count] = cuLeftBottom->m_interDir[leftBottomPartIdx];
// get Mv from Left
cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, candMvField[count][0]);
if (isInterB)
cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, candMvField[count][1]);
if (++count == maxNumMergeCand)
return maxNumMergeCand;
}
// above left
if (count < 4)
{
uint32_t aboveLeftPartIdx = 0;
const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr);
bool isAvailableB2 = cuAboveLeft &&
cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) &&
cuAboveLeft->isInter(aboveLeftPartIdx);
if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx))
&& (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx)))//与A1和B1做互比
{
// get Inter Dir
candDir[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx];
// get Mv from Left
cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, candMvField[count][0]);
if (isInterB)
cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, candMvField[count][1]);
if (++count == maxNumMergeCand)
return maxNumMergeCand;
}
}
//检查当前帧是否打开了temporal MVP,如果存在,则检查temporal mv是否可加入candidate list
if (m_slice->m_sps->bTemporalMVPEnabled)
{
uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
MV colmv;
int ctuIdx = -1;
// image boundary check
if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
{
uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
uint32_t numUnits = s_numPartInCUSize;
bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU
bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row of CTU
if (bNotLastCol && bNotLastRow)//如果不是当前CTU的最后一行、最后一列,则取当前PU的right bottom的address
{
absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1];
ctuIdx = m_cuAddr;
}
//如果是最后一行,但不是最后一列,则取下一行CTU中对应当前PU的right bottom的address
//注意,这里并没有把CTU的index更新,这里为了在编码时避免CTU跨行,在这种情况下直接取中心位置的PU的mv作为candidate
else if (bNotLastCol)
absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)];
else if (bNotLastRow)//如果是最后一列,但不是最后一行,则取下一个CU的中对应当前PU的right bottom的address
{
absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1];
ctuIdx = m_cuAddr + 1;
}
else // is the right bottom corner of CTU如果是当前CTU的右下角,
absPartAddr = 0;
}
int maxList = isInterB ? 2 : 1;
int dir = 0, refIdx = 0;
for (int list = 0; list < maxList; list++)
{
bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, list, ctuIdx, absPartAddr);//取MVP
if (!bExistMV)//如果没有取成功,则取中央位置PU的mvp,这里对应上述的CTU跨行的问题
{
uint32_t partIdxCenter = deriveCenterIdx(puIdx);
bExistMV = getColMVP(colmv, refIdx, list, m_cuAddr, partIdxCenter);
}
if (bExistMV)//如果取成功,将mvp加入candidate list
{
dir |= (1 << list);
candMvField[count][list].mv = colmv;
candMvField[count][list].refIdx = refIdx;
}
}
if (dir != 0)
{
candDir[count] = (uint8_t)dir;
if (++count == maxNumMergeCand)
return maxNumMergeCand;
}
}
//当前如果是interB时,之前的candidate个数大于1,则按照已制定好的优先级组合已有的candidate info,形成一个新的candidate,并加入list
if (isInterB)
{
const uint32_t cutoff = count * (count - 1);
uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }
uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }
for (uint32_t idx = 0; idx < cutoff; idx++, priorityList0 >>= 2, priorityList1 >>= 2)
{
int i = priorityList0 & 3;
int j = priorityList1 & 3;
if ((candDir[i] & 0x1) && (candDir[j] & 0x2))
{
// get Mv from cand[i] and cand[j]
int refIdxL0 = candMvField[i][0].refIdx;
int refIdxL1 = candMvField[j][1].refIdx;
int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0];
int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1];
if (!(refPOCL0 == refPOCL1 && candMvField[i][0].mv == candMvField[j][1].mv))
{
candMvField[count][0].mv = candMvField[i][0].mv;
candMvField[count][0].refIdx = refIdxL0;
candMvField[count][1].mv = candMvField[j][1].mv;
candMvField[count][1].refIdx = refIdxL1;
candDir[count] = 3;
if (++count == maxNumMergeCand)
return maxNumMergeCand;
}
}
}
}
//上述的过程选出的candidate个数如果还小于maxNumMergeCand,则补充0
int numRefIdx = (isInterB) ? X265_MIN(m_slice->m_numRefIdx[0], m_slice->m_numRefIdx[1]) : m_slice->m_numRefIdx[0];
int r = 0;
int refcnt = 0;
while (count < maxNumMergeCand)
{
candDir[count] = 1;
candMvField[count][0].mv.word = 0;
candMvField[count][0].refIdx = r;
if (isInterB)
{
candDir[count] = 3;
candMvField[count][1].mv.word = 0;
candMvField[count][1].refIdx = r;
}
count++;
if (refcnt == numRefIdx - 1)
r = 0;
else
{
++r;
++refcnt;
}
}
return count;
}