重叠块运动补偿(Overlapped Block Motion Compensation,OBMC)技术是在当前块运动补偿完成之后,将使用相邻块的运动信息进行运动补偿得到的块与当前预测块进行加权,主要用于修正CU的边界像素,可以降低方块效应。
不应用OBMC的条件如下所示:
- 在 SPS 级别禁用 OBMC时
- 当前块为帧内模式或 IBC 模式时
- 当前块应用 LIC 时
- 当前亮度块像素数小于或等于 32 时
对于基于子块的编码工具,提出使用子块OBMC:利用相邻子块的运动信息,对子块的顶部、左侧、底部和右侧边界像素应用相同的加权融合。启用子块OBMC的编码工具包括:
- Affine AMVP 模式;
- Affine Merge模式和基于子块的时间运动矢量预测 (SbTMVP);
- 基于子块的双边匹配。
1. OBMC计算
主要包括以下两种情况:
(1)CU边界
对于CU边界像素,OBMC对亮度修正边界的4个像素,对色度修正边界的1个像素。
CU边界左上角4x4块,其上相邻块和左相邻块均存在,如下图所示
此时OBMC加权的计算如下所示,其中C(x, y)为当前CU运动补偿得到的预测块的边界像素,L(x,y)为当前CU通过使用左侧相邻块的运动信息运动补偿得到的像素,T(x,y)为当前CU通过上侧相邻块的运动信息运动补偿得到的像素。
CU边界上侧/左侧(仅存在上相邻块或者左相邻块,以上侧为例),如下图所示
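OBMC修正加权计算公式如下所示(以上侧边界为例,$y$ 为像素到边界的行距,$C$ 为当前预测像素,$T$ 为使用上相邻块运动信息得到的像素;亮度修正 $y=0..3$ 共4行,色度仅修正 $y=0$ 一行,权重与下文 xSubblockOBMC 代码一致):
$$C'(x,0) = \big(26\,C(x,0) + 6\,T(x,0) + 16\big) \gg 5$$
$$C'(x,1) = \big(7\,C(x,1) + T(x,1) + 4\big) \gg 3$$
$$C'(x,2) = \big(15\,C(x,2) + T(x,2) + 8\big) \gg 4$$
$$C'(x,3) = \big(31\,C(x,3) + T(x,3) + 16\big) \gg 5$$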
(2)Sub-CU边界
对于Sub-CU边界像素,OBMC对亮度分量修正边界的4个像素,对色度修正边界的2个像素。
对于CU内部的sub-CU,存在四个方向的OBMC,如下图所示:
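OBMC计算公式如下所示,其中C表示当前预测块,R/L/T/B是使用右侧/左侧/上侧/下侧运动信息运动补偿得到的像素,$w_T, w_L, w_B, w_R$ 为各方向的权重(由像素到对应边界的距离决定,该方向不可用时权重为0):
$$C'(x,y) = \big(w_C\,C + w_T\,T + w_L\,L + w_B\,B + w_R\,R + 64\big) \gg 7,\qquad w_C = 128 - (w_T + w_L + w_B + w_R)$$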
2. 实现细节
在编码端,使用一个 L 形缓冲区来存储当前 CU 中第一行和第一列块的 OBMC 块。存储在 L 形缓冲区中的 OBMC 块可以在 CU 模式候选的不同率失真优化 (RDO) 过程之间共享。在实现中,对于当前 CU 第一行/列的每个 4x4 块,当这个 4x4 块第一次需要上/左块边界 OBMC 块时,会执行实际的 4x4 块 MC 以获得 OBMC 块,并将其存储到该 4x4 块对应的缓冲区中。对于不同 CU 模式候选的其他 RDO 过程,不会重复执行 4x4 块 MC,而是直接重用已存储的 OBMC 块。
当我们在上/左CU边界生成上/左块边界OBMC块时,如果相邻的OBMC块具有相同的运动信息(参考索引和运动矢量),它们可以一起进行MC生成OBMC块以节省编码时间和解码时间。 也就是说,可以将相同运动信息的相邻OBMC块的MC操作合并为一个更大的OBMC块的MC操作,以节省冗余内存访问。
为了减少行缓冲区大小,如果当前块位于 CTU 行边界,则 OBMC 的行数从 4 减少到 2。
3. 相关代码
在每次运动补偿完成之后,通过subBlockOBMC函数执行OBMC。具体流程如下:
- 对CU上侧边界执行OBMC
- 对CU左侧边界执行OBMC
- 如果不是子块模式(Affine/bdmvrRefine),则退出,否则继续对sub-CU的边界执行OBMC
代码及注释如下所示:
// Applies overlapped block motion compensation (OBMC) to the prediction of `pu`.
//
// pu   - prediction unit whose prediction samples are refined in place.
// pDst - prediction buffer to refine; when nullptr, pu.cs->getPredBuf(pu) is used instead.
//
// Flow, as implemented below:
//   1. Return without doing anything when OBMC is disabled in the SPS, the CU's obmcFlag is
//      false, the CU uses LIC (INTER_LIC builds only), or the luma area is below 32 samples.
//   2. Walk the top boundary and then the left boundary in units of minCUWidth. Each run that
//      PU::getSameNeigMotion reports with state 2 is motion-compensated with the neighbour's
//      motion and blended into the prediction via xSubblockOBMC.
//   3. Only when the PU carries sub-block motion (bSubMotion): for every
//      minCUWidth x minCUWidth sub-block, motion-compensate with the above/left/below/right
//      neighbouring motion where available and blend via xSubblockOBMCBlending.
//
// Side effect: affine, BcwIdx and geoFlag are cleared through subPu.cu while the function runs
// and written back to pu.cu before each return. NOTE(review): this only works if subPu.cu
// aliases pu.cu (subPu is a copy of pu) - confirm against PredictionUnit's copy semantics.
// NOTE(review): the guard below uses a strict "< 32"; the accompanying description says
// "less than or equal to 32" - confirm which one is intended.
void InterPrediction::subBlockOBMC(PredictionUnit &pu, PelUnitBuf* pDst)
{
// Early-out conditions: OBMC not enabled / not selected / LIC in use / block too small.
if (
pu.cs->sps->getUseOBMC() == false
|| pu.cu->obmcFlag == false
#if INTER_LIC
|| pu.cu->LICFlag
#endif
|| pu.lwidth() * pu.lheight() < 32
)
{
return;
}
const UnitArea orgPuArea = pu; // area of the original PU
PredictionUnit subPu = pu;
const uint32_t uiWidth = pu.lwidth(); // luma width
const uint32_t uiHeight = pu.lheight(); // luma height
const uint32_t uiMinCUW = pu.cs->pcv->minCUWidth; // minimum CU width, used as the OBMC unit size
const uint32_t uiHeightInBlock = uiHeight / uiMinCUW; // PU height in units of uiMinCUW
const uint32_t uiWidthInBlock = uiWidth / uiMinCUW; // PU width in units of uiMinCUW
#if MULTI_PASS_DMVR
const bool bSubMotion = pu.cu->affine || pu.bdmvrRefine; // true when sub-block OBMC is needed
#else
const bool bSubMotion = pu.cu->affine || PU::checkDMVRCondition(pu);
#endif
MotionInfo NeighMi = MotionInfo();
// Remember the CU-level values that are cleared below; they are restored before returning.
int BcwIdx = pu.cu->BcwIdx;
bool affine = pu.cu->affine;
bool geo = pu.cu->geoFlag;
// Clear the tool flags so that the motion compensation calls made with a neighbour's motion
// are not affected by the current block's coding tools.
// LICFlag needs no restore: in INTER_LIC builds the guard above already returned if it was set.
subPu.cu->affine = false;
subPu.cu->BcwIdx = BCW_DEFAULT;
subPu.cu->geoFlag = false;
#if INTER_LIC
subPu.cu->LICFlag = false;
#endif
subPu.ciipFlag = false;
#if TM_MRG
subPu.tmMergeFlag = false;
#endif
#if MULTI_PASS_DMVR
subPu.bdmvrRefine = false;
#endif
subPu.mvRefine = false;
subPu.mmvdMergeFlag = false;
// Destination prediction plus the scratch buffers for the left (L0) and top (T0) boundary passes.
PelUnitBuf pcYuvPred = pDst == nullptr ? pu.cs->getPredBuf(pu) : *pDst;
PelUnitBuf pcYuvTmpPredL0 = m_tmpObmcBufL0.subBuf(UnitAreaRelative(*pu.cu, pu));
PelUnitBuf pcYuvTmpPredT0 = m_tmpObmcBufT0.subBuf(UnitAreaRelative(*pu.cu, pu));
// Process the top boundary first, then the left boundary.
for (int iBlkBoundary = 0; iBlkBoundary < 2; iBlkBoundary++) // 0 - top; 1 - left
{
unsigned int uiLengthInBlock = ((iBlkBoundary == 0) ? uiWidthInBlock : uiHeightInBlock);
int iSub = 0, iState = 0;
while (iSub < uiLengthInBlock) // visit every unit along this boundary
{
int iLength = 0;
// Offset of the current unit relative to the PU's top-left corner.
Position curOffset = (iBlkBoundary == 0) ? Position(iSub * uiMinCUW, 0) : Position(0, iSub * uiMinCUW);
// Meaning of iState (per the original notes; PU::getSameNeigMotion is not visible here):
//   0 - the neighbouring block is unavailable
//   1 - the neighbouring region is intra, or its motion equals the current block's motion
//   2 - OBMC is performed; iLength is the length (in units) of the run sharing the same
//       neighbouring motion, and iSub is the length already processed
// States 0 and 1 do not trigger OBMC. State 3 is handled together with state 1 below.
iState = PU::getSameNeigMotion(pu, NeighMi, curOffset, iBlkBoundary, iLength, uiLengthInBlock - iSub);
if (iState == 2) // do OBMC
{
subPu = NeighMi; // the boundary sub-PU takes over the neighbour's motion info
// Resize subPu to cover the whole run: iLength units along the boundary, one unit deep.
if (iBlkBoundary == 0)
{
subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(orgPuArea.lumaPos().offset(iSub * uiMinCUW, 0), Size{ iLength*uiMinCUW, uiMinCUW })));
}
else
{
subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(orgPuArea.lumaPos().offset(0, iSub * uiMinCUW), Size{ uiMinCUW, iLength*uiMinCUW })));
}
const UnitArea predArea = UnitAreaRelative(orgPuArea, subPu);
PelUnitBuf cPred = pcYuvPred.subBuf(predArea);
PelUnitBuf cTmp1;
if (iBlkBoundary == 0)//above
{
cTmp1 = pcYuvTmpPredT0.subBuf(predArea);
}
else//left
{
cTmp1 = pcYuvTmpPredL0.subBuf(predArea);
}
xSubBlockMotionCompensation(subPu, cTmp1); // motion-compensate the boundary sub-PU with the neighbour's motion
for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
{
xSubblockOBMC(ComponentID(compID), subPu, cPred, cTmp1, iBlkBoundary); // blend the top / left boundary samples of the current prediction
}
iSub += iLength;
}
else if (iState == 1 || iState == 3) // consecutive intra neighbors or skip OBMC based on MV similarity
{
iSub += iLength;
}
else // unavailable neighbors
{
// Nothing more to do on this boundary; advance iSub so the CHECK below passes.
iSub += uiLengthInBlock;
break;
}
}
CHECK(iSub != uiLengthInBlock, "not all sub-blocks are merged");
}
if (!bSubMotion) // no sub-block motion: restore the CU flags and stop here
{
pu.cu->BcwIdx = BcwIdx;
pu.cu->affine = affine;
pu.cu->geoFlag = geo;
return;
}
// Sub-block mode (affine / bdmvrRefine, or the DMVR condition when MULTI_PASS_DMVR is off).
// Four unit-sized scratch areas hold the above/left/below/right predictions; `zero` is the
// area passed to the blending routine for a direction that is not available.
// NOTE(review): the x offsets 4/8/12/16 are literals rather than multiples of uiMinCUW, so
// this layout presumably assumes uiMinCUW == 4 - confirm.
PelUnitBuf pcYuvTmpPred = m_tmpSubObmcBuf;
PelUnitBuf cTmp1 = pcYuvTmpPred.subBuf(UnitArea(pu.chromaFormat, Area(0, 0, uiMinCUW, uiMinCUW)));
PelUnitBuf cTmp2 = pcYuvTmpPred.subBuf(UnitArea(pu.chromaFormat, Area(4, 0, uiMinCUW, uiMinCUW)));
PelUnitBuf cTmp3 = pcYuvTmpPred.subBuf(UnitArea(pu.chromaFormat, Area(8, 0, uiMinCUW, uiMinCUW)));
PelUnitBuf cTmp4 = pcYuvTmpPred.subBuf(UnitArea(pu.chromaFormat, Area(12, 0, uiMinCUW, uiMinCUW)));
PelUnitBuf zero = pcYuvTmpPred.subBuf(UnitArea(pu.chromaFormat, Area(16, 0, uiMinCUW, uiMinCUW)));
for (int iSubX = 0; iSubX < uiWidthInBlock; iSubX += 1)
{
for (int iSubY = 0; iSubY < uiHeightInBlock; iSubY += 1)
{
bool bCURBoundary = (iSubX == uiWidthInBlock - 1); // sub-block touches the CU's right boundary
bool bCUBBoundary = (iSubY == uiHeightInBlock - 1); // sub-block touches the CU's bottom boundary
// The sub-PU is one unit in size: (uiMinCUW, uiMinCUW).
subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(orgPuArea.lumaPos().offset(iSubX * uiMinCUW, iSubY * uiMinCUW), Size{ uiMinCUW, uiMinCUW })));
const UnitArea predArea = UnitAreaRelative(orgPuArea, subPu);
PelUnitBuf cPred = pcYuvPred.subBuf(predArea);
bool isAboveAvail = false, isLeftAvail = false, isBelowAvail = false, isRightAvail = false;
// Sub-blocks on the CU's top / left boundary were already processed by the boundary pass
// above, so the above / left direction is only considered when iSubY > 0 / iSubX > 0.
// above
if (iSubY)
{
// OBMC from above is possible only when the above motion info is available.
isAboveAvail = PU::getNeighborMotion(pu, NeighMi, Position(iSubX * uiMinCUW, iSubY * uiMinCUW), Size(uiMinCUW, uiMinCUW), 0);
if (isAboveAvail)
{
subPu = NeighMi;
xSubBlockMotionCompensation(subPu, cTmp1); // motion compensation with the above neighbour's motion
}
}
// left
if (iSubX)
{
isLeftAvail = PU::getNeighborMotion(pu, NeighMi, Position(iSubX * uiMinCUW, iSubY * uiMinCUW), Size(uiMinCUW, uiMinCUW), 1);
if (isLeftAvail)
{
subPu = NeighMi;
xSubBlockMotionCompensation(subPu, cTmp2);
}
}
// Sub-blocks on the CU's bottom / right boundary skip the below / right direction because
// the PU below / to the right is not available yet.
// below
if (!bCUBBoundary)
{
isBelowAvail = PU::getNeighborMotion(pu, NeighMi, Position(iSubX * uiMinCUW, iSubY * uiMinCUW), Size(uiMinCUW, uiMinCUW), 2);
if (isBelowAvail)
{
subPu = NeighMi;
xSubBlockMotionCompensation(subPu, cTmp3);
}
}
// right
if (!bCURBoundary)
{
isRightAvail = PU::getNeighborMotion(pu, NeighMi, Position(iSubX * uiMinCUW, iSubY * uiMinCUW), Size(uiMinCUW, uiMinCUW), 3);
if (isRightAvail)
{
subPu = NeighMi;
xSubBlockMotionCompensation(subPu, cTmp4);
}
}
if( isAboveAvail || isLeftAvail || isBelowAvail || isRightAvail )
{
for( int compID = 0; compID < MAX_NUM_COMPONENT; compID++ )
{
// Weighted OBMC blending of the current sub-block with every available direction.
xSubblockOBMCBlending( ComponentID( compID ), subPu, cPred, isAboveAvail ? cTmp1: zero, isLeftAvail ? cTmp2: zero, isBelowAvail ? cTmp3: zero, isRightAvail ? cTmp4: zero, isAboveAvail, isLeftAvail, isBelowAvail, isRightAvail, true );
}
}
}
}
// Restore the CU-level values that were cleared at the top of the function.
pu.cu->BcwIdx = BcwIdx;
pu.cu->affine = affine;
pu.cu->geoFlag = geo;
return;
}
xSubblockOBMC函数用于对CU左侧边界/上侧边界的像素执行OBMC加权修正。
// Function for (weighted) averaging predictors of current block and predictors generated by applying neighboring motions to current block.
// 用于(加权)平均当前块的预测器和通过将相邻运动信息应用于当前块生成的预测器的函数。
// iDir 为0 (above)或者 1(left)
// bSubMotion为false
void InterPrediction::xSubblockOBMC(const ComponentID eComp, PredictionUnit &pu, PelUnitBuf &pcYuvPredDst, PelUnitBuf &pcYuvPredSrc, int iDir, bool bSubMotion)
{
int iWidth = pu.blocks[eComp].width;
int iHeight = pu.blocks[eComp].height;
if (iWidth == 0 || iHeight == 0)
{
return;
}
Pel* pOrgDst = pcYuvPredDst.bufs[eComp].buf;
Pel* pOrgSrc = pcYuvPredSrc.bufs[eComp].buf;
const int strideDst = pcYuvPredDst.bufs[eComp].stride;
const int strideSrc = pcYuvPredSrc.bufs[eComp].stride;
if (iDir == 0) //above
{
for (int i = 0; i < iWidth; i++)
{
Pel* pDst = pOrgDst;
Pel* pSrc = pOrgSrc;
pDst[i] = bSubMotion ? (3 * pDst[i] + pSrc[i] + 2) >> 2 : (26 * pDst[i] + 6 * pSrc[i] + 16) >> 5;
if (eComp == COMPONENT_Y)
{
pDst += strideDst;
pSrc += strideSrc;
pDst[i] = (7 * pDst[i] + pSrc[i] + 4) >> 3;
pDst += strideDst;
pSrc += strideSrc;
pDst[i] = (15 * pDst[i] + pSrc[i] + 8) >> 4;
if (!bSubMotion)
{
pDst += strideDst;
pSrc += strideSrc;
pDst[i] = (31 * pDst[i] + pSrc[i] + 16) >> 5;
}
}
}
}
if (iDir == 1) //left
{
Pel* pDst = pOrgDst;
Pel* pSrc = pOrgSrc;
for (int i = 0; i < iHeight; i++)
{
pDst[0] = bSubMotion ? (3 * pDst[0] + pSrc[0] + 2) >> 2 : (26 * pDst[0] + 6 * pSrc[0] + 16) >> 5;
if (eComp == COMPONENT_Y)
{
pDst[1] = (7 * pDst[1] + pSrc[1] + 4) >> 3;
pDst[2] = (15 * pDst[2] + pSrc[2] + 8) >> 4;
if (!bSubMotion)
{
pDst[3] = (31 * pDst[3] + pSrc[3] + 16) >> 5;
}
}
pDst += strideDst;
pSrc += strideSrc;
}
}
if (iDir == 2) //below
{
for (int i = 0; i < iWidth; i++)
{
Pel* pDst = pOrgDst + (iHeight - 1) * strideDst;
Pel* pSrc = pOrgSrc + (iHeight - 1) * strideSrc;
pDst[i] = (3 * pDst[i] + pSrc[i] + 2) >> 2;
if (eComp == COMPONENT_Y)
{
pDst -= strideDst;
pSrc -= strideSrc;
pDst[i] = (7 * pDst[i] + pSrc[i] + 4) >> 3;
pDst -= strideDst;
pSrc -= strideSrc;
pDst[i] = (15 * pDst[i] + pSrc[i] + 8) >> 4;
}
}
}
if (iDir == 3) //right
{
Pel* pDst = pOrgDst + (iWidth - 4);
Pel* pSrc = pOrgSrc + (iWidth - 4);
for (int i = 0; i < iHeight; i++)
{
pDst[3] = (3 * pDst[3] + pSrc[3] + 2) >> 2;
if (eComp == COMPONENT_Y)
{
pDst[2] = (7 * pDst[2] + pSrc[2] + 4) >> 3;
pDst[1] = (15 * pDst[1] + pSrc[1] + 8) >> 4;
}
pDst += strideDst;
pSrc += strideSrc;
}
}
}
xSubblockOBMCBlending函数用于进行sub-CU四个方向边界的加权修正过程。
void InterPrediction::xSubblockOBMCBlending(const ComponentID eComp, PredictionUnit &pu, PelUnitBuf &pcYuvPredDst, PelUnitBuf &pcYuvPredSrc1, PelUnitBuf &pcYuvPredSrc2, PelUnitBuf &pcYuvPredSrc3, PelUnitBuf &pcYuvPredSrc4, bool isAboveAvail, bool isLeftAvail, bool isBelowAvail, bool isRightAvail, bool bSubMotion)
{
int iWidth = pu.blocks[eComp].width;
int iHeight = pu.blocks[eComp].height;
if (iWidth == 0 || iHeight == 0)
{
return;
}
Pel* pOrgDst = pcYuvPredDst.bufs[eComp].buf;
Pel* pOrgSrc1 = pcYuvPredSrc1.bufs[eComp].buf;
Pel* pOrgSrc2 = pcYuvPredSrc2.bufs[eComp].buf;
Pel* pOrgSrc3 = pcYuvPredSrc3.bufs[eComp].buf;
Pel* pOrgSrc4 = pcYuvPredSrc4.bufs[eComp].buf;
const int strideDst = pcYuvPredDst.bufs[eComp].stride;
const int strideSrc = pcYuvPredSrc1.bufs[eComp].stride;
unsigned int isChroma = !isLuma( eComp );
unsigned int aboveWeight[4], leftWeight[4], belowWeight[4], rightWeight[4];
if( isAboveAvail )
{
memcpy( aboveWeight, defaultWeight[isChroma], sizeof( aboveWeight ) );
}
else
{
memset( aboveWeight, 0, sizeof( aboveWeight ) );
}
if( isLeftAvail )
{
memcpy( leftWeight, defaultWeight[isChroma], sizeof( leftWeight ) );
}
else
{
memset( leftWeight, 0, sizeof( leftWeight ) );
}
if( isBelowAvail )
{
memcpy( belowWeight, defaultWeight[isChroma], sizeof( belowWeight ) );
}
else
{
memset( belowWeight, 0, sizeof( belowWeight ) );
}
if( isRightAvail )
{
memcpy( rightWeight, defaultWeight[isChroma], sizeof( rightWeight ) );
}
else
{
memset( rightWeight, 0, sizeof( rightWeight ) );
}
unsigned int shift = 7;
unsigned int sumWeight = 1 << shift;
unsigned int add = 1 << (shift - 1);
Pel* pDst = pOrgDst;
Pel* pSrc1 = pOrgSrc1;
Pel* pSrc2 = pOrgSrc2;
Pel* pSrc3 = pOrgSrc3;
Pel* pSrc4 = pOrgSrc4;
if( isLuma( eComp ) ) // 亮度
{
for( int j = 0; j < iHeight; j++ )
{
unsigned int idx_h = iHeight - 1 - j;
for( int i = 0; i < iWidth; i++ )
{
unsigned int idx_w = iWidth - 1 - i;
unsigned int sumOBMCWeight = aboveWeight[j] + leftWeight[i] + belowWeight[idx_h] + rightWeight[idx_w];
if( sumOBMCWeight == 0 )
{
continue;
}
unsigned int currentWeight = sumWeight - sumOBMCWeight; // 当前块的权重
pDst[i] = (currentWeight * pDst[i] + aboveWeight[j] * pSrc1[i] + leftWeight[i] * pSrc2[i] + belowWeight[idx_h] * pSrc3[i] + rightWeight[idx_w] * pSrc4[i] + add) >> shift;
}
pDst += strideDst;
pSrc1 += strideSrc;
pSrc2 += strideSrc;
pSrc3 += strideSrc;
pSrc4 += strideSrc;
}
}
else // 色度,仅对左侧和上侧进行OBMC,并且仅对边界两行两列进行
{
pDst[0] = ((sumWeight - aboveWeight[0] - leftWeight[0]) * pDst[0] + aboveWeight[0] * pSrc1[0] + leftWeight[0] * pSrc2[0] + add) >> shift;
pDst[1] = ((sumWeight - aboveWeight[0] - rightWeight[0]) * pDst[1] + aboveWeight[0] * pSrc1[1] + rightWeight[0] * pSrc4[1] + add) >> shift;
pDst += strideDst;
pSrc2 += strideSrc;
pSrc3 += strideSrc;
pSrc4 += strideSrc;
pDst[0] = ((sumWeight - leftWeight[0] - belowWeight[0]) * pDst[0] + leftWeight[0] * pSrc2[0] + belowWeight[0] * pSrc3[0] + add) >> shift;
pDst[1] = ((sumWeight - belowWeight[0] - rightWeight[0]) * pDst[1] + belowWeight[0] * pSrc3[1] + rightWeight[0] * pSrc4[1] + add) >> shift;
}
}