ECM技术学习：解码端帧内模式推导(Decoder-side Intra Mode Derivation )

最新推荐文章于 2024-07-24 14:54:00 发布

涵小呆

最新推荐文章于 2024-07-24 14:54:00 发布

阅读量2.9k

点赞数 4

分类专栏： ECM 文章标签：算法视频编码 ECM VVC

本文链接：https://blog.csdn.net/BigDream123/article/details/121305710

版权

ECM 专栏收录该内容

15 篇文章 12 订阅

订阅专栏

解码端帧内模式推导（DIMD）技术是之前在VVC标准化的过程中提出的技术，因为其在解码端引入的复杂度较高，因此没有被VVC采纳。为了探索下一代压缩标准，JVET最近设立了最新的ECM参考平台，将DIMD技术包含了进去。

DIMD技术，即解码端帧内模式推导，是通过计算当前块相邻重建像素的梯度来推导当前块的角度模式，从而节省编码帧内模式所需的比特。解码端使用完全相同的推导方式得到角度模式，进而恢复出重建YUV。

1. 帧内模式推导

DIMD使用当前块相邻的Template区域的重建像素推导帧内角度模式，如下图所示，绿色区域为当前待预测块，蓝色区域为模板区域，其中T=3。DIMD使用如右图所示的3x3水平Sobel滤波器和垂直Sobel滤波器，对模板区域中心的像素（即黄色像素）分别计算水平梯度Gx和垂直梯度Gy。之后通过atan(Gy/Gx)即可求出对应像素的梯度角度，并将其转换为VVC中的65种角度模式IPM之一，同时将Gx和Gy的绝对值之和作为幅度值累加到该角度模式上。

对模板区域中心全部像素重复上述过程，可以得到一个帧内角度模式直方图。

2. 解码流程

加入DIMD技术后的帧内模式解码流程如下所示，对于帧内预测模式，先解析DIMD flag，如果DIMD flag 为1则进行帧内预测模式推导；否则，解析MIP/MRL/ISP等帧内模式标志。

3.计算预测值

通过角度模式直方图，可以得到两个幅度值最高的帧内预测模式IPM，将其和Planar模式的预测值进行加权融合，得到最终的预测值。其中Planar模式的权重固定为 21/64（~1/3）。然后，剩余的 43/64 (~2/3) 权重分配给两个IPM，两个IPM的权重和它们直方图的幅度值相关，如下图所示。

4. 相关代码

ECM中DIMD推导帧内模式的代码在deriveDimdMode函数中，基本流程如下：

分析模板区域可用像素
建立角度模式直方图：buildHistogram，选出幅度值最高的两个角度模式IPM
判断是否进行加权融合，如果需要进行加权融合则计算各个模式的权重

void IntraPrediction::deriveDimdMode(const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu)
{
  if( !cu.slice->getSPS()->getUseDimd() )
  {
    return;
  }

  int sigcnt = 0;
  const CodingStructure  &cs = *cu.cs;
  const SPS             &sps = *cs.sps;
  const PreCalcValues   &pcv = *cs.pcv;
  const ChannelType   chType = toChannelType(area.compID);

  const Pel *pReco = recoBuf.buf; // 当前CU的重建像素的指针，指向当前CU的左上角
  const uint32_t uiWidth = area.width;
  const uint32_t uiHeight = area.height;
  const int iStride = recoBuf.stride;
  const int predSize = (uiWidth << 1);
  const int predHSize = (uiHeight << 1);

  const bool noShift = pcv.noChroma2x2 && uiWidth == 4; // don't shift on the lowest level (chroma not-split)
  const int  unitWidth = pcv.minCUWidth >> (noShift ? 0 : getComponentScaleX(area.compID, sps.getChromaFormatIdc()));
  const int  unitHeight = pcv.minCUHeight >> (noShift ? 0 : getComponentScaleY(area.compID, sps.getChromaFormatIdc()));

  const int  totalAboveUnits = (predSize + (unitWidth - 1)) / unitWidth; // 上侧总单元数
  const int  totalLeftUnits = (predHSize + (unitHeight - 1)) / unitHeight; // 左侧总单元数
  const int  totalUnits = totalAboveUnits + totalLeftUnits + 1; //+1 for top-left
  const int  numAboveUnits = std::max<int>(uiWidth / unitWidth, 1);
  const int  numLeftUnits = std::max<int>(uiHeight / unitHeight, 1);
  const int  numAboveRightUnits = totalAboveUnits - numAboveUnits;
  const int  numLeftBelowUnits = totalLeftUnits - numLeftUnits;

  CHECK(numAboveUnits <= 0 || numLeftUnits <= 0 || numAboveRightUnits <= 0 || numLeftBelowUnits <= 0, "Size not supported");

  // ----- Step 1: analyze neighborhood ----- 分析相邻区域
  const Position posLT = area;

  bool  neighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1];
  memset(neighborFlags, 0, totalUnits);
  // 上侧可用的单元数
  int numIntraAbove = isAboveAvailable(cu, chType, posLT, numAboveUnits, unitWidth, (neighborFlags + totalLeftUnits + 1));
  // 左侧可用的单元数
  int numIntraLeft = isLeftAvailable(cu, chType, posLT, numLeftUnits, unitHeight, (neighborFlags + totalLeftUnits - 1));

  // ----- Step 2: build histogram of gradients ----- 建立梯度直方图
  int piHistogram_clean[NUM_LUMA_MODE] = { 0 };
  
  if (numIntraLeft)
  {
    // 建立左侧像素的梯度直方图
    // 由于Sober算子的窗口是3，第一个减1表示左侧一列的最后一个像素无法用sober算子
    // 第二个减1是在上侧像素不可用的情况下左侧一列的第一个像素无法用sober算子
    uint32_t uiHeightLeft = numIntraLeft * unitHeight - 1 - (!numIntraAbove ? 1 : 0);
    const Pel *pRecoLeft = pReco - 2 + iStride * (!numIntraAbove ? 1 : 0); // 移动到第一个使用Sober算子的位置
    sigcnt += buildHistogram(pRecoLeft, iStride, uiHeightLeft, 1, piHistogram_clean, 1, uiWidth, uiHeight);
  }

  if (numIntraAbove)
  {
    // 建立上侧像素的梯度直方图
    // 由于Sober算子的窗口是3，第一个减1表示上一行的最后一个像素无法用sober算子
     // 第二个减1是在左侧像素不可用的情况下上侧一行的的第一个像素无法用sober算子
    uint32_t uiWidthAbove = numIntraAbove * unitWidth - 1 - (!numIntraLeft ? 1 : 0);
    const Pel *pRecoAbove = pReco - iStride * 2 + (!numIntraLeft ? 1 : 0);
    sigcnt += buildHistogram(pRecoAbove, iStride, 1, uiWidthAbove, piHistogram_clean, 2, uiWidth, uiHeight);
  }

  if (numIntraLeft && numIntraAbove)
  {
    // 建立左上角像素的梯度直方图
    const Pel *pRecoAboveLeft = pReco - 2 - iStride * 2;
    sigcnt += buildHistogram(pRecoAboveLeft, iStride, 2, 2, piHistogram_clean, 3, uiWidth, uiHeight);
  }

  int first_amp = 0, second_amp = 0, cur_amp = 0;
  int first_mode = 0, second_mode = 0, cur_mode = 0;
  // 遍历67种模式，选出幅度值最高的两种模式
  for (int i = 0; i < NUM_LUMA_MODE; i++)
  {
    cur_amp = piHistogram_clean[i];
    cur_mode = i;
    if (cur_amp > first_amp)
    {
      second_amp = first_amp;
      second_mode = first_mode;
      first_amp = cur_amp;
      first_mode = cur_mode;
    }
    else
    {
      if (cur_amp > second_amp)
      {
        second_amp = cur_amp;
        second_mode = cur_mode;
      }
    }
  }

  // ----- Step 3: derive best mode from histogram of gradients  根据HOG推导最佳模式-----
  cu.dimdMode = first_mode; // 存储最佳模式

  cu.dimd_is_blend = true;
  // 满足以下三种条件时才会融合
  cu.dimd_is_blend &= second_amp > 0;
  cu.dimd_is_blend &= second_mode > DC_IDX;
  cu.dimd_is_blend &= first_mode > DC_IDX;

  if( cu.dimd_is_blend )
  {
    cu.dimdBlendMode[0] = second_mode; // 存储第二高的模式
  }

  const int blend_sum_weight = 6;
  int sum_weight = 1 << blend_sum_weight;
  // 计算三种模式的权重
  if (cu.dimd_is_blend)
  {
    double dRatio = 0.0;
    sum_weight -= static_cast<int>((double)sum_weight / 3); // ~ 1/3 of the weight to be reserved for planar
    dRatio = (double)first_amp / (double)(first_amp + second_amp);
    int iRatio = static_cast<int>(dRatio * sum_weight);
    cu.dimdRelWeight[0] = iRatio;
    cu.dimdRelWeight[2] = sum_weight - iRatio;
    cu.dimdRelWeight[1] = (1 << blend_sum_weight) - sum_weight;
  }
  else
  {
    cu.dimdRelWeight[0] = sum_weight;
    cu.dimdRelWeight[1] = 0;
    cu.dimdRelWeight[2] = 0;
  }

}

buildHistogram函数用来建立角度模式直方图。通过垂直梯度和水平梯度的atan，可以求出角度 $\theta$ ,垂直于 $\theta$ 的角度即为预测角度。需要注意的是预测角度和角度 $\theta$ 是垂直的。

为了方便求出预测角度，代码实现中根据水平梯度GHor和垂直梯度GVer的大小关系和符号划分出如下4个不同区域。对于不同区域，先通过 atan 求出偏移角度 $\theta$ ，再用水平/垂直角度加上或减去该偏移角度，即可得到最终的角度模式。

区域0：GHor和GVer符号相反且 $\left | G_{Hor} \right | < \left | G_{Ver} \right |$
区域1：GHor和GVer符号相同且 $\left | G_{Hor} \right | < \left | G_{Ver} \right |$
区域2：GHor和GVer符号相同且 $\left | G_{Hor} \right |\geqslant \left | G_{Ver} \right |$
区域3：GHor和GVer符号相反且 $\left | G_{Hor} \right |\geqslant \left | G_{Ver} \right |$

如果 $\left | G_{Hor} \right | < \left | G_{Ver} \right |$ 则 $\tan\theta =\frac{\left | G_{Hor} \right |}{\left | G_{Ver} \right |}$

如果 $\left | G_{Hor} \right |\geqslant \left | G_{Ver} \right |$ 则 $tan \theta =\frac{\left | G_{Ver} \right |}{\left | G_{Hor} \right |}$

对于区域0，预测角度=水平角度+偏移角度，对于区域1，预测角度=水平角度-偏移角度

对于区域2，预测角度=垂直角度+偏移角度，对于区域3，预测角度=垂直角度-偏移角度

有点难以理解，以下图为例，GHor和GVer的符号相同且 $\left | G_{Hor} \right | < \left | G_{Ver} \right |$ ，通过 $\tan\theta =\frac{\left | G_{Hor} \right |}{\left | G_{Ver} \right |}$

可以求出偏移角度 $\theta$ ，偏移角度 $\theta$ 的垂直方向即为预测角度。为了方便计算，可以通过水平角度-偏移角度 $\theta$ 即求出最终的预测角度。

/// Accumulates Sobel gradient amplitudes into a histogram indexed by intra prediction mode.
///
/// Every position of a uiWidth x uiHeight window starting at pReco is used as the centre
/// of a 3x3 Sobel operator, so the samples one row/column around the window are read too.
/// For a centre with a non-zero gradient, the gradient direction is quantised to an offset
/// of 0..16 modes from HOR_IDX or VER_IDX and |iDx| + |iDy| is added to that mode's bin.
///
/// \param pReco        first Sobel centre position
/// \param iStride      sample stride between consecutive rows
/// \param uiHeight     number of rows of centre positions
/// \param uiWidth      number of columns of centre positions
/// \param piHistogram  histogram to update; bins HOR_IDX-16 .. VER_IDX+16 may be written
/// \param direction    when equal to 3, the last position of the window (bottom-right) is
///                     skipped; other values are not interpreted by this function
/// \param bw           unused by this function
/// \param bh           unused by this function
/// \return always 0
int buildHistogram(const Pel *pReco, int iStride, uint32_t uiHeight, uint32_t uiWidth, int* piHistogram, int direction, int bw, int bh)
{
  // tan(offset) scaled by 2^16 for the 17 possible offsets from HOR/VER; entry i equals
  // 2048 * {0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32}[i].
  static const int angTable[17] = { 0, 2048, 4096, 6144, 8192, 12288, 16384, 20480, 24576, 28672, 32768, 36864, 40960, 47104, 53248, 59392, 65536 };
  // Base mode of each of the four regions ...
  static const int offsets[4] = { HOR_IDX, HOR_IDX, VER_IDX, VER_IDX };
  // ... and whether the quantised offset is subtracted from or added to that base mode.
  static const int dirs[4] = { -1, 1, -1, 1 };
  // Region lookup [signy][signx]: first table for |iDx| > |iDy|, second for |iDx| <= |iDy|.
  static const int map_x_gr_y_1[2][2] = { { 1, 0 },{ 0, 1 } };
  static const int map_x_gr_y_0[2][2] = { { 2, 3 },{ 3, 2 } };

  for (uint32_t y = 0; y < uiHeight; y++)
  {
    for (uint32_t x = 0; x < uiWidth; x++)
    {
      // Above-left corner window: only three of the 2x2 positions are evaluated.
      if ((direction == 3) && x == (uiWidth - 1) && y == (uiHeight - 1))
        continue;

      const Pel *pRec = pReco + y * iStride + x;
      // Left column minus right column: horizontal gradient (responds to vertical edges).
      int iDy = pRec[-iStride - 1] + 2 * pRec[-1] + pRec[iStride - 1] - pRec[-iStride + 1] - 2 * pRec[+1] - pRec[iStride + 1];
      // Bottom row minus top row: vertical gradient (responds to horizontal edges).
      int iDx = pRec[iStride - 1] + 2 * pRec[iStride] + pRec[iStride + 1] - pRec[-iStride - 1] - 2 * pRec[-iStride] - pRec[-iStride + 1];

      if (iDy == 0 && iDx == 0)
        continue;

      const int absx = iDx < 0 ? -iDx : iDx;
      const int absy = iDy < 0 ? -iDy : iDy;
      const int iAmp = absx + absy;
      int iAng_uneven = -1; // derived mode index

      if (iDx != 0 && iDy != 0) // pure angles are not concerned
      {
        // get the region
        const int signx = iDx < 0 ? 1 : 0;
        const int signy = iDy < 0 ? 1 : 0;
        const int x_gr_y = absx > absy ? 1 : 0;
        const int region = x_gr_y ? map_x_gr_y_1[signy][signx] : map_x_gr_y_0[signy][signx];

        // smaller magnitude over larger magnitude, i.e. a tangent in (0, 1], scaled by 2^16
        const float fRatio = x_gr_y ? static_cast<float>(absy) / static_cast<float>(absx) : static_cast<float>(absx) / static_cast<float>(absy);
        const float fRatio_scaled = fRatio * (1 << 16);
        const int iRatio = static_cast<int>(fRatio_scaled);

        // Find the nearest table entry. iRatio can reach 65536 (|iDx| == |iDy|), in which
        // case no entry is strictly larger; default to the last entry so that an exact
        // diagonal maps to an offset of 16 instead of leaving the index at -1.
        int idx = 16;
        for( int i = 1; i < 17; i++ )
        {
          if( iRatio < angTable[i] )
          {
            idx = iRatio - angTable[i - 1] < angTable[i] - iRatio ? i - 1 : i;
            break;
          }
        }

        iAng_uneven = offsets[region] + dirs[region] * idx;
      }
      else
      {
        // Exactly one gradient component is zero: no vertical gradient (iDx == 0) gives
        // the vertical mode, no horizontal gradient (iDy == 0) gives the horizontal mode.
        iAng_uneven = iDx == 0 ? VER_IDX : HOR_IDX;
      }
      piHistogram[iAng_uneven] += iAmp; // accumulate amplitude
    }
  }
  return 0;
}

DIMD预测值的加权融合代码如下所示：

// DIMD prediction fusion (luma only, when the CU uses DIMD with blending enabled).
// Blends three predictions sample by sample:
//   - the prediction already stored in piPred              (weight dimdRelWeight[0]),
//   - a Planar prediction computed here                    (weight dimdRelWeight[1]),
//   - the angular prediction of pu.cu->dimdBlendMode[0]    (weight dimdRelWeight[2]),
// and writes (sum >> 6) back into piPred. piPred is assumed to already hold the
// prediction of the primary DIMD mode on entry - TODO confirm against the enclosing function.
if (pu.cu->dimd && pu.cu->dimd_is_blend && isLuma(compID))
  {
    int width = piPred.width;
    int height = piPred.height;
    const UnitArea localUnitArea( pu.chromaFormat, Area( 0, 0, width, height ) );

    // Scratch buffers for the two additional predictions.
    PelBuf planarBuffer = m_tempBuffer[0].getBuf( localUnitArea.Y() );
    PelBuf predAng = m_tempBuffer[1].getBuf( localUnitArea.Y() );

    xPredIntraPlanar( srcBuf, planarBuffer ); // compute the Planar prediction

    // Remember the current PDPC setting; it is restored after the second angular prediction.
    const bool applyPdpc = m_ipaParam.applyPDPC;
#if JVET_V0087_DIMD_NO_ISP   // this is pure cleanup to make code easier to read. It generates identical result to the else part
    // Work on a copy of the PU whose luma intra direction is the blend mode.
    // NOTE(review): initPredIntraParams presumably re-initialises m_ipaParam for that mode - confirm.
    PredictionUnit pu2 = pu;
    pu2.intraDir[0] = pu.cu->dimdBlendMode[0];
    initPredIntraParams(pu2, pu.Y(), *(pu.cs->sps)); 

#if JVET_W0123_TIMD_FUSION
    xPredIntraAng(srcBuf, predAng, channelType, clpRng, false);// compute the prediction of the mode with the second-highest amplitude
#else
    xPredIntraAng(srcBuf, predAng, channelType, clpRng);
#endif
#else
    // Manual set-up of m_ipaParam for the blend mode (used when JVET_V0087_DIMD_NO_ISP is off).
    const bool   useISP = NOT_INTRA_SUBPARTITIONS != pu.cu->ispMode && isLuma( CHANNEL_TYPE_LUMA );//ok
    const Size   cuSize = Size( pu.cu->blocks[compId].width, pu.cu->blocks[compId].height ); //ok
    const Size   puSize = Size( piPred.width, piPred.height );
    const Size&  blockSize = useISP ? cuSize : puSize;
    int blendDir = pu.cu->dimdBlendMode[0];
    const int      dirMode = blendDir;
    const int     predMode = getModifiedWideAngle( blockSize.width, blockSize.height, dirMode ); // to check later
    m_ipaParam.isModeVer = predMode >= DIA_IDX;
    m_ipaParam.multiRefIndex = 0;
    m_ipaParam.refFilterFlag = false;
    m_ipaParam.interpolationFlag = false;
    m_ipaParam.applyPDPC = ( ( puSize.width >= MIN_TB_SIZEY && puSize.height >= MIN_TB_SIZEY ) || !isLuma( compId ) ) && m_ipaParam.multiRefIndex == 0;

    // Signed mode distance from VER (vertical-class modes) or from HOR (horizontal-class modes).
    const int    intraPredAngleMode = ( m_ipaParam.isModeVer ) ? predMode - VER_IDX : -( predMode - HOR_IDX );//ok
    int absAng = 0;
    if( dirMode > DC_IDX && dirMode < NUM_LUMA_MODE ) // intraPredAngle for directional modes
    {
      static const int angTable[32] = { 0,    1,    2,    3,    4,    6,     8,   10,   12,   14,   16,   18,   20,   23,   26,   29,   32,   35,   39,  45,  51,  57,  64,  73,  86, 102, 128, 171, 256, 341, 512, 1024 };//ok
      static const int invAngTable[32] = { 0,   16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565, 512, 468,   420,  364,  321,  287,  256,  224,  191,  161,  128,  96,  64,  48,  32,  16 };   // (512 * 32) / Angle

      const int     absAngMode = abs( intraPredAngleMode );
      const int     signAng = intraPredAngleMode < 0 ? -1 : 1;
      absAng = angTable[absAngMode];

      m_ipaParam.absInvAngle = invAngTable[absAngMode];
      m_ipaParam.intraPredAngle = signAng * absAng;
      if( intraPredAngleMode < 0 )
      {
        // Negative angles: PDPC is switched off.
        m_ipaParam.applyPDPC = false;
      }
      else if( intraPredAngleMode > 0 )
      {
        const int sideSize = m_ipaParam.isModeVer ? puSize.height : puSize.width;
        const int maxScale = 2;
#if GRAD_PDPC
        m_ipaParam.useGradPDPC = false;
#endif

        m_ipaParam.angularScale = std::min( maxScale, floorLog2( sideSize ) - ( floorLog2( 3 * m_ipaParam.absInvAngle - 2 ) - 8 ) );
#if GRAD_PDPC
        // A negative scale on luma switches to gradient PDPC with a size-derived scale.
        if( ( m_ipaParam.angularScale < 0 ) && ( isLuma( compId ) ) )
        {
          m_ipaParam.angularScale = ( floorLog2( puSize.width ) + floorLog2( puSize.height ) - 2 ) >> 2;
          m_ipaParam.useGradPDPC = true;
        }
#endif
        // PDPC stays enabled only for a non-negative scale.
        m_ipaParam.applyPDPC &= m_ipaParam.angularScale >= 0;
      }
    }

    // Reference filtering / interpolation selection; skipped entirely in the cases listed below.
    if( pu.cs->sps->getSpsRangeExtension().getIntraSmoothingDisabledFlag()
        || ( !isLuma( CHANNEL_TYPE_LUMA ) && pu.chromaFormat != CHROMA_444 )
        || useISP
        || m_ipaParam.multiRefIndex
        || DC_IDX == dirMode
        )
    {
      //do nothing
    }
    else if( !useISP )// HOR, VER and angular modes (MDIS)
    {
      bool filterFlag = false;
      // Distance of the mode to the nearer of HOR/VER, compared with a size-dependent threshold.
      const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) );
      const int log2Size = ( ( floorLog2( puSize.width ) + floorLog2( puSize.height ) ) >> 1 );
      CHECK( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" );
      filterFlag = ( diff > m_aucIntraFilter[log2Size] );


      if( filterFlag )
      {
        // Reference filtering and interpolation are mutually exclusive, chosen by isIntegerSlope.
        const bool isRefFilter = isIntegerSlope( absAng );
        CHECK( puSize.width * puSize.height <= 32, "DCT-IF interpolation filter is always used for 4x4, 4x8, and 8x4 luma CB" );
        m_ipaParam.refFilterFlag = isRefFilter;
        m_ipaParam.interpolationFlag = !isRefFilter;
      }
    }

#if JVET_W0123_TIMD_FUSION
    xPredIntraAng( srcBuf, predAng, channelType, clpRng, false );
#else
    xPredIntraAng( srcBuf, predAng, channelType, clpRng );
#endif
#endif
    // Restore the PDPC setting saved before the blend-mode prediction.
    m_ipaParam.applyPDPC = applyPdpc;

    // do blending: weighted fusion of the three predictions
    const int log2WeightSum = 6;
    Pel *pelPred = piPred.buf;
    Pel *pelPlanar = planarBuffer.buf;
    Pel *pelPredAng = predAng.buf;
    int  w0 = pu.cu->dimdRelWeight[0], w1 = pu.cu->dimdRelWeight[1], w2 = pu.cu->dimdRelWeight[2];

    for( int y = 0; y < height; y++ )
    {
      for( int x = 0; x < width; x++ )
      {
        int blend = pelPred[x] * w0;
        blend += pelPlanar[x] * w1;
        blend += pelPredAng[x] * w2;
        pelPred[x] = (Pel)(blend >> log2WeightSum); // weighted sum, truncating shift (no rounding offset)
      }

      // Advance each buffer by its own stride.
      pelPred += piPred.stride;
      pelPlanar += planarBuffer.stride;
      pelPredAng += predAng.stride;
    }
  }

涵小呆

关注

4
点赞
踩
6

收藏

觉得还不错? 一键收藏
0
评论
ECM技术学习：解码端帧内模式推导(Decoder-side Intra Mode Derivation )

解码端帧内模式推导（DIMD）技术是之前在VVC标准化的过程中提出的技术，因为其在解码端引入的复杂度较高，因此没有被VVC采纳。为了探索下一代压缩标准，JVET最近设立了最新的ECM参考平台，将DIMD技术包含了进去。DIMD技术，称为解码端帧内模式推导，是通过使用求出当前块相邻像素的梯度，来推导当前块的角度模式，从而可以降低编码模式比特。在解码端，通过使用相同的推导方式推导角度模式，恢复出重建YUV。1. 帧内模式推导DIMD使用当前块相邻的Template区域的重建像素推导帧内角度模式，如下
复制链接

扫一扫

专栏目录