Larger Coding Tree Block & Larger Transform Unit

最新推荐文章于 2020-06-10 09:00:00 发布

jtfan211

最新推荐文章于 2020-06-10 09:00:00 发布

阅读量357

点赞数

本文链接：https://blog.csdn.net/jtfan/article/details/76446545

版权

视频编码专栏收录该内容

11 篇文章 0 订阅

订阅专栏

开启新技术和新优化工具学习之路。
首先，看下最简单的大块优化策略。

在HEVC中，CTU最大支持64x64，TU最大支持32x32；
而在JVET中，CTU被扩展到256x256，TU被扩展到64x64，其中CU、PU的划分模式不变。
针对64x64的TU，变换后只保留左上角32x32的变换系数块，其余3个32x32块的变换系数均设为0（参见函数fastForwardDCT2_B64()最后几行代码）。其变换矩阵沿用DCT-II型，计算方式如下：
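$$T_{64}(k,n)=\operatorname{round}\!\left(S\cdot\omega_0\cdot\sqrt{\frac{2}{64}}\cdot\cos\frac{\pi\,(2n+1)\,k}{2\cdot 64}\right),\qquad k,n=0,1,\dots,63$$

其中，$\omega_0=\sqrt{1/2}$（当 $k=0$ 时）或 $\omega_0=1$（当 $k\neq 0$ 时），$S=256\cdot\sqrt{64}=2048$。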

具体地，参考JEM1.0中代码。

large CTU 参考条件宏 COM16_C806_LARGE_CTU

若CTU宽度的2倍大于图像宽度、且CTU高度的2倍大于图像高度（即CTU相对图像过大），则将CTU的宽、高逐次减半，同时最大CU深度减1，直到上述条件不再成立：


if( m_uiMaxCUWidth * 2 > m_iSourceWidth && m_uiMaxCUHeight * 2 > m_iSourceHeight )
{
    while( m_uiMaxCUWidth * 2 > m_iSourceWidth && m_uiMaxCUHeight * 2 > m_iSourceHeight )
    {
          m_uiMaxCUWidth >>= 1;
          m_uiMaxCUHeight >>= 1;
          m_uiMaxCUDepth--;
    }
    printf( "\nWarning: CTU size is reduced to (%dx%d) to better fit picture size (%dx%d)\n" , m_uiMaxCUWidth , m_uiMaxCUHeight , m_iSourceWidth , m_iSourceHeight );
}

调整最大CU深度，和最大CU大小


static const Int MAX_CU_DEPTH =                                     8; ///

//当CU高度大于图像高度的1/2时，则跳过merge2Nx2N模式
  if( m_pcEncCfg->getUseFastLCTU() && rpcTempCU->getHeight( 0 ) * 2 > rpcTempCU->getSlice()->getSPS()->getPicHeightInLumaSamples() )
  {
    rpcTempCU->getTotalCost() = MAX_DOUBLE / 4;
    rpcTempCU->getTotalDistortion() = MAX_INT;
    xCheckBestMode(rpcBestCU, rpcTempCU, rpcTempCU->getDepth( 0 ));
    return;
  }

//当CU宽度大于64时，不做非2Nx2N的inter PU
    if( m_pcEncCfg->getUseFastLCTU() )
  {
        if( ePartSize != SIZE_2Nx2N && rpcTempCU->getWidth( 0 ) > 64 )
        {
          rpcTempCU->getTotalCost() = MAX_DOUBLE / 4;
          rpcTempCU->getTotalDistortion() = MAX_INT;
          xCheckBestMode(rpcBestCU, rpcTempCU, uhDepth);
          return;
        }
  }

//当CU的宽度大于64时，跳过帧内预测
  if( m_pcEncCfg->getUseFastLCTU() ) 
  {
    if( rpcTempCU->getWidth( 0 ) > 64 )
    {
      rpcTempCU->getTotalCost() = MAX_DOUBLE / 4;
      rpcTempCU->getTotalDistortion() = MAX_INT;
      xCheckBestMode(rpcBestCU, rpcTempCU, uiDepth);
      return;
    }
  }

//启用fastLCTU时，仅当CU depth小于相邻块最大depth（uiMaxCUDepth，由getMaxMinCUDepth()得到），或者iRoiWidth*iRoiHeight大于等于64时，才做双向搜索，其中iRoiWidth和iRoiHeight为PU的宽和高
    pcCU->getMaxMinCUDepth( uiMinCUDepth , uiMaxCUDepth , pcCU->getZorderIdxInCtu() );
    Bool bCheckBi = true;
    if( m_pcEncCfg->getUseFastLCTU() )
    {
      bCheckBi = pcCU->getDepth( 0 ) < uiMaxCUDepth || iRoiWidth * iRoiHeight >= 64;
    }

large TU 参考条件宏 COM16_C806_T64

扩大TUSize

static const Int MAX_TU_SIZE =                                     64;
static const Int MAX_LOG2_TU_SIZE_PLUS_ONE =                        7; ///< log2(MAX_TU_SIZE) + 1

初始化各变换矩阵的初始值，可以看到现在有五种不同的变换矩阵DCT2，DCT5，DCT8，DST1，DST7（具体还得在后面看看multiple core transform）

c = 4;
for ( i=0; i<5; i++ )
{
TMatrixCoeff *iT = NULL;
const Double s = sqrt((Double)c) * ( 64 << COM16_C806_TRANS_PREC );
const Double PI = 3.14159265358979323846;

switch(i)
{
case 0: iT = g_aiTr4 [0][0]; break;
case 1: iT = g_aiTr8 [0][0]; break;
case 2: iT = g_aiTr16[0][0]; break;
case 3: iT = g_aiTr32[0][0]; break;
case 4: iT = g_aiTr64[0][0]; break;
case 5: exit(0); break;
}

for( Int k=0; k<c; k++ )
{
  for( Int n=0; n<c; n++ )
  {
    Double w0, w1, v;

    // DCT-II
    w0 = k==0 ? sqrt(0.5) : 1;
    v = cos(PI*(n+0.5)*k/c ) * w0 * sqrt(2.0/c);
    iT[DCT2*c*c + k*c + n] = (Short) ( s * v + ( v > 0 ? 0.5 : -0.5) );

    // DCT-V
    w0 = ( k==0 ) ? sqrt(0.5) : 1.0;
    w1 = ( n==0 ) ? sqrt(0.5) : 1.0;
    v = cos(PI*n*k/(c-0.5)) * w0 * w1 * sqrt(2.0/(c-0.5));
    iT[DCT5*c*c + k*c + n] = (Short) ( s * v + ( v > 0 ? 0.5 : -0.5) );

    // DCT-VIII
    v = cos(PI*(k+0.5)*(n+0.5)/(c+0.5) ) * sqrt(2.0/(c+0.5));
    iT[DCT8*c*c + k*c + n] = (Short) ( s * v + ( v > 0 ? 0.5 : -0.5) );

    // DST-I
    v = sin(PI*(n+1)*(k+1)/(c+1)) * sqrt(2.0/(c+1));
    iT[DST1*c*c + k*c + n] = (Short) ( s * v + ( v > 0 ? 0.5 : -0.5) );

    // DST-VII
    v = sin(PI*(k+0.5)*(n+1)/(c+0.5)) * sqrt(2.0/(c+0.5));
    iT[DST7*c*c + k*c + n] = (Short) ( s * v + ( v > 0 ? 0.5 : -0.5) );
  }
}
c <<= 1;
}

对应地，修改quantization时scalinglist的大小，即增加64x64块
64x64的TU只用于dct-II
变换函数，正向变换fastForwardDCT2_B64，反向变换fastInverseDCT2_B64
为了保证精度，64x64变换矩阵的系数放大了4倍（缩放因子由64变为256），因此两级变换的移位量shift_1st、shift_2nd都需各多移COM16_C806_TRANS_PREC（即2）位：
```
if( iWidth==64 )
{
shift_1st += COM16_C806_TRANS_PREC;
shift_2nd += COM16_C806_TRANS_PREC;
}
```

小结：
- Larger CTU, 将CTB扩展到256x256，并对large CU增加了系列加速算法(fastLCTU)。
- Larger TU，将TU扩展到了64x64；64x64的TU仅使用DCT-II，且变换后的系数块只保留左上角（低频）的32x32，其余3个32x32块的系数均设为0。

jtfan211

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Larger Coding Tree Block & Larger Transform Unit

开启新技术和新优化工具学习之路。首先，看下最简单的大块优化策略。在HEVC，CTU最大可支持64x64的CTU以及32x32的TU 而在JVET中，CTU被扩展到256x256，TU被扩展到64x64，其中，CU、PU的划分模式不变。针对64x64TU, 变换后，只保留左上角32x32的变换系数块，其余3个32x32块的变换系数均设为0*（参见函数fastForwardDCT2_B64()
复制链接

扫一扫