VTM11中关于码率控制的疑问笔记（一）

最新推荐文章于 2022-08-16 22:21:39 发布

道外二蹄脚

最新推荐文章于 2022-08-16 22:21:39 发布

阅读量986

点赞数 2

分类专栏： H.266/VVC 文章标签：音视频

本文链接：https://blog.csdn.net/weixin_36672636/article/details/118999670

版权

H.266/VVC 专栏收录该内容

1 篇文章 1 订阅

订阅专栏

本文详细解读了VVC（即H.266，H.265/HEVC的后续标准）参考软件VTM中码率控制模块的初始化过程，涉及isLowdelay判断、GOPSize与level的关系、bitsRatio数组的使用，以及如何根据目标比特率调整编码配置。通过实例解析，帮助理解关键参数在编码效率和延迟之间的权衡。

摘要由CSDN通过智能技术生成

最近一直在看vtm11的代码，研究方向是H.266/VVC的码率控制。关于代码以及代码引申出VVC一些概念的疑问在此记录一下，希望研究这方面的前辈多指教。

一、码率控制模块初始化（序列级）

//码率控制模块初始化
/**
 * Sequence-level initialisation of the rate controller.
 *
 * Derives, from the GOP structure and the target bits-per-pixel, the
 * per-picture bit-allocation weights (bitsRatio) and the mapping from GOP
 * entry index to hierarchy level (GOPID2Level), then creates and configures
 * the sequence-level state object m_encRCSeq.
 *
 * @param totalFrames         forwarded to EncRCSeq::create
 * @param targetBitrate       target bitrate; also used to compute bits per pixel
 * @param frameRate           frame rate; also used to compute bits per pixel
 * @param GOPSize             number of entries of GOPList that are used
 * @param picWidth            picture width
 * @param picHeight           picture height
 * @param LCUWidth            forwarded to EncRCSeq::create
 * @param LCUHeight           forwarded to EncRCSeq::create
 * @param bitDepth            forwarded to EncRCSeq::setBitDepth
 * @param keepHierBits        > 0 selects the hierarchical bit-allocation tables;
 *                            == 2 additionally sets the adaptive-bit mode
 *                            (1 for low delay, 2 for the other supported GOPs)
 * @param useLCUSeparateModel when true, LCU-level parameters are initialised too
 * @param GOPList             GOP entries; m_POC and m_refPic are read
 */
void RateCtrl::init(int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int bitDepth, int keepHierBits, bool useLCUSeparateModel, GOPEntry  GOPList[MAX_GOP])
{
  destroy();

  // The configuration counts as low delay when the POC never decreases from
  // one GOP entry to the next.
  bool isLowdelay = true;
  for ( int i = 0; i < GOPSize - 1; i++ )
  {
    if ( GOPList[i].m_POC > GOPList[i + 1].m_POC )
    {
      isLowdelay = false;
      break;
    }
  }

  // Number of hierarchy levels: one by default, log2(GOPSize) + 1 when a
  // hierarchical structure is in use, plus two extra levels.
  int numberOfLevel = 1;
  if ( keepHierBits > 0 || ( !isLowdelay && ( GOPSize == 32 || GOPSize == 16 || GOPSize == 8 ) ) )
  {
    numberOfLevel = int( log( (double)GOPSize ) / log( 2.0 ) + 0.5 ) + 1;
  }
  numberOfLevel++;    // intra picture
  numberOfLevel++;    // non-reference picture

  // Defaults: reference pictures get weight 10 / level 1, non-reference
  // pictures get weight 2 / level 2. bitsRatio holds relative weights, not
  // absolute bit counts.
  int* bitsRatio   = new int[ GOPSize ];
  int* GOPID2Level = new int[ GOPSize ];
  for ( int i = 0; i < GOPSize; i++ )
  {
    const bool isRefPic = GOPList[i].m_refPic;
    bitsRatio[i]   = isRefPic ? 10 : 2;
    GOPID2Level[i] = isRefPic ? 1 : 2;
  }

  const bool lowDelayHier = isLowdelay && ( GOPSize == 4 || GOPSize == 8 );
  const bool randomAccess = !isLowdelay && ( GOPSize == 8 || GOPSize == 16 || GOPSize == 32 );

  int adaptiveBit = 0;
  if ( keepHierBits > 0 )
  {
    // Bits per pixel. Every factor is converted to double before multiplying:
    // the int product frameRate*picWidth*picHeight overflows for large
    // pictures at high frame rates (e.g. 7680x4320 at 120 fps).
    const double bpp = static_cast<double>( targetBitrate )
                     / ( static_cast<double>( frameRate ) * static_cast<double>( picWidth ) * static_cast<double>( picHeight ) );
    // Table row selected by the bpp range: >0.2, >0.1, >0.05, otherwise.
    const int cls = ( bpp > 0.2 ) ? 0 : ( bpp > 0.1 ) ? 1 : ( bpp > 0.05 ) ? 2 : 3;

    if ( lowDelayHier )
    {
      // Weights alternate 2,3,2,3,... and the last picture of the GOP gets a
      // bpp-dependent weight.
      static const int lastPicRatio[4] = { 6, 10, 12, 14 };
      for ( int i = 0; i < GOPSize - 1; i++ )
      {
        bitsRatio[i] = ( i % 2 == 0 ) ? 2 : 3;
      }
      bitsRatio[GOPSize - 1] = lastPicRatio[cls];

      if ( keepHierBits == 2 )
      {
        adaptiveBit = 1;
      }
    }
    else if ( randomAccess )
    {
      static const int ratioGop8[4][8] =
      {
        { 15, 5, 4, 1, 1, 4, 1, 1 },
        { 20, 6, 4, 1, 1, 4, 1, 1 },
        { 25, 7, 4, 1, 1, 4, 1, 1 },
        { 30, 8, 4, 1, 1, 4, 1, 1 },
      };
      static const int ratioGop16[4][16] =
      {
        { 10,  8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 },
        { 15,  9, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 },
        { 40, 17, 7, 2, 1, 1, 2, 1, 1, 7, 2, 1, 1, 2, 1, 1 },
        { 40, 15, 6, 3, 1, 1, 3, 1, 1, 6, 3, 1, 1, 3, 1, 1 },
      };
      // For GOP size 32 the weight is looked up through a per-entry index.
      static const int ratioGop32[4][6] =
      {
        { 16, 10, 8, 4, 2, 1 },
        { 16, 10, 8, 4, 2, 1 },
        { 16, 10, 8, 4, 2, 1 },
        { 10,  8, 6, 4, 2, 1 },
      };
      static const int indexGop32[32] = { 0, 1, 2, 3, 4, 5, 5, 4, 5, 5, 3, 4, 5, 5, 4, 5, 5, 2, 3, 4, 5, 5, 4, 5, 5, 3, 4, 5, 5, 4, 5, 5 };

      for ( int i = 0; i < GOPSize; i++ )
      {
        if ( GOPSize == 8 )
        {
          bitsRatio[i] = ratioGop8[cls][i];
        }
        else if ( GOPSize == 16 )
        {
          bitsRatio[i] = ratioGop16[cls][i];
        }
        else
        {
          bitsRatio[i] = ratioGop32[cls][indexGop32[i]];
        }
      }

      if ( keepHierBits == 2 )
      {
        adaptiveBit = 2;
      }
    }
    else
    {
      msg( WARNING, "\n hierarchical bit allocation is not support for the specified coding structure currently.\n" );
    }
  }

  // Mapping from GOP entry index to hierarchy level. The non-low-delay tables
  // apply regardless of keepHierBits; the low-delay pattern only when
  // hierarchical bit allocation is enabled.
  if ( randomAccess )
  {
    static const int levelGop8[8]   = { 1, 2, 3, 4, 4, 3, 4, 4 };
    static const int levelGop16[16] = { 1, 2, 3, 4, 5, 5, 4, 5, 5, 3, 4, 5, 5, 4, 5, 5 };
    static const int levelGop32[32] = { 1, 2, 3, 4, 5, 6, 6, 5, 6, 6, 4, 5, 6, 6, 5, 6,
                                        6, 3, 4, 5, 6, 6, 5, 6, 6, 4, 5, 6, 6, 5, 6, 6 };
    const int* levels = ( GOPSize == 8 ) ? levelGop8 : ( GOPSize == 16 ) ? levelGop16 : levelGop32;
    for ( int i = 0; i < GOPSize; i++ )
    {
      GOPID2Level[i] = levels[i];
    }
  }
  else if ( keepHierBits > 0 && lowDelayHier )
  {
    // Levels alternate 3,2,3,2,... and the last picture of the GOP is level 1.
    for ( int i = 0; i < GOPSize - 1; i++ )
    {
      GOPID2Level[i] = ( i % 2 == 0 ) ? 3 : 2;
    }
    GOPID2Level[GOPSize - 1] = 1;
  }

  // Hand the derived configuration to the sequence-level rate-control object.
  m_encRCSeq = new EncRCSeq;
  m_encRCSeq->create( totalFrames, targetBitrate, frameRate, GOPSize, picWidth, picHeight, LCUWidth, LCUHeight, numberOfLevel, useLCUSeparateModel, adaptiveBit );
  m_encRCSeq->initBitsRatio( bitsRatio );      // per-picture bit-allocation weights
  m_encRCSeq->initGOPID2Level( GOPID2Level );
  m_encRCSeq->setBitDepth(bitDepth);
  m_encRCSeq->initPicPara();                   // picture-level model parameters
  if ( useLCUSeparateModel )
  {
    m_encRCSeq->initLCUPara();                 // LCU-level model parameters
  }
#if U0132_TARGET_BITS_SATURATION
  m_CpbSaturationEnabled = false;
  m_cpbSize              = targetBitrate;
  m_cpbState             = (uint32_t)(m_cpbSize*0.5f);
  m_bufferingRate        = (int)(targetBitrate / frameRate);
#endif

  // The local tables are released here, as in the original code.
  delete[] bitsRatio;
  delete[] GOPID2Level;
}

1.isLowdelay变量

该变量是判断是否采用低时延（LD）编码配置。VVC中有三种编码配置，即AI（all intra）、RA（random access）、LD（low delay）。关于三种模式，参考：https://blog.csdn.net/dongmie1999/article/details/116131129

2.numberOfLevel变量

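目前没完全弄懂。从代码看，它是码率控制模型的层级（level）总数：默认为1；当keepHierBits > 0，或者非低时延且GOPSize为8/16/32时，取log2(GOPSize)+1；之后再为帧内图像（intra picture）和非参考图像（non-reference picture）各加一层。下文的create()函数按这个数量分配m_picPara（以及m_LCUPara）参数数组。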

3.bitsRatio数组

根据JCTVC-K0103提案，该数组存放的是GOP中各帧对应的比特数比例（也可以说是比特数权重）。

4.GOPID2Level数组

虽然知道它是存储GOP中图像序号到level的映射，但这个数组的意义也还不是特别清楚。

5.下面几行就是将得到的一些数据存入m_encRCSeq对象中，其中有个create()函数,如下：

void EncRCSeq::create( int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int numberOfLevel, bool useLCUSeparateModel, int adaptiveBit )
{
  destroy();
  m_totalFrames         = totalFrames;//总帧数
  m_targetRate          = targetBitrate;//目标比特
  m_frameRate           = frameRate;//帧率(fps)
  m_GOPSize             = GOPSize;//gop大小
  m_picWidth            = picWidth;
  m_picHeight           = picHeight;
  m_LCUWidth            = LCUWidth;
  m_LCUHeight           = LCUHeight;
  m_numberOfLevel       = numberOfLevel;
  m_useLCUSeparateModel = useLCUSeparateModel;

  m_numberOfPixel   = m_picWidth * m_picHeight;
  //序列总码率 即输出码流大小
  m_targetBits      = (int64_t)m_totalFrames * (int64_t)m_targetRate / (int64_t)m_frameRate;
  //每像素被分到的目标比特
  m_seqTargetBpp = (double)m_targetRate / (double)m_frameRate / (double)m_numberOfPixel;
  //m_alphaUpdate、m_betaUpdate这两个变量用于在接下来更新lamda的参数值
  if ( m_seqTargetBpp < 0.03 )
  {
    m_alphaUpdate = 0.01;
    m_betaUpdate  = 0.005;
  }
  else if ( m_seqTargetBpp < 0.08 )
  {
    m_alphaUpdate = 0.05;
    m_betaUpdate  = 0.025;
  }
  else if ( m_seqTargetBpp < 0.2 )
  {
    m_alphaUpdate = 0.1;
    m_betaUpdate  = 0.05;
  }
  else if ( m_seqTargetBpp < 0.5 )
  {
    m_alphaUpdate = 0.2;
    m_betaUpdate  = 0.1;
  }
  else
  {
    m_alphaUpdate = 0.4;
    m_betaUpdate  = 0.2;
  }

  m_averageBits     = (int)(m_targetBits / totalFrames);//平均每帧占用的目标比特数
  int picWidthInBU  = ( m_picWidth  % m_LCUWidth  ) == 0 ? m_picWidth  / m_LCUWidth  : m_picWidth  / m_LCUWidth  + 1;
  int picHeightInBU = ( m_picHeight % m_LCUHeight ) == 0 ? m_picHeight / m_LCUHeight : m_picHeight / m_LCUHeight + 1;
  //一帧包含的LCU数量
  m_numberOfLCU     = picWidthInBU * picHeightInBU;

  m_bitsRatio   = new int[m_GOPSize];
  for ( int i=0; i<m_GOPSize; i++ )
  {
    m_bitsRatio[i] = 1;
  }

  m_GOPID2Level = new int[m_GOPSize];
  for ( int i=0; i<m_GOPSize; i++ )
  {
    m_GOPID2Level[i] = 1;
  }

  m_picPara = new TRCParameter[m_numberOfLevel];
  for ( int i=0; i<m_numberOfLevel; i++ )
  {
    m_picPara[i].m_alpha = 0.0;
    m_picPara[i].m_beta  = 0.0;
    m_picPara[i].m_validPix = -1;
    m_picPara[i].m_skipRatio = 0.0;
  }

  if ( m_useLCUSeparateModel )
  {
    m_LCUPara = new TRCParameter*[m_numberOfLevel];
    for ( int i=0; i<m_numberOfLevel; i++ )
    {
      m_LCUPara[i] = new TRCParameter[m_numberOfLCU];
      for ( int j=0; j<m_numberOfLCU; j++)
      {
        m_LCUPara[i][j].m_alpha = 0.0;
        m_LCUPara[i][j].m_beta  = 0.0;
        m_LCUPara[i][j].m_validPix = -1;
        m_LCUPara[i][j].m_skipRatio = 0.0;
      }
    }
  }

  m_framesLeft = m_totalFrames;
  m_bitsLeft   = m_targetBits;
  m_adaptiveBit = adaptiveBit;
  m_lastLambda = 0.0;
}

涉及几个变量，在之后的帧级初始化和LCU级初始化可能会用到：

1）m_targetBits:序列的目标总比特；这个计算公式我认为应该这么写比较容易理解：

m_targetBits = ( m_totalFrames / m_frameRate ) × m_targetRate；即总帧数先除以帧率，求得整个序列的时长（秒），再乘以目标码率（单位是bps），即得序列的总目标比特数。需要注意，这只是便于理解的数学形式：代码中全部是int64_t整数运算，如果先做除法会发生整数截断（例如100帧、30fps时，100/30 = 3），所以实际代码是先乘后除。

2）m_seqTargetBpp：序列的目标bpp，即平均每帧中每个像素被分到的目标比特数；目标码率除以帧率，求得平均每帧的比特数，再除以一帧的像素总数（宽×高，即m_numberOfPixel），即得每像素目标比特数。

3）m_averageBits：每帧的目标比特数。

道外二蹄脚

关注

2
点赞
踩
2

收藏

觉得还不错? 一键收藏
7
评论
VTM11中关于码率控制的疑问笔记（一）

最近一直在看vtm11的代码，研究方向是H.266/VVC的码率控制。关于代码以及代码引申出VVC一些概念的疑问在此记录一下，希望研究这方面的前辈多指教。一、码率控制模块初始化（序列级）//码率控制模块初始化void RateCtrl::init(int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int
复制链接

扫一扫

专栏目录