最近一直在看vtm11的代码,研究方向是H.266/VVC的码率控制。关于代码以及代码引申出VVC一些概念的疑问在此记录一下,希望研究这方面的前辈多指教。
一、码率控制模块初始化(序列级)
// Sequence-level initialisation of the rate-control module.
//
// From the GOP structure and the target bitrate this derives
//   - isLowdelay    : true when the POCs in GOPList never decrease in coding order,
//   - numberOfLevel : number of hierarchy levels (plus one for intra pictures and
//                     one for non-reference pictures),
//   - bitsRatio[]   : relative bit-allocation weight of every picture in the GOP
//                     (weights, not absolute bit counts),
//   - GOPID2Level[] : level index of every picture position in the GOP,
//   - adaptiveBit   : 0, or 1 (low delay) / 2 (random access) when keepHierBits == 2,
// and passes them to a newly created EncRCSeq.
//
// Parameters:
//   totalFrames          number of frames in the sequence
//   targetBitrate        target bitrate (per second; it is divided by frameRate below)
//   frameRate            frames per second
//   GOPSize              number of entries of GOPList that are used
//   picWidth, picHeight  picture size in pixels
//   LCUWidth, LCUHeight  LCU size in pixels
//   bitDepth             forwarded to EncRCSeq::setBitDepth
//   keepHierBits         0: only the default reference/non-reference weights are used;
//                        >0: bpp-dependent hierarchical weight tables are used;
//                        2: additionally selects a non-zero adaptiveBit
//   useLCUSeparateModel  when true, per-LCU parameters are initialised as well
//   GOPList              GOP entries; m_POC and m_refPic are read
void RateCtrl::init(int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int bitDepth, int keepHierBits, bool useLCUSeparateModel, GOPEntry GOPList[MAX_GOP])
{
  destroy();

  // If any picture has a larger POC than the picture coded after it, the
  // structure reorders pictures and therefore is not low delay.
  bool isLowdelay = true;
  for ( int i = 0; i < GOPSize - 1; i++ )
  {
    if ( GOPList[i].m_POC > GOPList[i + 1].m_POC )
    {
      isLowdelay = false;
      break;
    }
  }

  int numberOfLevel = 1;
  if ( keepHierBits > 0 || ( !isLowdelay && ( GOPSize == 32 || GOPSize == 16 || GOPSize == 8 ) ) )
  {
    // log2(GOPSize), rounded to nearest, plus one.
    numberOfLevel = int( log( (double)GOPSize ) / log( 2.0 ) + 0.5 ) + 1;
  }
  numberOfLevel++; // intra picture
  numberOfLevel++; // non-reference picture

  // Defaults: reference pictures get weight 10 / level 1,
  // non-reference pictures get weight 2 / level 2.
  int* bitsRatio   = new int[ GOPSize ];
  int* GOPID2Level = new int[ GOPSize ];
  for ( int i = 0; i < GOPSize; i++ )
  {
    if ( GOPList[i].m_refPic )
    {
      bitsRatio[i]   = 10;
      GOPID2Level[i] = 1;
    }
    else
    {
      bitsRatio[i]   = 2;
      GOPID2Level[i] = 2;
    }
  }

  // ---------------------------------------------------------------------
  // Hierarchical bit-allocation weights.
  // Each table has one row per bpp class:
  //   row 0: bpp > 0.2, row 1: bpp > 0.1, row 2: bpp > 0.05, row 3: otherwise.
  // ---------------------------------------------------------------------
  static const int ldRatio4[4][4] =
  {
    { 2, 3, 2,  6 },
    { 2, 3, 2, 10 },
    { 2, 3, 2, 12 },
    { 2, 3, 2, 14 },
  };
  static const int ldRatio8[4][8] =
  {
    { 2, 3, 2, 3, 2, 3, 2,  6 },
    { 2, 3, 2, 3, 2, 3, 2, 10 },
    { 2, 3, 2, 3, 2, 3, 2, 12 },
    { 2, 3, 2, 3, 2, 3, 2, 14 },
  };
  static const int raRatio8[4][8] =
  {
    { 15, 5, 4, 1, 1, 4, 1, 1 },
    { 20, 6, 4, 1, 1, 4, 1, 1 },
    { 25, 7, 4, 1, 1, 4, 1, 1 },
    { 30, 8, 4, 1, 1, 4, 1, 1 },
  };
  static const int raRatio16[4][16] =
  {
    { 10,  8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 },
    { 15,  9, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 },
    { 40, 17, 7, 2, 1, 1, 2, 1, 1, 7, 2, 1, 1, 2, 1, 1 },
    { 40, 15, 6, 3, 1, 1, 3, 1, 1, 6, 3, 1, 1, 3, 1, 1 },
  };
  // For GOP 32 the weight depends only on the hierarchy depth of the picture:
  // raRatio32[class][depth], with the depth of each GOP position in raDepth32.
  static const int raRatio32[4][6] =
  {
    { 16, 10, 8, 4, 2, 1 },
    { 16, 10, 8, 4, 2, 1 },
    { 16, 10, 8, 4, 2, 1 },
    { 10,  8, 6, 4, 2, 1 },
  };
  static const int raDepth32[32] = { 0, 1, 2, 3, 4, 5, 5, 4, 5, 5, 3, 4, 5, 5, 4, 5, 5, 2, 3, 4, 5, 5, 4, 5, 5, 3, 4, 5, 5, 4, 5, 5 };

  int adaptiveBit = 0;
  if ( keepHierBits > 0 )
  {
    // Target bits per pixel. The denominator is formed in double: the int
    // product frameRate*picWidth*picHeight overflows for large pictures at
    // high frame rates (e.g. 7680x4320 above 64 fps).
    const double bpp = (double)targetBitrate / ( (double)frameRate * (double)picWidth * (double)picHeight );

    int bppClass = 3;
    if ( bpp > 0.2 )
    {
      bppClass = 0;
    }
    else if ( bpp > 0.1 )
    {
      bppClass = 1;
    }
    else if ( bpp > 0.05 )
    {
      bppClass = 2;
    }

    const int* hierRatios      = nullptr; // weight row to copy, if any
    int        hierAdaptiveBit = 0;       // adaptiveBit used when keepHierBits == 2

    if ( GOPSize == 4 && isLowdelay )
    {
      hierRatios      = ldRatio4[bppClass];
      hierAdaptiveBit = 1;
    }
    else if ( GOPSize == 8 && isLowdelay )
    {
      hierRatios      = ldRatio8[bppClass];
      hierAdaptiveBit = 1;
    }
    else if ( GOPSize == 8 && !isLowdelay )
    {
      hierRatios      = raRatio8[bppClass];
      hierAdaptiveBit = 2;
    }
    else if ( GOPSize == 16 && !isLowdelay )
    {
      hierRatios      = raRatio16[bppClass];
      hierAdaptiveBit = 2;
    }
    else if ( GOPSize == 32 && !isLowdelay )
    {
      for ( int i = 0; i < 32; i++ )
      {
        bitsRatio[i] = raRatio32[bppClass][raDepth32[i]];
      }
      hierAdaptiveBit = 2;
    }
    else
    {
      // Unsupported structure: the default weights set above remain in effect.
      msg( WARNING, "\n hierarchical bit allocation is not support for the specified coding structure currently.\n" );
    }

    if ( hierRatios != nullptr )
    {
      for ( int i = 0; i < GOPSize; i++ )
      {
        bitsRatio[i] = hierRatios[i];
      }
    }
    if ( keepHierBits == 2 )
    {
      adaptiveBit = hierAdaptiveBit; // stays 0 for unsupported structures
    }
  }

  // ---------------------------------------------------------------------
  // GOP position -> level mapping.
  // Random-access GOPs of size 8/16/32 always use the fixed tables below;
  // low-delay GOPs of size 4/8 use theirs only when keepHierBits > 0.
  // Everything else keeps the reference/non-reference default.
  // ---------------------------------------------------------------------
  static const int ldLevel4[4]   = { 3, 2, 3, 1 };
  static const int ldLevel8[8]   = { 3, 2, 3, 2, 3, 2, 3, 1 };
  static const int raLevel8[8]   = { 1, 2, 3, 4, 4, 3, 4, 4 };
  static const int raLevel16[16] = { 1, 2, 3, 4, 5, 5, 4, 5, 5, 3, 4, 5, 5, 4, 5, 5 };
  static const int raLevel32[32] = { 1, 2, 3, 4, 5, 6, 6, 5, 6, 6, 4, 5, 6, 6, 5, 6, 6, 3, 4, 5, 6, 6, 5, 6, 6, 4, 5, 6, 6, 5, 6, 6 };

  const int* levelTable = nullptr;
  if ( !isLowdelay )
  {
    if ( GOPSize == 8 )
    {
      levelTable = raLevel8;
    }
    else if ( GOPSize == 16 )
    {
      levelTable = raLevel16;
    }
    else if ( GOPSize == 32 )
    {
      levelTable = raLevel32;
    }
  }
  else if ( keepHierBits > 0 )
  {
    if ( GOPSize == 4 )
    {
      levelTable = ldLevel4;
    }
    else if ( GOPSize == 8 )
    {
      levelTable = ldLevel8;
    }
  }
  if ( levelTable != nullptr )
  {
    for ( int i = 0; i < GOPSize; i++ )
    {
      GOPID2Level[i] = levelTable[i];
    }
  }

  // Hand the derived data to the sequence-level rate-control object.
  m_encRCSeq = new EncRCSeq;
  m_encRCSeq->create( totalFrames, targetBitrate, frameRate, GOPSize, picWidth, picHeight, LCUWidth, LCUHeight, numberOfLevel, useLCUSeparateModel, adaptiveBit );
  m_encRCSeq->initBitsRatio( bitsRatio );       // per-picture bit weights
  m_encRCSeq->initGOPID2Level( GOPID2Level );
  m_encRCSeq->setBitDepth(bitDepth);
  m_encRCSeq->initPicPara();                    // picture-level alpha/beta parameters (JCTVC-K0103)
  if ( useLCUSeparateModel )
  {
    m_encRCSeq->initLCUPara();                  // LCU-level alpha/beta parameters
  }
#if U0132_TARGET_BITS_SATURATION
  m_CpbSaturationEnabled = false;
  m_cpbSize = targetBitrate;
  m_cpbState = (uint32_t)(m_cpbSize*0.5f);
  m_bufferingRate = (int)(targetBitrate / frameRate);
#endif

  // The temporary arrays are released here, so the init*() calls above must
  // not rely on them afterwards (presumably they copy — confirm in EncRCSeq).
  delete[] bitsRatio;
  delete[] GOPID2Level;
}
1.isLowdelay变量
该变量是判断是否采用低时延(LD)编码配置。VVC中有三种编码配置,即AI(all intra)、RA(random access)、LD(low delay)。关于三种模式,参考:https://blog.csdn.net/dongmie1999/article/details/116131129
2.numberOfLevel变量
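目前还没有完全弄懂。从代码看,它是需要分别维护参数的"层级(level)"总数:当采用分层比特分配(keepHierBits > 0),或者是非低时延配置且GOPSize为8/16/32时,先取 log2(GOPSize)+1(四舍五入取整),否则取1;随后再加2,分别留给帧内(intra)图像和非参考图像。create()中m_picPara(以及m_LCUPara)数组的长度就是numberOfLevel。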
3.bitsRatio数组
根据JCTVC-K0103提案,该数组存放的是GOP中各帧对应的比特数比例(也可以说是比特数权重)。
4.GOPID2Level数组
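虽然知道它是存储GOP中图像序号到level的映射,但这个数组的意义也还不是特别清楚。从初始化代码看,level表示该帧在分层结构中所处的层级(例如RA、GOPSize为8时,权重最大的第0帧为level 1,权重最小的帧为level 4);create()中m_picPara按numberOfLevel分配,推测每个level对应一组独立的alpha/beta模型参数,GOPID2Level用来为GOP中的每一帧选取对应的那一组(具体用法需在帧级代码中确认)。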
5.下面几行就是将得到的一些数据存入m_encRCSeq对象中,其中有个create()函数,如下:
void EncRCSeq::create( int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int numberOfLevel, bool useLCUSeparateModel, int adaptiveBit )
{
destroy();
m_totalFrames = totalFrames;//总帧数
m_targetRate = targetBitrate;//目标比特
m_frameRate = frameRate;//帧率(fps)
m_GOPSize = GOPSize;//gop大小
m_picWidth = picWidth;
m_picHeight = picHeight;
m_LCUWidth = LCUWidth;
m_LCUHeight = LCUHeight;
m_numberOfLevel = numberOfLevel;
m_useLCUSeparateModel = useLCUSeparateModel;
m_numberOfPixel = m_picWidth * m_picHeight;
//序列总码率 即输出码流大小
m_targetBits = (int64_t)m_totalFrames * (int64_t)m_targetRate / (int64_t)m_frameRate;
//每像素被分到的目标比特
m_seqTargetBpp = (double)m_targetRate / (double)m_frameRate / (double)m_numberOfPixel;
//m_alphaUpdate、m_betaUpdate这两个变量用于在接下来更新lamda的参数值
if ( m_seqTargetBpp < 0.03 )
{
m_alphaUpdate = 0.01;
m_betaUpdate = 0.005;
}
else if ( m_seqTargetBpp < 0.08 )
{
m_alphaUpdate = 0.05;
m_betaUpdate = 0.025;
}
else if ( m_seqTargetBpp < 0.2 )
{
m_alphaUpdate = 0.1;
m_betaUpdate = 0.05;
}
else if ( m_seqTargetBpp < 0.5 )
{
m_alphaUpdate = 0.2;
m_betaUpdate = 0.1;
}
else
{
m_alphaUpdate = 0.4;
m_betaUpdate = 0.2;
}
m_averageBits = (int)(m_targetBits / totalFrames);//平均每帧占用的目标比特数
int picWidthInBU = ( m_picWidth % m_LCUWidth ) == 0 ? m_picWidth / m_LCUWidth : m_picWidth / m_LCUWidth + 1;
int picHeightInBU = ( m_picHeight % m_LCUHeight ) == 0 ? m_picHeight / m_LCUHeight : m_picHeight / m_LCUHeight + 1;
//一帧包含的LCU数量
m_numberOfLCU = picWidthInBU * picHeightInBU;
m_bitsRatio = new int[m_GOPSize];
for ( int i=0; i<m_GOPSize; i++ )
{
m_bitsRatio[i] = 1;
}
m_GOPID2Level = new int[m_GOPSize];
for ( int i=0; i<m_GOPSize; i++ )
{
m_GOPID2Level[i] = 1;
}
m_picPara = new TRCParameter[m_numberOfLevel];
for ( int i=0; i<m_numberOfLevel; i++ )
{
m_picPara[i].m_alpha = 0.0;
m_picPara[i].m_beta = 0.0;
m_picPara[i].m_validPix = -1;
m_picPara[i].m_skipRatio = 0.0;
}
if ( m_useLCUSeparateModel )
{
m_LCUPara = new TRCParameter*[m_numberOfLevel];
for ( int i=0; i<m_numberOfLevel; i++ )
{
m_LCUPara[i] = new TRCParameter[m_numberOfLCU];
for ( int j=0; j<m_numberOfLCU; j++)
{
m_LCUPara[i][j].m_alpha = 0.0;
m_LCUPara[i][j].m_beta = 0.0;
m_LCUPara[i][j].m_validPix = -1;
m_LCUPara[i][j].m_skipRatio = 0.0;
}
}
}
m_framesLeft = m_totalFrames;
m_bitsLeft = m_targetBits;
m_adaptiveBit = adaptiveBit;
m_lastLambda = 0.0;
}
涉及几个变量,在之后的帧级初始化和LCU级初始化可能会用到:
1)m_targetBits:序列的目标总比特;这个计算公式我认为应该这么写比较容易理解:
m_targetBits = (m_totalFrames / m_frameRate) × m_targetRate;即总帧数先除以帧率,求得整个序列的时长(秒),再乘以目标码率(单位是bps),即得序列的总目标比特数。注意这只是数学意义上的等价写法:代码中这三个量都是整数,如果真的先做除法,整数除法会把小数部分截断(例如100帧、30fps会得到3秒而不是3.33秒),所以代码里才写成先乘后除。
2)m_seqTargetBpp:每帧的每像素被分到的目标比特数;序列的目标比特率除以帧率,求得每帧的比特数,再除以像素总数,即得每像素目标比特。
3)m_averageBits:每帧的目标比特数。