VTM14.0代码阅读（1） enc.main函数代码初读

青椒鸡汤

已于 2022-03-03 10:04:38 修改

阅读量1.4k

点赞数 6

分类专栏：视频编解码文章标签： C++ 编码器配置解析 SIMD优化帧处理

于 2022-02-09 17:47:48 首次发布

好

本文链接：https://blog.csdn.net/dfhg54/article/details/122840800

版权

视频编解码专栏收录该内容

41 篇文章 37 订阅

订阅专栏

#include <time.h>
#include <iostream>
#include <chrono>
#include <ctime>

#include "EncoderLib/EncLibCommon.h"
#include "EncApp.h"
#include "Utilities/program_options_lite.h"

// uint32_t is a typedef'd fixed-width type: "u" = unsigned (cannot hold negative values),
// "int" = integer, "32" = 32 bits (four bytes), "_t" = introduced via typedef.
// NOTE(review): presumably the column width used when printing setting names; no use is
// visible in this excerpt — confirm against its users.
static const uint32_t settingNameWidth = 66;
// NOTE(review): presumably the column width used when printing setting help text — confirm.
static const uint32_t settingHelpWidth = 84;
// `const` makes the value read-only. `static` on a namespace-scope variable gives it internal
// linkage: it is visible only to functions in this file, and an `extern` declaration in
// another file cannot reach it.
// NOTE(review): presumably the column width used when printing setting values — confirm.
static const uint32_t settingValueWidth = 3;

int main(int argc, char* argv[]) //等价于 int main(int argc, char** argv)：argc 是命令行参数的个数，argv 是指向各个参数字符串（char*）的指针数组；注意它并不等价于 int main(int argc, string argv)。CSDN(C++)有详细解释c语言中main函数参数argc，argv说明,及命令行中如何传参数_大道至簡的博客-CSDN博客_c语言 main传参

C语言命令行参数详解_theLostLamb的博客-CSDN博客_c语言命令行

这里的 argv 就是你在设置EncoderApp为启动项后，属性里的那个命令行参数（argc 是参数的个数）。比如这里设置为-c BasketballDrill.cfg -c encoder_intra_vtm.cfg，则 argc 为 5，argv[0]~argv[4] 这5个命令行参数分别为(1)可执行程序的路径 E:\Professional software\VVCSoftware_VTM-VTM-14.2\bin\vs15\msvc-19.16\x86_64\debug

（2）-c (3)BasketballDrill.cfg (4)-c (5) encoder_intra_vtm.cfg

{

// print information //打印信息

fprintf( stdout, "\n" );

fprintf( stdout, "VVCSoftware: VTM Encoder Version %s ", VTM_VERSION );

fprintf( stdout, NVM_ONOS );

fprintf( stdout, NVM_COMPILEDBY );

fprintf( stdout, NVM_BITS ); //stdout, stdin, stderr的中文名字分别是标准输出，标准输入和标准错误。fprintf()函数根据指定的format(格式)发送信息(参数)到由stream(流)指定的文件.因此fprintf()可以使得信息输出到指定的文件

#if ENABLE_SIMD_OPT //if endif 后为0执行 endif 后的程序，为1执行其中的程序。此处 ENABLE_SIMD_OPT =SIMD_ENABLE = 1

std::string SIMD; //std::为标准的意思，前加using namespace std即可不用加

df::program_options_lite::Options opts;

opts.addOptions()

( "SIMD", SIMD, string( "" ), "" )

( "c", df::program_options_lite::parseConfigFile, "" );

df::program_options_lite::SilentReporter err;

df::program_options_lite::scanArgv( opts, argc, ( const char** ) argv, err );

fprintf( stdout, "[SIMD=%s] ", read_x86_extension( SIMD ) ); //这里识别使用的SIMD技术为AVX2，并输出

#endif

#if ENABLE_TRACING

fprintf( stdout, "[ENABLE_TRACING] " ); //默认禁用(仅在调试时启用，解码时需要15%的运行时间)

#endif

fprintf( stdout, "\n" );

std::fstream bitstream; //fstream是用using为basic_fstream这个类起的一个别名，bitstream是实例化的对象。规定了与C流相关联的输入/输出流

EncLibCommon encLibCommon; //enclib是一个类，定义了一些重要参数集如 SPS,APS,PPS

std::vector<EncApp*> pcEncApp(1); //容器：存放每一层 EncApp 对象指针的 vector，初始大小为1（后面会按层数 resize）

bool resized = false;

int layerIdx = 0;

initROM(); //初始化rom

TComHash::initBlockSizeToIndex(); //TComHash是一个结构体，里面定义了很多hash值规则函数，如generateBlock2x2HashValue(),还有isBlock2x2RowSameValue（）这种具体定义算法的函数。等后面用到这个函数时再结合讲

//initBlockSizeToIndex()为各种规格的块定义了一个值，编个号，方便快速索引

C/C++编程语言中char** a和char* a[]介绍_liitdar的博客-CSDN博客_char**

可能理解有误，之后来做修改

char** layerArgv = new char*[argc]; //在堆区开辟一块能存放 argc 个 char* 的内存，用来存放当前层要用到的各个命令行参数的地址（此时还没有写入内容，后面的循环才会赋值），并用新建的指针 layerArgv 指向它。layerArgv 指向的内存中存储的是 char* 类型的数据，因此指针的加减运算在这里体现为： layerArgv + 1 表示地址加 8 字节（在 32 位系统中，地址加 4 字节）。

可能理解有误，之后来做修改

do
{
pcEncApp[layerIdx] = new EncApp( bitstream, &encLibCommon );//EncApp是一个编码应用类，继承了EncAppCfg类，这里用new指令开辟一块地址，将一些配置信息，流的信息存放并赋给pcEncApp[layerIdx]这个容器
// create application encoder class per layer为每个层创建应用程序编码器类，调用了create()函数
pcEncApp[layerIdx]->create();

这一段是解析每层的配置。在命令行参数个数为5（argc=5）的情况下，i的值从0~4，依次把属于当前层的命令行参数的地址赋给layerArgv[j]；如果 -lx 后面缺少参数，则抛出命令行解析错误。

// parse configuration per layer
    try
    {
      int j = 0;
      for( int i = 0; i < argc; i++ )
      {
        if( argv[i][0] == '-' && argv[i][1] == 'l' ) //判断是否为以 -l 开头的层专用参数（形如 -lx，x 为层索引），并不是在判断参数出错
        {
          if (argc <= i + 1)
          {
            THROW("Command line parsing error: missing parameter after -lx\n");
          }
          int numParams = 1; // count how many parameters are consumed
          // check for long parameters, which start with "--"
          const std::string param = argv[i + 1];
          if (param.rfind("--", 0) != 0)
          {
            // only short parameters have a second parameter for the value
            if (argc <= i + 2)
            {
              THROW("Command line parsing error: missing parameter after -lx\n");
            }
            numParams++;
          }
          // check if correct layer index检查层索引
          if( argv[i][2] == std::to_string( layerIdx ).c_str()[0] )
          {
            layerArgv[j] = argv[i + 1];
            if (numParams > 1)
            {
              layerArgv[j + 1] = argv[i + 2];
            }
            j+= numParams;
          }
          i += numParams;
        }

如果不是 -lx 形式的层专用参数（即普通的命令行参数），则

else
{
layerArgv[j] = argv[i]; //前面定义的layerArgv是开辟的一块内存，用来存储命令行参数（argv 中各个字符串）的地址，比如这里存了5个地址；这里不是排序，而是把普通参数的地址按原来的顺序依次复制到 layerArgv 中
j++;
}
}

这里如果都是正确的就跳到

if( !pcEncApp[layerIdx]->parseCfg( j, layerArgv ) ) //parseCfg配置分析，如果配置错误就销毁。parseCfg是一个bool类型的函数
{
pcEncApp[layerIdx]->destroy();
return 1;
}

配置分析完以后跳到(这个函数里出现了m_orgPic，m_trueOrgPic，可能是数据的填充，之后补充）

pcEncApp[layerIdx]->createLib( layerIdx ); //createLib()是EncApp类里的一个函数，建立一个库。将各种技术的使用状况打印出来

略（学了容器之后再看）

if( !resized )
{
pcEncApp.resize( pcEncApp[layerIdx]->getMaxLayers() );
resized = true;
}

layerIdx++;
} while( layerIdx < pcEncApp.size() );

delete[] layerArgv; //消除

正常情况layerIdx都等于1，跳过

if (layerIdx > 1)
{
VPS* vps = pcEncApp[0]->getVPS();
//check chroma format and bit-depth for dependent layers
for (uint32_t i = 0; i < layerIdx; i++)
{
int curLayerChromaFormatIdc = pcEncApp[i]->getChromaFormatIDC();
int curLayerBitDepth = pcEncApp[i]->getBitDepth();
for (uint32_t j = 0; j < layerIdx; j++)
{
if (vps->getDirectRefLayerFlag(i, j))
{
int refLayerChromaFormatIdcInVPS = pcEncApp[j]->getChromaFormatIDC();
CHECK(curLayerChromaFormatIdc != refLayerChromaFormatIdcInVPS, "The chroma formats of the current layer and the reference layer are different");
int refLayerBitDepthInVPS = pcEncApp[j]->getBitDepth();
CHECK(curLayerBitDepth != refLayerBitDepthInVPS, "The bit-depth of the current layer and the reference layer are different");
}
}
}
}

#if PRINT_MACRO_VALUES //默认为1,启用后，编码器在启动时打印出非环境变量控制的宏及其值的列表
printMacroSettings(); //这个函数是用来显示非环境变量宏的设置情况.在函数中满足VERBOSE值大于DETAILS值时，打印出宏的设置情况。比如RExt__DECODER_DEBUG_BIT_STATISTICS 这个参数设置为1时，解码器产生比特使用统计数据(将影响解码器运行时间高达10%)
#endif

// starting time //记录并显示开始编码的时间
auto startTime = std::chrono::steady_clock::now();
std::time_t startTime2 = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
fprintf(stdout, " started @ %s", std::ctime(&startTime2) );
clock_t startClock = clock();

参考VTM10.0代码学习9：from_encmain_to_EncLib_encode()_hjhyxq2014的博客-CSDN博客

GOP：I帧、P帧、B帧、GOP、IDR 和PTS, DTS之间的关系 - stardsd - 博客园

视频压缩编码 gop（Group of Pictures）（I帧间隔）的概念、IDR、I帧（关键帧，intra picture）、P帧、B帧、帧内压缩、帧间压缩、pts（显示时间）、dts（解码时间）_Dontla的博客-CSDN博客_gop

编码器将多张图像进行编码后生产成一段一段的 GOP ( Group of Pictures ) ，解码器在播放时则是读取一段一段的 GOP 进行解码后读取画面再渲染显示。GOP ( Group of Pictures) 是一组连续的画面，由一张 I 帧和数张 B / P 帧组成，是视频图像编码器和解码器存取的基本单位，它的排列顺序将会一直重复到影像结束。GOP即Group of picture（图像组），指两个I帧之间的距离。一个序列的第一个图像叫做 IDR 图像（立即刷新图像），IDR 图像都是 I 帧图像。

eos:End Of Sequence，代表整个序列的结束，而不是某一个gop的最后一帧。如果eos为true，则代表已经读取到输入视频的最后一帧（已接收帧数达到待编码帧数，或输入文件已读完）。

// call encoding function per layer
bool eos = false; //eos:End Of Sequence

while( !eos )
{
// read GOP
bool keepLoop = true;
while( keepLoop )
{
for( auto & encApp : pcEncApp )
{
#ifndef _DEBUG
try
{
#endif
keepLoop = encApp->encodePrep( eos ); //编码前的预处理
#ifndef _DEBUG //
}
catch( Exception &e )
{
std::cerr << e.what() << std::endl;
return EXIT_FAILURE;
}
catch( const std::bad_alloc &e )
{
std::cout << "Memory allocation failed: " << e.what() << std::endl;
return EXIT_FAILURE;
}
#endif
}
}

附1第239行预编码函数的具体解析 keepLoop = encApp->encodePrep( eos );

C语言丨关键字enum用法详解，看这篇就够了 - 沐歌爱编程 - 博客园

CSC：Colour Space Conversion 色彩空间转换

注意：具有静态存储期的变量（如定义在函数外的全局变量）若没有显式初始化，会被零初始化；但 m_inputColourSpaceConvert 是类的成员变量而不是全局变量，它的值应由配置解析决定，不能直接套用这条规则。

bool EncApp::encodePrep( bool& eos )
{
  // main encoder loop
  const InputColourSpaceConversion ipCSC = m_inputColourSpaceConvert;  //InputColourSpaceConversion是一个枚举数据类型,定义了几种色彩空间转换的方式，由不同的值代表。
//ipCSC 取自成员变量 m_inputColourSpaceConvert；若配置中没有指定色彩空间转换，其值应为 IPCOLOURSPACE_UNCHANGED(0)（由配置解析决定，而不是因为“全局变量默认为0”）
  const InputColourSpaceConversion snrCSC = ( !m_snrInternalColourSpace ) ? m_inputColourSpaceConvert : IPCOLOURSPACE_UNCHANGED;

此处的ipCSC在下面被作为参数传给 read() 函数，即读取输入图像时所采用的色彩空间转换方式

m_snrInternalColourSpace:如果为真，则在计算信噪比时不采用颜色空间变换，否则应用输入的逆

#if EXTENSION_360_VIDEO //不开就不管
if( m_ext360->isEnabled() )
{
m_ext360->read( m_cVideoIOYuvInputFile, *m_orgPic, *m_trueOrgPic, ipCSC );
}
else
{
m_cVideoIOYuvInputFile.read( *m_orgPic, *m_trueOrgPic, ipCSC, m_sourcePadding, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range );
}
#else
m_cVideoIOYuvInputFile.read( *m_orgPic, *m_trueOrgPic, ipCSC, m_sourcePadding, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range ); //这里m_cVideoIOYuvInputFile是VideoIOYuv 类实例化的一个对象，然后调用了read函数读取如亮度色度格式，像素尺寸的数据。各个变量的意思在read函数上有表明，或是直接查询变量的定义和声明，也有注释。比如m_sourcePadding在read函数中为aiPad[2]，代表着填充像素尺寸的一个数组，0代表长，1代表像素的宽
#endif

基于gop的时域滤波器（temporal filter），默认为关闭

if( m_gopBasedTemporalFilterEnabled )
{
m_temporalFilter.filter( m_orgPic, m_iFrameRcvd );
m_filteredOrgPic->copyFrom(*m_orgPic);
}

m_isField:启用场编码

m_iFrameRcvd:接收帧的数目

m_framesToBeEncoded:编码帧数

m_iFrameRcvd++;

eos = ( m_isField && ( m_iFrameRcvd == ( m_framesToBeEncoded >> 1 ) ) ) || ( !m_isField && ( m_iFrameRcvd == m_framesToBeEncoded ) ); //只有当已接收帧数达到待编码帧数（场编码时为待编码帧数的一半）时，eos 才等于true，否则为false

如果输入文件已经读到末尾（isEof() 为真），说明这一次并没有读到新的一帧：此时把 m_flush 和 eos 置为 true，把刚才多计的一帧减回去（m_iFrameRcvd--），并用实际接收到的帧数更新待编码帧数；文件没读完时不运行这一段

if( m_cVideoIOYuvInputFile.isEof() )
{
m_flush = true;
eos = true;
m_iFrameRcvd--;
m_cEncLib.setFramesToBeEncoded( m_iFrameRcvd );
}

这里m_isTopFieldFirst这个参数可以通过查看全部调用，然后在EncAppCfg.cpp的第844行有详细解释。

/

bool keepDoing = false;

// call encoding function for one frame 调用一帧的编码函数
if( m_isField ) //如果启用了场编码，这里会多传一个m_isTopFieldFirst ，是否设置为奇数行优先的顶场扫描
{
keepDoing = m_cEncLib.encodePrep( eos, m_flush ? 0 : m_orgPic, m_flush ? 0 : m_trueOrgPic, m_flush ? 0 : m_filteredOrgPic, snrCSC, m_recBufList, m_numEncoded, m_isTopFieldFirst );
}
else
{
keepDoing = m_cEncLib.encodePrep( eos, m_flush ? 0 : m_orgPic, m_flush ? 0 : m_trueOrgPic, m_flush ? 0 : m_filteredOrgPic, snrCSC, m_recBufList, m_numEncoded );
}

//传输各个参数进编码器类下的预编码函数encodePrep()中，其返回的bool值赋给keepDoing（不一定是false）。好像是先写入缓存。这个函数以后再细看

return keepDoing;
}