HM编码器代码阅读(18)——变换

最新推荐文章于 2022-02-24 15:41:07 发布

NB_vol_1

最新推荐文章于 2022-02-24 15:41:07 发布

阅读量7.4k

点赞数 2

分类专栏： HEVC编码器HM源码阅读文章标签： H.265 c++ hevc 编码

本文链接：https://blog.csdn.net/NB_vol_1/article/details/51190324

版权

HEVC编码器HM源码阅读专栏收录该内容

51 篇文章 155 订阅

订阅专栏

变换

原理以及公式

对于大部分图像来说，它们都有很多平坦区域和内容变化缓慢的区域，而且相邻像素点的相关性很强。通过变换，可以减少这些相关性，同时把图像能量在空间域的分散分布转换为在变换域的相对集中分布，这样就可以去除空间冗余了。

HEVC中使用两种变换：DCT和DST。DST只处理帧内4x4模式的亮度块，其他的所有模式都使用DCT。因此在HEVC中，DST只有4x4一种规格，而DCT有4x4、8x8、16x16、32x32几种尺寸。

以二维的DCT为例子

可以看到二维的DCT变换可以转换成2个一维的DCT变换（先做一个方向的一维变换，再做另一个方向的一维变换），写成矩阵形式即 $Y = A X A^{T}$，其中 $X$ 为残差块，$A$ 为变换矩阵，$Y$ 为变换系数：

一维的形式：

但是矩阵A的元素是非整数，不利于计算，因此要做调整：把A矩阵的元素都乘上一个因子，将它们转换成整数，然后进行DCT变换；变换完成之后、量化之前，再把该因子除去即可。

下面是DCT完整的形式：

其他更多细节请看 HEVC/H.265理论知识（5）——变换

蝶形变换

蝶形变换的思想就是提取矩阵的相关部分，定义中间变量，减少运算次数。具体可以参考 H.264整数DCT公式推导及蝶形算法分析

以4x4的DCT变换为例子：

分析结果矩阵的第一列

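定义4个临时变量（记参与运算的四个输入样点为 $x_0, x_1, x_2, x_3$）：$E_0 = x_0 + x_3$，$O_0 = x_0 - x_3$，$E_1 = x_1 + x_2$，$O_1 = x_1 - x_2$

那么第一列的结果可以转换成（$a_{ij}$ 为4x4变换矩阵第 $i$ 行第 $j$ 列的元素）：$y_0 = a_{00}E_0 + a_{01}E_1$，$y_1 = a_{10}O_0 + a_{11}O_1$，$y_2 = a_{20}E_0 + a_{21}E_1$，$y_3 = a_{30}O_0 + a_{31}O_1$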

通过定义临时变量，减少了重复的计算，以此加快计算速度，这就是蝶形变换

代码实现

入口函数

入口函数是encodeResAndCalcRdInterCU。

这个函数的作用是根据预测值，求出残差，然后进行TU的划分，然后进行变换、量化等操作以及RD代价的计算。在前面讲解预测的时候，这个函数已经分析过了，这里不再赘述。

变换量化主函数

// Forward transform and quantisation of one residual block.
//
// pcCU / uiAbsPartIdx  CU and partition index used to query the bypass flag,
//                      prediction mode and luma intra direction
// pcResidual, uiStride input residual samples and their row stride
// rpcCoeff             output coefficients, written as uiWidth-wide rows
// uiWidth, uiHeight    block dimensions
// uiAbsSum             out: sum of absolute residuals on the bypass path,
//                      otherwise reset to 0 and handed to xQuant
// eTType               texture type; selects luma or chroma bit depth
// useTransformSkip     true -> xTransformSkip, false -> xT
Void TComTrQuant::transformNxN( TComDataCU* pcCU, 
	Pel*        pcResidual, 
	UInt        uiStride, 
	TCoeff*     rpcCoeff, 
#if ADAPTIVE_QP_SELECTION
	Int*&       rpcArlCoeff, 
#endif
	UInt        uiWidth, 
	UInt        uiHeight, 
	UInt&       uiAbsSum, 
	TextType    eTType, 
	UInt        uiAbsPartIdx,
	Bool        useTransformSkip
	)
{
	// Transform/quantisation bypass: the "coefficients" are simply the
	// residual samples, copied row by row while their magnitudes are summed.
	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
	{
		uiAbsSum = 0;
		for (UInt row = 0; row < uiHeight; row++)
		{
			const UInt srcBase = row * uiStride;
			const UInt dstBase = row * uiWidth;
			for (UInt col = 0; col < uiWidth; col++)
			{
				const Pel sample = pcResidual[srcBase + col];
				rpcCoeff[dstBase + col] = sample;
				uiAbsSum += abs(sample);
			}
		}
		return;
	}

	// Transform mode: the luma intra direction for intra luma blocks,
	// REG_DCT for everything else.
	UInt uiMode = REG_DCT;
	if (eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA)
	{
		uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
	}

	uiAbsSum = 0;
	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
	const Int bitDepth = (eTType == TEXT_LUMA) ? g_bitDepthY : g_bitDepthC;

	// Stage 1: fill m_plTempCoeff, either through the regular transform or
	// through the transform-skip rescaling.
	if (!useTransformSkip)
	{
		xT(bitDepth, uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
	}
	else
	{
		xTransformSkip(bitDepth, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
	}

	// Stage 2: quantise m_plTempCoeff into rpcCoeff.
	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
#if ADAPTIVE_QP_SELECTION
		rpcArlCoeff,
#endif
		uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
}

使用了变换跳过的模式的变换操作

/*
** Transform-skip variant of the forward transform: no DCT/DST is applied,
** each residual sample is only rescaled by a power of two.
**
** bitDepth    sample bit depth used to derive the scaling shift
** piBlkResi   input residual block, rows are uiStride samples apart
** uiStride    row stride of piBlkResi
** psCoeff     output block, written as width-wide rows
** width       block width  (asserted equal to height)
** height      block height
**
** shift = MAX_TR_DYNAMIC_RANGE - bitDepth - log2(width):
**   shift >= 0 : samples are scaled up by 2^shift
**   shift <  0 : samples are scaled down by 2^(-shift) with rounding
*/
Void TComTrQuant::xTransformSkip(Int bitDepth, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
{
	assert( width == height );
	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
	Int  shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
	Int  j,k;
	if(shift >= 0)
	{
		// Residuals may be negative, and left-shifting a negative signed value
		// is undefined behaviour before C++20, so scale by multiplication.
		const Int scale = 1 << shift;
		for (j = 0; j < height; j++)
		{
			for(k = 0; k < width; k ++)
			{
				// Output rows are width samples long, so index with width.
				psCoeff[j*width + k] = piBlkResi[j * uiStride + k] * scale;
			}
		}
	}
	else
	{
		//The case when uiBitDepth > 13
		const Int transformSkipShift = -shift;
		// Half of the divisor, added before shifting down so the result is rounded.
		const Int offset = (1 << (transformSkipShift - 1));
		for (j = 0; j < height; j++)
		{
			for(k = 0; k < width; k ++)
			{
				psCoeff[j*width + k] = (piBlkResi[j * uiStride + k] + offset) >> transformSkipShift;
			}
		}
	}
}

变换操作

/*
** Forward transform of one residual block into psCoeff.
**
** With MATRIX_MULT the work is delegated to xTr (plain matrix multiplication,
** reads the strided residual directly). Otherwise the residual is packed into
** a contiguous Short buffer, transformed by xTrMxN (partial butterflies) and
** the Short results are widened into psCoeff.
*/
Void TComTrQuant::xT(Int bitDepth, UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
{
#if MATRIX_MULT  
	// xTr handles square blocks only; the width is used as the transform size.
	const Int trSize = iWidth;
	xTr(bitDepth, piBlkResi, psCoeff, uiStride, (UInt)trSize, uiMode);
#else
	Short block[ 32 * 32 ];   // packed copy of the residual (stride removed)
	Short coeff[ 32 * 32 ];   // transform output before widening

	// Pack the residual: one memcpy per row, dropping the source stride.
	for (Int row = 0; row < iHeight; row++)
	{    
		memcpy( &block[row * iWidth], &piBlkResi[row * uiStride], iWidth * sizeof( Short ) );
	}

	// Butterfly-based transform; the result lands in coeff.
	xTrMxN(bitDepth, block, coeff, iWidth, iHeight, uiMode );

	// Copy the Short coefficients into the Int output buffer.
	const Int numCoeff = iHeight * iWidth;
	for (Int idx = 0; idx < numCoeff; idx++)
	{    
		psCoeff[ idx ] = coeff[ idx ];
	}
#endif  
}

常规的DCT

/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
*  \param bitDepth sample bit depth; it only enters the first-stage shift
*  \param block pointer to input data (residual)
*  \param coeff pointer to output data (transform coefficients)
*  \param uiStride stride of input data
*  \param uiTrSize transform size (uiTrSize x uiTrSize); must be 4, 8, 16 or 32
*  \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
*
*  An unsupported uiTrSize trips the assert; when asserts are compiled out the
*  function returns without writing coeff instead of reading through an
*  uninitialised matrix pointer.
*/
void xTr(Int bitDepth, Pel *block, Int *coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
{
	const Short *iT;

	// Select the transform matrix for this size. Validate the size before
	// anything else so no table is indexed with an unsupported value.
	if (uiTrSize==4)
	{
		iT  = g_aiT4[0];
	}
	else if (uiTrSize==8)
	{
		iT = g_aiT8[0];
	}
	else if (uiTrSize==16)
	{
		iT = g_aiT16[0];
	}
	else if (uiTrSize==32)
	{
		iT = g_aiT32[0];
	}
	else
	{
		assert(0);
		return;
	}

	UInt i,j,k;
	Int iSum;
	Int tmp[32*32];
	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;

	Int shift_1st = uiLog2TrSize - 1 + bitDepth-8; // log2(N) - 1 + g_bitDepth-8
	Int add_1st = 1<<(shift_1st-1);                // rounding offset, first stage
	Int shift_2nd = uiLog2TrSize + 6;
	Int add_2nd = 1<<(shift_2nd-1);                // rounding offset, second stage

	/* Horizontal transform */
	// 4x4 only: a non-zero g_aucDCTDSTMode_Hor[uiMode] switches this stage to the DST matrix.
	if (uiTrSize==4)
	{
		if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
		{
			iT  =  g_as_DST_MAT_4[0];
		}
	}
	// tmp[i][j] = sum_k iT[i][k] * block[j][k]; the result is stored transposed
	// so the second stage can run the same loop on tmp.
	for (i=0; i<uiTrSize; i++)
	{
		for (j=0; j<uiTrSize; j++)
		{
			iSum = 0;
			for (k=0; k<uiTrSize; k++)
			{
				iSum += iT[i*uiTrSize+k]*block[j*uiStride+k];
			}
			tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
		}
	}

	/* Vertical transform */
	// 4x4 only: pick the DST or DCT matrix again, since the horizontal stage
	// may have replaced iT.
	if (uiTrSize==4)
	{
		if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
		{
			iT  =  g_as_DST_MAT_4[0];
		}
		else
		{
			iT  = g_aiT4[0];
		}
	}
	// coeff[i][j] = sum_k iT[i][k] * tmp[j][k]
	for (i=0; i<uiTrSize; i++)
	{
		for (j=0; j<uiTrSize; j++)
		{
			iSum = 0;
			for (k=0; k<uiTrSize; k++)
			{
				iSum += iT[i*uiTrSize+k]*tmp[j*uiTrSize+k];
			}
			coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
		}
	}
}

使用蝶形变换实现的DCT

/*
** Forward 2D transform built from two 1D passes (partial butterflies, or the
** fast DST for 4x4 blocks whose mode is not REG_DCT).
**
** block   contiguous input residual (iWidth x iHeight)
** coeff   output coefficients
** uiMode  REG_DCT selects the DCT; for 4x4 any other value selects the DST
**
** Only square 4x4, 8x8, 16x16 and 32x32 blocks are processed; for any other
** size the function returns without writing coeff.
*/
void xTrMxN(Int bitDepth, Short *block,Short *coeff, Int iWidth, Int iHeight, UInt uiMode)
{
	// g_aucConvertToBit[] is used as log2(size) - 2 elsewhere in this file, so:
	const Int firstPassShift  = g_aucConvertToBit[iWidth]  + 1 + bitDepth-8; // log2(iWidth) - 1 + bitDepth - 8
	const Int secondPassShift = g_aucConvertToBit[iHeight] + 8;              // log2(iHeight) + 6

	Short tmp[ 64 * 64 ];   // output of the first pass, input of the second

	// Every supported case needs a square block.
	if (iWidth != iHeight)
	{
		return;
	}

	switch (iWidth)
	{
	case 4:
		// 4x4 is the only size that can use the DST.
		if (uiMode == REG_DCT)
		{
			partialButterfly4(block, tmp, firstPassShift, iHeight);
			partialButterfly4(tmp, coeff, secondPassShift, iWidth);
		}
		else
		{
			fastForwardDst(block,tmp,firstPassShift);   // block in, tmp out
			fastForwardDst(tmp,coeff,secondPassShift);  // tmp in, coeff out
		}
		break;

	case 8:
		partialButterfly8( block, tmp, firstPassShift, iHeight );
		partialButterfly8( tmp, coeff, secondPassShift, iWidth );
		break;

	case 16:
		partialButterfly16( block, tmp, firstPassShift, iHeight );
		partialButterfly16( tmp, coeff, secondPassShift, iWidth );
		break;

	case 32:
		partialButterfly32( block, tmp, firstPassShift, iHeight );
		partialButterfly32( tmp, coeff, secondPassShift, iWidth );
		break;

	default:
		break;
	}
}

蝶形变换一例

/* 4-point partial butterfly, applied to four input rows.
 *
 * src         input; row r starts at src + r*src_stride and supplies 4 samples
 * dst         output 4x4 block; input row r produces output column r
 *             (dst[r], dst[4+r], dst[8+r], dst[12+r]), i.e. the result is transposed
 * shift       right shift applied to every result; 1<<(shift-1) is added first
 * src_stride  distance between consecutive input rows
 */
void partialButterfly4(short *src, short *dst,int shift, int src_stride)//(Short *src,Short *dst,Int shift, Int line)
{
  const int rounding = 1<<(shift-1);

  for (int row=0; row<4; ++row, src += src_stride, ++dst)
  {
    // Butterfly step: the sum and difference of the mirrored sample pairs are
    // computed once and reused by two outputs each.
    const int sumOuter  = src[0] + src[3];
    const int diffOuter = src[0] - src[3];
    const int sumInner  = src[1] + src[2];
    const int diffInner = src[1] - src[2];

    // Outputs 0 and 2 use only the sums, outputs 1 and 3 only the differences.
    dst[0]   = (g_aiT4[0][0]*sumOuter  + g_aiT4[0][1]*sumInner  + rounding)>>shift;
    dst[4]   = (g_aiT4[1][0]*diffOuter + g_aiT4[1][1]*diffInner + rounding)>>shift;
    dst[2*4] = (g_aiT4[2][0]*sumOuter  + g_aiT4[2][1]*sumInner  + rounding)>>shift;
    dst[3*4] = (g_aiT4[3][0]*diffOuter + g_aiT4[3][1]*diffInner + rounding)>>shift;
  }
}