hevc移植/优化-day 4:内存使用优化(内存泄漏问题修复。)-CSDN博客

本文链接：https://blog.csdn.net/dotmonkey/article/details/7002841

在 Windows 上测试的时候发现内存占用非常大，要想向嵌入式平台移植，必须先优化内存的使用。Linux 下的 valgrind 工具很适合做内存泄漏检测：它不仅能分析内存泄漏，还能检测到内存未初始化、内存越界等问题。

检测方法：

valgrind -v --leak-check=full --show-reachable=yes ./TAppDecoderStaticd -b RaceHorses_416x240_30.265 -d 0 2>log.txt

在输出的log.txt中即可看出各种问题的说明以及出现该问题时的调用堆栈，方便查看并修正代码。

分析后发现，检测到的问题大部分是由重复解码引起的，见 TAppDecTop.cpp 中的注释：

    /* location serves to work around a design fault in the decoder, whereby
     * the process of reading a new slice that is the first slice of a new frame
     * requires the TDecTop::decode() method to be called again with the same
     * nal unit. */

修正补丁：

diff --git a/source/App/TAppDecoder/TAppDecCfg.h b/source/App/TAppDecoder/TAppDecCfg.h
index e702c5c..f792d09 100644
--- a/source/App/TAppDecoder/TAppDecCfg.h
+++ b/source/App/TAppDecoder/TAppDecCfg.h
@@ -64,7 +64,10 @@ protected:
   
 public:
   TAppDecCfg()          {}
-  virtual ~TAppDecCfg() {}
+  virtual ~TAppDecCfg() {
+  	if(m_pchBitstreamFile) free(m_pchBitstreamFile);
+  	if(m_pchReconFile) free(m_pchReconFile);
+  }
   
   Bool  parseCfg        ( Int argc, Char* argv[] );   ///< initialize option class from configuration
 };
diff --git a/source/Lib/TLibCommon/TComPicSym.cpp b/source/Lib/TLibCommon/TComPicSym.cpp
index 646b8d7..54c897f 100644
--- a/source/Lib/TLibCommon/TComPicSym.cpp
+++ b/source/Lib/TLibCommon/TComPicSym.cpp
@@ -47,7 +47,7 @@
 Void TComPicSym::create  ( Int iPicWidth, Int iPicHeight, UInt uiMaxWidth, UInt uiMaxHeight, UInt uiMaxDepth )
 {
   UInt i;
-
+	m_apcTComTile = NULL;
   m_uhTotalDepth      = uiMaxDepth;
   m_uiNumPartitions   = 1<<(m_uhTotalDepth<<1);
   
@@ -180,6 +180,7 @@ UInt TComPicSym::getPicSCUAddr( UInt SCUEncOrder )
 
 Void TComPicSym::xCreateTComTileArray()
 {
+  if(m_apcTComTile) return;
   m_apcTComTile = new TComTile*[(m_iNumColumnsMinus1+1)*(m_iNumRowsMinus1+1)];
   for( UInt i=0; i<(m_iNumColumnsMinus1+1)*(m_iNumRowsMinus1+1); i++ )
   {
diff --git a/source/Lib/TLibCommon/TComPrediction.cpp b/source/Lib/TLibCommon/TComPrediction.cpp
index 0801988..2db4856 100644
--- a/source/Lib/TLibCommon/TComPrediction.cpp
+++ b/source/Lib/TLibCommon/TComPrediction.cpp
@@ -116,6 +116,8 @@ Void TComPrediction::initTempBuff()
     m_acYuvPred[1] .create( g_uiMaxCUWidth, g_uiMaxCUHeight );
 
     m_cYuvPredTemp.create( g_uiMaxCUWidth, g_uiMaxCUHeight );
+  }else{
+  	return;
   }
 
 #if LM_CHROMA                      
diff --git a/source/Lib/TLibCommon/TComRom.cpp b/source/Lib/TLibCommon/TComRom.cpp
index a83eda6..8ae78e4 100644
--- a/source/Lib/TLibCommon/TComRom.cpp
+++ b/source/Lib/TLibCommon/TComRom.cpp
@@ -115,6 +115,13 @@ Void destroyROM()
 #endif
 #endif //QC_MDCS
   }
+
+#if NSQT
+	for ( i = 0; i < 2; i++ )
+	{
+		delete[] g_auiNonSquareSigLastScan[ i ];
+	}
+#endif
 }
 
 // ====================================================================================================================
diff --git a/source/Lib/TLibDecoder/NALread.h b/source/Lib/TLibDecoder/NALread.h
index 9964113..ce0f7b3 100644
--- a/source/Lib/TLibDecoder/NALread.h
+++ b/source/Lib/TLibDecoder/NALread.h
@@ -47,6 +47,7 @@
 struct InputNALUnit : public NALUnit
 {
   TComInputBitstream* m_Bitstream;
+  virtual ~InputNALUnit(){delete m_Bitstream;}
 };
 
 void read(InputNALUnit& nalu, std::vector<uint8_t>& nalUnitBuf);
diff --git a/source/Lib/TLibDecoder/TDecGop.cpp b/source/Lib/TLibDecoder/TDecGop.cpp
index 5f0e43c..5167df4 100644
--- a/source/Lib/TLibDecoder/TDecGop.cpp
+++ b/source/Lib/TLibDecoder/TDecGop.cpp
@@ -391,7 +390,7 @@ Void TDecGop::decompressGop(TComInputBitstream* pcBitstream, TComPic*& rpcPic, B
 #endif
     
 #if OL_USE_WPP
-    if (iSymbolMode && pcSlice->getPPS()->getEntropyCodingSynchro())
+    if (iSymbolMode /* && pcSlice->getPPS()->getEntropyCodingSynchro()*/)
     {
       // deallocate all created substreams, including internal buffers.
       for (UInt ui = 0; ui < uiNumSubstreams; ui++)
diff --git a/source/Lib/TLibDecoder/TDecSlice.cpp b/source/Lib/TLibDecoder/TDecSlice.cpp
index b1c8a3d..6be8230 100644
--- a/source/Lib/TLibDecoder/TDecSlice.cpp
+++ b/source/Lib/TLibDecoder/TDecSlice.cpp
@@ -134,6 +134,8 @@ Void TDecSlice::decompressSlice(TComInputBitstream* pcBitstream, TComPic*& rpcPi
 
   if( iSymbolMode )
   {
+    delete[] m_pcBufferSbacDecoders;
+    delete[] m_pcBufferBinCABACs;
     m_pcBufferSbacDecoders = new TDecSbac    [uiTilesAcross];  
     m_pcBufferBinCABACs    = new TDecBinCABAC[uiTilesAcross];
     for (UInt ui = 0; ui < uiTilesAcross; ui++)
diff --git a/source/Lib/TLibDecoder/TDecTop.cpp b/source/Lib/TLibDecoder/TDecTop.cpp
index 2f35b29..e2eaf52 100644
--- a/source/Lib/TLibDecoder/TDecTop.cpp
+++ b/source/Lib/TLibDecoder/TDecTop.cpp
@@ -106,6 +106,7 @@ Void TDecTop::destroy()
         }
       }
     }
+    m_vAPS.clear();
   }
 #else
 #if E045_SLICE_COMMON_INFO_SHARING
@@ -442,6 +443,7 @@ Bool TDecTop::decode(InputNALUnit& nalu, Int& iSkipFrame, Int& iPOCLastDisplay)
         //  Get a new picture buffer
         xGetNewPicBuffer (m_apcSlicePilot, pcPic);
         
+        delete pcPic->getSEIs();
         /* transfer any SEI messages that have been received to the picture */
         pcPic->setSEIs(m_SEIs);
         m_SEIs = NULL;

修正后在windows平台下测试发现最大内存用量减少将近一半！

另外附上前文提到的、去除异常（exception）支持后的主要代码（source/Lib/TLibDecoder/AnnexBread.cpp）：

bool byteStreamNALUnit(istream& is,vector<uint8_t>& unit,AnnexBStats& stats)
{
	//find start 00 00 01
	int zeros = 0;
	int ch = 0;
	for(;;){
		ch = is.get();
		if(is.eof() || ch) break;
		zeros++;
	}
	stats.m_numLeadingZero8BitsBytes = zeros;
	if(ch==1){
		stats.m_numStartCodePrefixBytes = 3;
		stats.m_numLeadingZero8BitsBytes -= 2;
		if(stats.m_numLeadingZero8BitsBytes>0){
			stats.m_numLeadingZero8BitsBytes--;
			stats.m_numZeroByteBytes = 1;
		}
	}else{
		return true;
	}
	//nal data | end
	uint32_t tmp = 0;//Conditional jump or move depends on uninitialised value,so set to 0
	is.read((char*)&tmp,3);
	if(is.eof()){
		int got = is.gcount();
		while(got){
			unit.push_back((tmp>>0) & 0xFF);
			tmp >>= 8;
			got--;
		}
		return true;
	}else{
		while(tmp!= 0 && tmp != 0x10000){
			unit.push_back((tmp>>0) & 0xFF);
			tmp>>=8;
			is.read(((char*)&tmp)+2,1);
			if(is.eof()){
				unit.push_back((tmp>>0) & 0xFF);
				unit.push_back((tmp>>8) & 0xFF);
				return true;
			}
		}
		if(tmp==0x10000){
			stats.m_numTrailingZero8BitsBytes = 0;
			is.seekg(-3,ios_base::cur);//for next nal unit
			return false;
		}else{
			stats.m_numTrailingZero8BitsBytes = 3;
			for(;;){
				ch = is.get();
				if(ch==1){
					stats.m_numTrailingZero8BitsBytes -= 3;
					if(stats.m_numTrailingZero8BitsBytes<0){
						stats.m_numTrailingZero8BitsBytes = 0;
					}
					is.seekg(-3,ios_base::cur);//for next nal unit
					return false;
				}else if(ch) {
					return true;
				}else{
					stats.m_numTrailingZero8BitsBytes++;
				}
			}
		}
	}
}