问题说明
有些视频文件中, 关键帧的SPS/PPS缺失, 导致播放时解码失败.
比如某些mp4视频, 第一个关键帧有SPS/PPS, 其后所有关键帧都没有SPS/PPS, 播放该mp4文件本身是没问题的, 但是, 如果需要将该mp4文件转封装(不转码)到m3u8格式, 除了第一个ts片, 后续所有ts片的关键帧都没有SPS/PPS. 除非从头开始播放, 而且不拖动进度条. 否则, 从中途开始播放, 或拖动进度条, 将会黑屏无法播放.
为了m3u8文件可正常播放, 在转封装的过程中, 需要对关键帧补上SPS/PPS信息.
此文说明一种补SPS/PPS的方法, 以C语言实现.
如何使用
使用本文提供的函数, 可以很方便的实现补帧, 使用很简单, 如下:
h264_codecpar_t cp = { 0 };
AVPacket pkt = { 0 };
AVFormatContext * ic = NULL;
...
while(0 == av_read_frame(ic, &pkt))
{
if (pkt.stream_index == video_index )
{
// Keyframe?
if (pkt.flags & AV_PKT_FLAG_KEY)
{
// Check for SPS/PPS and patch the packet if they are missing.
h264_codecpar_update(&cp, &pkt, TRUE);
}
...
}
...
}
h264_codecpar_free(&cp);
其中关键函数就是 h264_codecpar_update, 以下对该函数的实现进行说明.
实现说明
h264_codecpar_update 函数的实现逻辑不复杂:
- 判断Frame是AnnexB还是AVCC格式;
- 根据不同格式进行解析, 获取其中的SPS/PPS信息;
- 如果成功取到SPS/PPS, 则判断和之前保存的SPS/PPS是否相同,相同的则直接返回, 否则保存新的SPS/PPS信息到临时内存中, 然后返回
- 如果无法获取到SPS/PPS, 则将之前保存的SPS/PPS信息复制到本Frame的开头位置, 然后返回
实现的代码:
// 定义结构体:
typedef struct h264_codecpar_t h264_codecpar_t;
// Cached H.264 codec parameters extracted from an SPS/PPS pair, plus a
// heap buffer holding the raw SPS/PPS bytes used to patch keyframes that
// lack them.
struct h264_codecpar_t
{
uint32_t profile_idc;   // SPS profile_idc
uint32_t level_idc;     // SPS level_idc
uint32_t width;         // decoded picture width in pixels (after cropping)
uint32_t height;        // decoded picture height in pixels (after cropping)
int fps;                // frame rate; NOTE(review): not filled by the parser shown here — confirm
int deinterlace;        // 1 when frame_mbs_only_flag == 0 (interlaced content)
uint8_t* sps_ptr;       // points into the parsed packet's data — transient, do not free
uint8_t* pps_ptr;       // points into the parsed packet's data — transient, do not free
uint32_t sps_size;      // SPS payload size in bytes
uint32_t pps_size;      // PPS payload size in bytes
// Temporarily allocated buffer keeping a private copy of the extradata
// (4-byte header + SPS [+ 4-byte header + PPS]); owned by this struct.
char* extradata_buff;
int extradata_size;
};
// 主要函数的实现
/*
 * Inspect a keyframe packet for SPS/PPS.
 * - If SPS/PPS are present, compare against the cached parameters; when
 *   `copy` is non-zero, cache the new SPS/PPS.
 * - If SPS/PPS are absent and a cached copy exists, prepend the cached
 *   extradata to the packet (when `copy` is non-zero).
 * Returns TRUE if the codec parameters changed, FALSE otherwise.
 * Fixes over the original:
 *  - guards against packets shorter than 4 bytes before reading data[0..3];
 *  - allocates AV_INPUT_BUFFER_PADDING_SIZE extra zeroed bytes, as required
 *    by av_packet_from_data();
 *  - checks av_packet_from_data() for failure and frees the buffer then
 *    (the original leaked it and left the packet dangling).
 */
int h264_codecpar_update(h264_codecpar_t* codecpar, AVPacket* pkt, int copy)
{
    h264_codecpar_t tmp = { 0 };
    uint8_t* data = pkt->data;
    int changed = FALSE;
    int ret = -1, avcc = FALSE;
    // Must be a keyframe.
    assert(pkt->flags & AV_PKT_FLAG_KEY);
    if (!data || pkt->size < 4)
    {
        // Too small to hold a start code or an AVCC length field.
        return FALSE;
    }
    if (data[0] == 0 && data[1] == 0 &&
        ((data[2] == 0 && data[3] == 1) || (data[2] == 1)))
    {
        // AnnexB: starts with 00 00 00 01 or 00 00 01.
        ret = parseAnnexNalu(&tmp, data, pkt->size);
    }
    else
    {
        // AVCC / AVC1: length-prefixed NAL units.
        avcc = TRUE;
        ret = parseAvccNalu(&tmp, data, pkt->size);
    }
    if (0 == ret)
    {
        // SPS found in this packet: detect parameter changes.
        if (tmp.width != codecpar->width ||
            tmp.height != codecpar->height ||
            tmp.profile_idc != codecpar->profile_idc ||
            tmp.level_idc != codecpar->level_idc)
        {
            // Only report a change if we previously had valid parameters.
            if (codecpar->width > 0)
            {
                changed = TRUE;
            }
            if (copy)
            {
                // Cache the new SPS/PPS bytes.
                h264_copy_codecpar(codecpar, &tmp, avcc);
            }
        }
    }
    else if (codecpar->extradata_buff && copy)
    {
        // No SPS/PPS in this keyframe: prepend the cached extradata.
        // (Only possible once a previous keyframe provided SPS/PPS.)
        int size = pkt->size + codecpar->extradata_size;
        data = (uint8_t*)av_malloc((size_t)size + AV_INPUT_BUFFER_PADDING_SIZE);
        if (data)
        {
            // Cached SPS/PPS first, then the original packet payload.
            memcpy(data, codecpar->extradata_buff, (size_t)codecpar->extradata_size);
            memcpy(data + codecpar->extradata_size, pkt->data, (size_t)pkt->size);
            // av_packet_from_data() requires zeroed padding after the payload.
            memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
            // Release the old buffer; on success the packet owns `data`.
            av_buffer_unref(&pkt->buf);
            if (av_packet_from_data(pkt, data, size) < 0)
            {
                // Ownership was not transferred; avoid the leak.
                av_free(data);
            }
        }
    }
    return changed;
}
其中 h264_copy_codecpar 函数的实现:
/*
 * Save the SPS/PPS bytes from `info` into codecpar->extradata_buff, each
 * preceded by a 4-byte header: the big-endian NALU length for AVCC, or the
 * 00 00 00 01 start code for AnnexB. Also records width/height/profile/...
 * so later packets can be compared for parameter changes.
 * Fixes over the original:
 *  - headers are written byte-by-byte; `*(uint32_t*)data = 0x01000000`
 *    only produced 00 00 00 01 on little-endian hosts, and the second
 *    store (after data += 4 + sps_size) could be misaligned (UB);
 *  - no htonl() dependency;
 *  - malloc failure keeps the previous extradata instead of asserting.
 */
static void h264_copy_codecpar(h264_codecpar_t* codecpar, h264_codecpar_t* info, int avcc)
{
    size_t size = 0;
    uint8_t* data;
    // 4-byte header + SPS payload.
    size += 4 + info->sps_size;
    if (info->pps_ptr)
    {
        // 4-byte header + PPS payload, when present.
        size += 4 + info->pps_size;
    }
    data = malloc(size);
    if (!data)
    {
        // Out of memory: keep the previously cached extradata.
        return;
    }
    // Replace any previously cached buffer.
    free(codecpar->extradata_buff);
    codecpar->extradata_buff = (char*)data;
    codecpar->extradata_size = (int)size;
    if (avcc)
    {
        // AVCC: 4-byte big-endian SPS length.
        data[0] = (uint8_t)(info->sps_size >> 24);
        data[1] = (uint8_t)(info->sps_size >> 16);
        data[2] = (uint8_t)(info->sps_size >> 8);
        data[3] = (uint8_t)(info->sps_size);
    }
    else
    {
        // AnnexB: 4-byte start code 00 00 00 01.
        data[0] = 0; data[1] = 0; data[2] = 0; data[3] = 1;
    }
    data += 4;
    memcpy(data, info->sps_ptr, (size_t)info->sps_size);
    data += info->sps_size;
    if (info->pps_ptr)
    {
        // Same header scheme for the PPS.
        if (avcc)
        {
            data[0] = (uint8_t)(info->pps_size >> 24);
            data[1] = (uint8_t)(info->pps_size >> 16);
            data[2] = (uint8_t)(info->pps_size >> 8);
            data[3] = (uint8_t)(info->pps_size);
        }
        else
        {
            data[0] = 0; data[1] = 0; data[2] = 0; data[3] = 1;
        }
        data += 4;
        memcpy(data, info->pps_ptr, (size_t)info->pps_size);
    }
    // Record the parameters used to detect SPS/PPS changes later.
    codecpar->width = info->width;
    codecpar->height = info->height;
    codecpar->profile_idc = info->profile_idc;
    codecpar->level_idc = info->level_idc;
    codecpar->fps = info->fps;
    codecpar->deinterlace = info->deinterlace;
}
以上代码完成了判断和补帧的逻辑; SPS/PPS 的具体解析由下面的函数实现.
SPS/PPS的解析不进行具体分析, 直接上代码:
// H.264 NAL unit types (ITU-T H.264, Table 7-1).
enum
{
FRAME_UNDEFINED = 0,      // unspecified
FRAME_SLICE_NON_IDR = 1,  // coded slice of a non-IDR picture
FRAME_SLICE_DATA1 = 2,    // coded slice data partition A
FRAME_SLICE_DATA2 = 3,    // coded slice data partition B
FRAME_SLICE_DATA3 = 4,    // coded slice data partition C
FRAME_IDR = 5,            // coded slice of an IDR picture (keyframe)
FRAME_SEI = 6,            // supplemental enhancement information
FRAME_SPS = 7,            // sequence parameter set
FRAME_PPS = 8,            // picture parameter set
FRAME_AUD = 9, // AccessUnitDelimiter
FRAME_ENDSEQ = 10, // EndOfSequence
FRAME_ENDSTREAM = 11,     // end of stream
FRAME_FILLERDATA = 12,    // filler data
};
// Read a 32-bit big-endian value from p.
// Fix: cast each byte to uint32_t before shifting. In the original,
// `p[0] << 24` promoted p[0] to a signed int, and shifting a byte >= 0x80
// into the sign bit is undefined behavior.
static uint32_t get_uint32(const uint8_t* p)
{
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}
//static uint32_t get_uint16(const uint8_t* p)
//{
// return (uint32_t)(p[0] << 8 | p[1]);
//}
/*
 * Parse an unsigned Exp-Golomb code, ue(v) (H.264 9.1).
 * pBuff/nLen: buffer and its length in bytes; nStartBit: in/out bit cursor.
 * Fixes over the original:
 *  - the suffix-bit loop now also stops at the end of the buffer
 *    (the original read past pBuff on truncated data);
 *  - guards the final shift: `1 << nZeroNum` is undefined for a promoted
 *    signed int when nZeroNum >= 31 (possible with corrupt all-zero input).
 */
static uint32_t Ue(uint8_t* pBuff, uint32_t nLen, uint32_t* nStartBit)
{
    uint32_t nBits = nLen * 8;
    // Count leading zero bits.
    uint32_t nZeroNum = 0;
    while (*nStartBit < nBits)
    {
        if (pBuff[*nStartBit / 8] & (0x80 >> (*nStartBit % 8)))
        {
            break;
        }
        nZeroNum++;
        (*nStartBit)++;
    }
    (*nStartBit)++; // skip the terminating '1' bit
    // Read nZeroNum suffix bits, stopping at the buffer end.
    uint32_t dwRet = 0;
    uint32_t i;
    for (i = 0; i < nZeroNum && *nStartBit < nBits; i++)
    {
        dwRet <<= 1;
        if (pBuff[*nStartBit / 8] & (0x80 >> (*nStartBit % 8)))
        {
            dwRet += 1;
        }
        (*nStartBit)++;
    }
    if (nZeroNum > 31)
    {
        // Corrupt input: value is unrepresentable; avoid UB in the shift.
        return 0;
    }
    return ((uint32_t)1 << nZeroNum) - 1 + dwRet;
}
/*
 * Parse a signed Exp-Golomb code, se(v) (H.264 9.1.1):
 * codeNum k maps to (-1)^(k+1) * ceil(k/2), i.e. 0,1,-1,2,-2,...
 * Fix: exact integer arithmetic replaces the original
 * `(int)ceil((double)nUeVal / 2.f)` — the float division loses precision
 * for large code numbers — and drops the math.h dependency.
 */
static int Se(uint8_t* pBuff, uint32_t nLen, uint32_t* nStartBit)
{
    uint32_t nUeVal = Ue(pBuff, nLen, nStartBit);
    // ceil(k/2) == k/2 + (k & 1) in integer arithmetic.
    int nValue = (int)(nUeVal / 2 + (nUeVal & 1));
    if ((nUeVal & 1) == 0)
    {
        nValue = -nValue;
    }
    return nValue;
}
// u Just returns the BitCount bits of buf and change it to decimal.
// e.g. BitCount = 4, buf = 01011100, then return 5(0101)
static uint32_t u(uint32_t nBitCount, uint8_t* buf, uint32_t* nStartBit)
{
uint32_t dwRet = 0;
int i = 0;
for (i = 0; i < nBitCount; i++)
{
dwRet <<= 1;
if (buf[*nStartBit / 8] & (0x80 >> (*nStartBit % 8)))
{
dwRet += 1;
}
(*nStartBit)++;
}
return dwRet;
}
// w h profile_idc level_idc
static int get_resolution(mdf_h264_codecpar_t* info, uint8_t* pspsData, uint32_t nspsDataLen)//, int* nWidth, int* nHeight, int* profile, int* level, int* nDeinterlace)
{
//uint8_t ucLastNalType = pspsData[0];
//Analyze SPS to find width and height
uint32_t nStartBit = 0;
uint8_t* pBuf = pspsData;
uint32_t nDataLeft = nspsDataLen;
//int forbidden_zero_bit =
u(1, pBuf, &nStartBit);
//int nal_ref_idc =
u(2, pBuf, &nStartBit);
uint32_t nal_unit_type = u(5, pBuf, &nStartBit);
//printf("get_resolution forbidden_zero_bit=%d, nal_ref_idc=%d, nal_unit_type=%d ",forbidden_zero_bit, nal_ref_idc, nal_unit_type);
if (nal_unit_type == FRAME_SPS)
{
uint32_t profile_idc = u(8, pBuf, &nStartBit);
//int constraint_set0_flag =
u(1, pBuf, &nStartBit);//(buf[1] & 0x80)>>7;
//int constraint_set1_flag =
u(1, pBuf, &nStartBit);//(buf[1] & 0x40)>>6;
//int constraint_set2_flag =
u(1, pBuf, &nStartBit);//(buf[1] & 0x20)>>5;
//int constraint_set3_flag =
u(1, pBuf, &nStartBit);//(buf[1] & 0x10)>>4;
//int reserved_zero_4bits =
u(4, pBuf, &nStartBit);
uint32_t level_idc = u(8, pBuf, &nStartBit);
//int seq_parameter_set_id =
Ue(pBuf, nDataLeft, &nStartBit);
info->profile_idc = profile_idc;
info->level_idc = level_idc;
uint32_t chroma_format_idc = 0;
if (profile_idc == 100 || // High profile
profile_idc == 110 || // High10 profile
profile_idc == 122 || // High422 profile
profile_idc == 244 || // High444 Predictive profile
profile_idc == 44 || // Cavlc444 profile
profile_idc == 83 || // Scalable Constrained High profile (SVC)
profile_idc == 86 || // Scalable High Intra profile (SVC)
profile_idc == 118 || // Stereo High profile (MVC)
profile_idc == 128 || // Multiview High profile (MVC)
profile_idc == 138 || // Multiview Depth High profile (MVCD)
profile_idc == 144) // old High444 profile
{
chroma_format_idc = Ue(pBuf, nDataLeft, &nStartBit);
if (chroma_format_idc == 3)
{
//int residual_colour_transform_flag =
u(1, pBuf, &nStartBit);
}
//int bit_depth_luma_minus8 =
Ue(pBuf, nDataLeft, &nStartBit);
//int bit_depth_chroma_minus8 =
Ue(pBuf, nDataLeft, &nStartBit);
//int qpprime_y_zero_transform_bypass_flag =
u(1, pBuf, &nStartBit);
uint32_t seq_scaling_matrix_present_flag = u(1, pBuf, &nStartBit);
//uint32_t seq_scaling_list_present_flag[8];
if (seq_scaling_matrix_present_flag)
{
int i = 0;
for (i = 0; i < 8; i++)
{
// seq_scaling_list_present_flag[i] =
u(1, pBuf, &nStartBit);
}
}
}
else
{
chroma_format_idc = 1;
}
//int log2_max_frame_num_minus4 =
Ue(pBuf, nDataLeft, &nStartBit);
uint32_t pic_order_cnt_type = Ue(pBuf, nDataLeft, &nStartBit);
if (pic_order_cnt_type == 0)
{
//int log2_max_pic_order_cnt_lsb_minus4 =
Ue(pBuf, nDataLeft, &nStartBit);
}
else if (pic_order_cnt_type == 1)
{
//int delta_pic_order_always_zero_flag =
u(1, pBuf, &nStartBit);
//int offset_for_non_ref_pic =
Se(pBuf, nDataLeft, &nStartBit);
//int offset_for_top_to_bottom_field =
Se(pBuf, nDataLeft, &nStartBit);
uint32_t num_ref_frames_in_pic_order_cnt_cycle = Ue(pBuf, nDataLeft, &nStartBit);
int* offset_for_ref_frame = (int*)malloc(num_ref_frames_in_pic_order_cnt_cycle * sizeof(int));
int i = 0;
for (i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++)
offset_for_ref_frame[i] = Se(pBuf, nDataLeft, &nStartBit);
free(offset_for_ref_frame);
}
//int num_ref_frames =
Ue(pBuf, nDataLeft, &nStartBit);
//int gaps_in_frame_num_value_allowed_flag =
u(1, pBuf, &nStartBit);
uint32_t pic_width_in_mbs_minus1 = Ue(pBuf, nDataLeft, &nStartBit);
uint32_t pic_height_in_map_units_minus1 = Ue(pBuf, nDataLeft, &nStartBit);
uint32_t frame_mbs_only_flag = u(1, pBuf, &nStartBit);
info->deinterlace = 0;
if (0 == frame_mbs_only_flag)
{
info->deinterlace = 1;
}
if (!frame_mbs_only_flag)
{
//int mb_adaptive_frame_field_flag =
u(1, pBuf, &nStartBit);
}
//int direct_8x8_inference_flag =
u(1, pBuf, &nStartBit);
uint32_t frame_cropping_flag = u(1, pBuf, &nStartBit);
uint32_t frame_crop_left_offset = 0;
uint32_t frame_crop_right_offset = 0;
uint32_t frame_crop_top_offset = 0;
uint32_t frame_crop_bottom_offset = 0;
if (frame_cropping_flag)
{
frame_crop_left_offset = Ue(pBuf, nDataLeft, &nStartBit);
frame_crop_right_offset = Ue(pBuf, nDataLeft, &nStartBit);
frame_crop_top_offset = Ue(pBuf, nDataLeft, &nStartBit);
frame_crop_bottom_offset = Ue(pBuf, nDataLeft, &nStartBit);
}
info->width = (pic_width_in_mbs_minus1 + 1) * 16;
//*nHeight = (pic_height_in_map_units_minus1 + 1) * 16;
info->height = (2 - frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1) * 16;
if (frame_cropping_flag)
{
uint32_t crop_unit_x;
uint32_t crop_unit_y;
if (0 == chroma_format_idc) // monochrome
{
crop_unit_x = 1;
crop_unit_y = 2 - frame_mbs_only_flag;
}
else if (1 == chroma_format_idc) // 4:2:0
{
crop_unit_x = 2;
crop_unit_y = 2 * (2 - frame_mbs_only_flag);
}
else if (2 == chroma_format_idc) // 4:2:2
{
crop_unit_x = 2;
crop_unit_y = 2 - frame_mbs_only_flag;
}
else // 3 == sps.chroma_format_idc // 4:4:4
{
crop_unit_x = 1;
crop_unit_y = 2 - frame_mbs_only_flag;
}
info->width -= crop_unit_x * (frame_crop_left_offset + frame_crop_right_offset);
info->height -= crop_unit_y * (frame_crop_top_offset + frame_crop_bottom_offset);
}
//printf( "get_resolution:: Find SPS frame, Invalid nal unit type, nDataLen(%d), nal_unit_type(%d)", nspsDataLen, nal_unit_type);
return 0;
}
return -1;
}
static int parseAvccNalu(mdf_h264_codecpar_t* info, uint8_t* buf, int size)
{
//find sps pps
int64_t index = 0;
int ret = -1;
while (index < size)
{
index += 4;
if (index >= size)
{
// parse error, no nal len;
break;
}
uint32_t len = get_uint32(&buf[index - 4]);
if (index + len > size)
{
break;
}
int nal_type = buf[index] & 0x1f;
if (nal_type == FRAME_SPS)
{
// get sps
// parse sps
ret = get_resolution(info, &buf[index], len);
info->sps_ptr = buf + index;
info->sps_size = len;
if (info->pps_ptr) break;
}
else if (nal_type == FRAME_PPS)
{
// get pps
info->pps_ptr = buf + index;
info->pps_size = len;
if (info->sps_ptr) break;
}
index += len;
}
return ret;
}
// Find the first AnnexB start code (00 00 00 01 or 00 00 01) in data.
// On success returns TRUE, sets *front to the first byte of the start code
// and *latter to the first byte after it; otherwise returns FALSE.
static int searchStartCode(uint8_t* data, int size, uint8_t** front, uint8_t** latter)
{
    uint8_t* end = data + size;
    uint8_t* cur;
    for (cur = data; cur < end; ++cur)
    {
        ptrdiff_t remain = end - cur;
        if (remain >= 3 && cur[0] == 0 && cur[1] == 0)
        {
            // Prefer the 4-byte form when it fits.
            if (remain >= 4 && cur[2] == 0 && cur[3] == 1)
            {
                *front = cur;
                *latter = cur + 4;
                return TRUE;
            }
            if (cur[2] == 1)
            {
                *front = cur;
                *latter = cur + 3;
                return TRUE;
            }
        }
    }
    return FALSE;
}
int parseAnnexNalu(mdf_h264_codecpar_t* info, uint8_t* buf, int size)
{
uint8_t* p, * end;
int len;
uint8_t* last_end, * start = 0, * last_start;
int ret = -1;
len = size;
p = buf;
end = buf + size;
last_start = p;
while (searchStartCode(p, len, &last_end, &start))
{
if (last_end > last_start && last_end < end)
{
//nal unit type
if ((*last_start & 0x1f) == FRAME_SPS)
{
//sps
ret = get_resolution(info, last_start, (uint32_t)(last_end - last_start));
info->sps_ptr = last_start;
info->sps_size = (uint32_t)(last_end - last_start);
if (info->pps_ptr) break;
}
else if ((*last_start & 0x1f) == FRAME_PPS)
{
info->pps_ptr = last_start;
info->pps_size = (uint32_t)(last_end - last_start);
if (info->sps_ptr)break;
}
}
p = start;
len = (int)(end - p);
last_start = start;
}
if ((*start & 0x1f) == FRAME_SPS)
{
ret = get_resolution(info, start, (uint32_t)(end - start));
info->sps_ptr = start;
info->sps_size = (uint32_t)(end - start);
}
else if ((*start & 0x1f) == FRAME_PPS)
{
//parserPps(start, end - start);
info->pps_ptr = start;
info->pps_size = (uint32_t)(end - start);
}
return ret;
}
AVCC格式说明
AVCC格式 也叫AVC1格式、MPEG-4格式, NALU前带长度字段(length-prefixed)而非起始码。用于mp4/flv/mkv, VideoToolbox。(注: "Byte-Stream Format"指的是带起始码的AnnexB格式, 不是AVCC。)
例如:
01 42 C0 28 ff e1 00 18 67 64 00 29 ac b4 02 80 2d d0 80 00 00 03 00 80 0f 42 40 07 8c 19 50 01 00 04 68 ef 3c b0 fd f8 f8 00 00 00 00
前4个字节:
0x01: version
0x42: avc profile (首个SPS的第1个字节)
0xc0: avc compatibility (首个SPS的第2个字节)
0x28: avc level (首个SPS的第3个字节; 这3个字节应与SPS数据中NAL头之后的前3个字节对应)
第5个字节:
0xff:
6_bit: 保留位, 固定为 111111
2_bit: lengthSizeMinusOne, NALU长度字段的字节数减1 (11 表示长度字段占4字节)
第6个字节:
0xe1: [111 00001]
3_bit: 默认 111
5_bit: 接下来的sps或pps的个数::这里为1
第7 8个字节:
0x00 0x18: 表示接下来sps或者pps的长度为24
第9个字节:
0x67: [0110 0111] nalu_type为7,表示SPS,即从第9个字节开始的24个字节为SPS数据
第33个字节:9 (sps_pos) + 24(sps_size)
0x01: 接下来的sps或pps的个数::这里为1
第34 35字节:
0x00 0x04: 表示接下来sps或者pps的长度为4
第36个字节:
0x68: [0110 1000] nalu_type为8,表示PPS