问题说明
有些视频文件中, 关键帧的SPS/PPS缺失, 导致播放时解码失败.
比如某些mp4视频, 第一个关键帧有SPS/PPS, 其后所有关键帧都没有SPS/PPS, 播放该mp4文件本身是没问题的, 但是, 如果需要将该mp4文件转封装(不转码)到m3u8格式, 除了第一个ts片, 后续所有ts片的关键帧都没有SPS/PPS. 除非从头开始播放, 而且不拖动进度条. 否则, 从中途开始播放, 或拖动进度条, 将会黑屏无法播放.
为了m3u8文件可正常播放, 在转封装的过程中, 需要对关键帧补上SPS/PPS信息.
此文说明一种补SPS/PPS的方法, 以C语言实现.
如何使用
使用本文提供的函数, 可以很方便的实现补帧, 使用很简单, 如下:
h264_codecpar_t cp = { 0 };
AVPacket pkt = { 0 };
AVFormatContext * ic = NULL;
...
while(0 == av_read_frame(ic, &pkt))
{
if (pkt.stream_index == video_index )
{
// Keyframe?
if (pkt.flags & AV_PKT_FLAG_KEY)
{
// Check for SPS/PPS and patch the packet if they are missing.
h264_codecpar_update(&cp, &pkt, TRUE);
}
...
}
...
}
h264_codecpar_free(&cp);
其中关键函数就是 h264_codecpar_update, 以下对该函数的实现进行说明.
实现说明
h264_codecpar_update 函数的实现逻辑不复杂:
- 判断Frame是AnnexB还是AVCC格式;
- 根据不同格式进行解析, 获取其中的SPS/PPS信息;
- 如果成功取到SPS/PPS, 则判断和之前保存的SPS/PPS是否相同,相同的则直接返回, 否则保存新的SPS/PPS信息到临时内存中, 然后返回
- 如果无法获取到SPS/PPS, 则将之前保存的SPS/PPS信息复制到本Frame的开头位置, 然后返回
实现的代码:
// 定义结构体:
typedef struct h264_codecpar_t h264_codecpar_t;
// Cached H.264 codec parameters extracted from an SPS/PPS pair, plus a
// heap buffer holding the raw SPS/PPS bytes used to patch keyframes that
// lack them.
struct h264_codecpar_t
{
uint32_t profile_idc;   // SPS profile_idc
uint32_t level_idc;     // SPS level_idc
uint32_t width;         // decoded picture width in pixels (after cropping)
uint32_t height;        // decoded picture height in pixels (after cropping)
int fps;                // frame rate; NOTE(review): not filled by the parser shown here — confirm
int deinterlace;        // 1 when frame_mbs_only_flag == 0 (interlaced content)
uint8_t* sps_ptr;       // points into the parsed packet's data — transient, do not free
uint8_t* pps_ptr;       // points into the parsed packet's data — transient, do not free
uint32_t sps_size;      // SPS payload size in bytes
uint32_t pps_size;      // PPS payload size in bytes
// Temporarily allocated buffer keeping a private copy of the extradata
// (4-byte header + SPS [+ 4-byte header + PPS]); owned by this struct.
char* extradata_buff;
int extradata_size;
};
// 主要函数的实现
/*
 * Inspect a keyframe packet for SPS/PPS.
 * - If SPS/PPS are present, compare against the cached parameters; when
 *   `copy` is non-zero, cache the new SPS/PPS.
 * - If SPS/PPS are absent and a cached copy exists, prepend the cached
 *   extradata to the packet (when `copy` is non-zero).
 * Returns TRUE if the codec parameters changed, FALSE otherwise.
 * Fixes over the original:
 *  - guards against packets shorter than 4 bytes before reading data[0..3];
 *  - allocates AV_INPUT_BUFFER_PADDING_SIZE extra zeroed bytes, as required
 *    by av_packet_from_data();
 *  - checks av_packet_from_data() for failure and frees the buffer then
 *    (the original leaked it and left the packet dangling).
 */
int h264_codecpar_update(h264_codecpar_t* codecpar, AVPacket* pkt, int copy)
{
    h264_codecpar_t tmp = { 0 };
    uint8_t* data = pkt->data;
    int changed = FALSE;
    int ret = -1, avcc = FALSE;
    // Must be a keyframe.
    assert(pkt->flags & AV_PKT_FLAG_KEY);
    if (!data || pkt->size < 4)
    {
        // Too small to hold a start code or an AVCC length field.
        return FALSE;
    }
    if (data[0] == 0 && data[1] == 0 &&
        ((data[2] == 0 && data[3] == 1) || (data[2] == 1)))
    {
        // AnnexB: starts with 00 00 00 01 or 00 00 01.
        ret = parseAnnexNalu(&tmp, data, pkt->size);
    }
    else
    {
        // AVCC / AVC1: length-prefixed NAL units.
        avcc = TRUE;
        ret = parseAvccNalu(&tmp, data, pkt->size);
    }
    if (0 == ret)
    {
        // SPS found in this packet: detect parameter changes.
        if (tmp.width != codecpar->width ||
            tmp.height != codecpar->height ||
            tmp.profile_idc != codecpar->profile_idc ||
            tmp.level_idc != codecpar->level_idc)
        {
            // Only report a change if we previously had valid parameters.
            if (codecpar->width > 0)
            {
                changed = TRUE;
            }
            if (copy)
            {
                // Cache the new SPS/PPS bytes.
                h264_copy_codecpar(codecpar, &tmp, avcc);
            }
        }
    }
    else if (codecpar->extradata_buff && copy)
    {
        // No SPS/PPS in this keyframe: prepend the cached extradata.
        // (Only possible once a previous keyframe provided SPS/PPS.)
        int size = pkt->size + codecpar->extradata_size;
        data = (uint8_t*)av_malloc((size_t)size + AV_INPUT_BUFFER_PADDING_SIZE);
        if (data)
        {
            // Cached SPS/PPS first, then the original packet payload.
            memcpy(data, codecpar->extradata_buff, (size_t)codecpar->extradata_size);
            memcpy(data + codecpar->extradata_size, pkt->data, (size_t)pkt->size);
            // av_packet_from_data() requires zeroed padding after the payload.
            memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
            // Release the old buffer; on success the packet owns `data`.
            av_buffer_unref(&pkt->buf);
            if (av_packet_from_data(pkt, data, size) < 0)
            {
                // Ownership was not transferred; avoid the leak.
                av_free(data);
            }
        }
    }
    return changed;
}
其中 h264_copy_codecpar 函数的实现:
/*
 * Save the SPS/PPS bytes from `info` into codecpar->extradata_buff, each
 * preceded by a 4-byte header: the big-endian NALU length for AVCC, or the
 * 00 00 00 01 start code for AnnexB. Also records width/height/profile/...
 * so later packets can be compared for parameter changes.
 * Fixes over the original:
 *  - headers are written byte-by-byte; `*(uint32_t*)data = 0x01000000`
 *    only produced 00 00 00 01 on little-endian hosts, and the second
 *    store (after data += 4 + sps_size) could be misaligned (UB);
 *  - no htonl() dependency;
 *  - malloc failure keeps the previous extradata instead of asserting.
 */
static void h264_copy_codecpar(h264_codecpar_t* codecpar, h264_codecpar_t* info, int avcc)
{
    size_t size = 0;
    uint8_t* data;
    // 4-byte header + SPS payload.
    size += 4 + info->sps_size;
    if (info->pps_ptr)
    {
        // 4-byte header + PPS payload, when present.
        size += 4 + info->pps_size;
    }
    data = malloc(size);
    if (!data)
    {
        // Out of memory: keep the previously cached extradata.
        return;
    }
    // Replace any previously cached buffer.
    free(codecpar->extradata_buff);
    codecpar->extradata_buff = (char*)data;
    codecpar->extradata_size = (int)size;
    if (avcc)
    {
        // AVCC: 4-byte big-endian SPS length.
        data[0] = (uint8_t)(info->sps_size >> 24);
        data[1] = (uint8_t)(info->sps_size >> 16);
        data[2] = (uint8_t)(info->sps_size >> 8);
        data[3] = (uint8_t)(info->sps_size);
    }
    else
    {
        // AnnexB: 4-byte start code 00 00 00 01.
        data[0] = 0; data[1] = 0; data[2] = 0; data[3] = 1;
    }
    data += 4;
    memcpy(data, info->sps_ptr, (size_t)info->sps_size);
    data += info->sps_size;
    if (info->pps_ptr)
    {
        // Same header scheme for the PPS.
        if (avcc)
        {
            data[0] = (uint8_t)(info->pps_size >> 24);
            data[1] = (uint8_t)(info->pps_size >> 16);
            data[2] = (uint8_t)(info->pps_size >> 8);
            data[3] = (uint8_t)(info->pps_size);
        }
        else
        {
            data[0] = 0; data[1] = 0; data[2] = 0; data[3] = 1;
        }
        data += 4;
        memcpy(data, info->pps_ptr, (size_t)info->pps_size);
    }
    // Record the parameters used to detect SPS/PPS changes later.
    codecpar->width = info->width;
    codecpar->height = info->height;
    codecpar->profile_idc = info->profile_idc;
    codecpar->level_idc = info->level_idc;
    codecpar->fps = info->fps;
    codecpar->deinterlace = info->deinterlace;
}
以上代码完成了判断和补帧的逻辑; SPS/PPS 的具体解析由下面的函数实现.
SPS/PPS的解析不进行具体分析, 直接上代码:
// H.264 NAL unit types (ITU-T H.264, Table 7-1).
enum
{
FRAME_UNDEFINED = 0,      // unspecified
FRAME_SLICE_NON_IDR = 1,  // coded slice of a non-IDR picture
FRAME_SLICE_DATA1 = 2,    // coded slice data partition A
FRAME_SLICE_DATA2 = 3,    // coded slice data partition B
FRAME_SLICE_DATA3 = 4,    // coded slice data partition C
FRAME_IDR = 5,            // coded slice of an IDR picture (keyframe)
FRAME_SEI = 6,            // supplemental enhancement information
FRAME_SPS = 7,            // sequence parameter set
FRAME_PPS = 8,            // picture parameter set
FRAME_AUD = 9, // AccessUnitDelimiter
FRAME_ENDSEQ = 10, // EndOfSequence
FRAME_ENDSTREAM = 11,     // end of stream
FRAME_FILLERDATA = 12,    // filler data
};
// Read a 32-bit big-endian value from p.
// Fix: cast each byte to uint32_t before shifting. In the original,
// `p[0] << 24` promoted p[0] to a signed int, and shifting a byte >= 0x80
// into the sign bit is undefined behavior.
static uint32_t get_uint32(const uint8_t* p)
{
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}
//static uint32_t get_uint16(const uint8_t* p)
//{
// return (uint32_t)(p[0] << 8 | p[1]);
//}
/*
 * Parse an unsigned Exp-Golomb code, ue(v) (H.264 9.1).
 * pBuff/nLen: buffer and its length in bytes; nStartBit: in/out bit cursor.
 * Fixes over the original:
 *  - the suffix-bit loop now also stops at the end of the buffer
 *    (the original read past pBuff on truncated data);
 *  - guards the final shift: `1 << nZeroNum` is undefined for a promoted
 *    signed int when nZeroNum >= 31 (possible with corrupt all-zero input).
 */
static uint32_t Ue(uint8_t* pBuff, uint32_t nLen, uint32_t* nStartBit)
{
    uint32_t nBits = nLen * 8;
    // Count leading zero bits.
    uint32_t nZeroNum = 0;
    while (*nStartBit < nBits)
    {
        if (pBuff[*nStartBit / 8] & (0x80 >> (*nStartBit % 8)))
        {
            break;
        }
        nZeroNum++;
        (*nStartBit)++;
    }
    (*nStartBit)++; // skip the terminating '1' bit
    // Read nZeroNum suffix bits, stopping at the buffer end.
    uint32_t dwRet = 0;
    uint32_t i;
    for (i = 0; i < nZeroNum && *nStartBit < nBits; i++)
    {
        dwRet <<= 1;
        if (pBuff[*nStartBit / 8] & (0x80 >> (*nStartBit % 8)))
        {
            dwRet += 1;
        }
        (*nStartBit)++;
    }
    if (nZeroNum > 31)
    {
        // Corrupt input: value is unrepresentable; avoid UB in the shift.
        return 0;
    }
    return ((uint32_t)1 << nZeroNum) - 1 + dwRet;
}
/*
 * Parse a signed Exp-Golomb code, se(v) (H.264 9.1.1):
 * codeNum k maps to (-1)^(k+1) * ceil(k/2), i.e. 0,1,-1,2,-2,...
 * Fix: exact integer arithmetic replaces the original
 * `(int)ceil((double)nUeVal / 2.f)` — the float division loses precision
 * for large code numbers — and drops the math.h dependency.
 */
static int Se(uint8_t* pBuff, uint32_t nLen, uint32_t* nStartBit)
{
    uint32_t nUeVal = Ue(pBuff, nLen, nStartBit);
    // ceil(k/2) == k/2 + (k & 1) in integer arithmetic.
    int nValue = (int)(nUeVal / 2 + (nUeVal & 1));
    if ((nUeVal & 1) == 0)
    {
        nValue = -nValue;
    }
    return nValue;
}
// u Just returns the BitCount bits of buf and change it to decimal.
// e.g. BitCount = 4, buf = 01011100, then return 5(0101)
static uint32_t u(uint32_t nBitCount, uint8_t* buf, uint32_t* nStartBit)
{
uint32_t dwRet = 0;
int i = 0;
for (i = 0; i < nBitCount; i++)
{
dwRet <<= 1;
if (buf[*nStartBit / 8] & (0x80 >> (*nStartBit % 8)))
{
dwRet += 1;
}
(*nStartBit)++;
}
return dwRet;
}
// w h profile_idc level_idc
static int get_resolution(mdf_h264_codecpar_t* info, uint8_t* pspsData, uint32_t nspsDataLen)//, int* nWidth, int* nHeight, int* profile, int* level, int* nDeinterlace)
{
//uint8_t ucLastNalType = pspsData[0];
//Analyze SPS to find width and height
uint32_t nStartBit = 0;
uint8_t* pBuf = pspsData;
uint32_t nDataLeft = nspsDataLen;
//int forbidden_zero_bit =
u(1, pBuf, &nStartBit);
//int nal_ref_idc =
u(2, pBuf, &nStartBit);
uint32_t nal_unit_type = u(5, pBuf, &nStartBit);
//printf("get_resolution forbidden_zero_bit=%d, nal_ref_idc=%d, nal_unit_type=%d ",forbidden_zero_bit, nal_ref_idc, nal_unit_type);
if (nal_unit_type == FRAME_SPS)
{
uint32_t profile_idc = u(8, pBuf, &nStartBit);
//int constraint_set0_flag =
u(1, pBuf, &nStartBit);//(buf[1] & 0x80)>>7;
//int constraint_set1_flag =
u(1, pBuf, &nStartBit);//(buf[1] & 0x40)>>6;
//int constraint_set2_flag =
u(1, pBuf, &nStartBit);//(buf[1] & 0x20)>>5;
//int constraint_set3_flag =
u(1, pBuf, &nStartBit);//(buf[1] & 0x10)>>4;
//int reserved_zero_4bits =
u(4, pBuf, &nStartBit);
uint32_t level_idc = u(8, pBuf, &nStartBit);
//int seq_parameter_set_id =
Ue(pBuf, nDataLeft, &nStartBit);
info->profile_idc = profile_idc;
info->level_idc = level_idc;
uint32_t chroma_format_idc = 0;
if (profile_idc == 100 || // High profile
profile_idc == 110 || // High10 profile
profile_idc == 122 || // High422 profile
profile_idc == 244 || // High444 Predictive profile
profile_idc == 44 || // Cavlc444 profile
profile_idc == 83 || // Scalable Constrained High profile (SVC)
profile_idc == 86 || // Scalable High Intra profile (SVC)
profile_idc == 118 || // Stereo High profile (MVC)
profile_idc == 128 || // Multiview High profile (MVC)
profile_idc == 138 || // Multiview Depth High profile (MVCD)
profile_idc == 144) // old High444 profile
{
chroma_format_idc = Ue(pBuf, nDataLeft, &nStartBit);
if (chroma_format_idc == 3)
{
//int residual_colour_transform_flag =
u(1, pBuf, &nStartBit);
}
//int bit_depth_luma_minus8 =
Ue(pBuf, nDataLeft, &nStartBit);
//int bit_depth_chroma_minus8 =
Ue(pBuf, nDataLeft, &nStartBit);
//int qpprime_y_zero_transform_bypass_flag =
u(1, pBuf, &nStartBit);
uint32_t seq_scaling_matrix_present_flag = u(1, pBuf, &nStartBit);
//uint32_t seq_scaling_list_present_flag[8];
if (seq_scaling_matrix_present_flag)
{
int i = 0;
for (i = 0; i < 8; i++)
{
// seq_scaling_list_present_flag[i] =
u(1, pBuf, &nStartBit);
}
}
}
else
{
chroma_format_idc = 1;
}
//int log2_max_frame_num_minus4 =
Ue(pBuf, nDataLeft, &nStartBit);
uint32_t pic_order_cnt_type = Ue(pBuf, nDataLeft, &nStartBit);
if (pic_order_cnt_type == 0)
{
//int log2_max_pic_order_cnt_lsb_minus4 =
Ue(pBuf, nDataLeft, &nStartBit);
}
else if (pic_order_cnt_type == 1)
{
//int delta_pic_order_always_zero_flag =
u(1, pBuf, &nStartBit);
//int offset_for_non_ref_pic =
Se(pBuf, nDataLeft, &nStartBit);
//int offset_for_top_to_bottom_field =
Se(pBuf, nDataLeft, &nStartBit);
uint32_t num_ref_frames_in_pic_order_cnt_cycle = Ue(pBuf, nDataLeft, &nStartBit);
int* offset_for_ref_frame = (int*)malloc(num_ref_frames_in_pic_order_cnt_cycle * sizeof(int));
int i = 0;
for (i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++)
offset_for_ref_frame[i] = Se(pBuf, nDataLeft, &nStartBit);
free(offset_for_ref_frame);
}
//int num_ref_frames =
Ue(pBuf, nDataLeft, &nStartBit);
//int gaps_in_frame_num_value_allowed_flag =
u(1, pBuf, &nStartBit);
uint32_t pic_width_in_mbs_minus1 = Ue(pBuf, nDataLeft, &nStartBit);
uint32_t pic_height_in_map_units_minus1 = Ue(pBuf, nDataLeft, &nStartBit);
uint32_t frame_mbs_only_flag = u(1, pBuf, &nStartBit);
info->deinterlace = 0;
if (0 == frame_mbs_only_flag)
{
info->deinterlace = 1;
}
if (!frame_mbs_only_flag)
{
//int mb_adaptive_frame_field_flag =
u(1, pBuf, &nStartBit);
}
//int direct_8x8_inference_flag =
u(1, pBuf, &nStartBit);
uint32_t frame_cropping_flag = u(1, pBuf, &nStartBit);
uint32_t frame_crop_left_offset = 0;
uint32_t frame_crop_right_offset = 0;
uint32_t frame_crop_top_offset = 0;
uint32_t frame_crop_bottom_offset = 0;
if (frame_cropping_flag)
{
frame_crop_left_offset = Ue(pBuf, nDataLeft, &nStartBit);
frame_crop_right_offset = Ue(pBuf, nDataLeft, &nStartBit);
frame_crop_top_offset = Ue(pBuf, nDataLeft, &nStartBit);
frame_crop_bottom_offset = Ue(pBuf, nDataLeft, &nStartBit);
}
info->width = (pic_width_in_mbs_minus1 + 1) * 16;
//*nHeight = (pic_height_in_map_units_minus1 + 1) * 16;
info->height = (2 - frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1) * 16;
if (frame_cropping_flag)
{
uint32_t crop_unit_x;
uint32_t crop_unit_y;
if (0 == chroma_format_idc) // monochrome
{
crop_unit_x = 1;
crop_unit_y = 2 - frame_mbs_only_flag;
}
else if (1 == chroma_format_idc) // 4:2:0
{
crop_unit_x = 2;
crop_unit_y = 2 * (2 - frame_mbs_only_flag);
}
else if (2 == chroma_format_idc) // 4:2:2
{
crop_unit_x = 2;
crop_unit_y = 2 - frame_mbs_only_flag;
}
else // 3 == sps.chroma_format_idc // 4:4:4
{
crop_unit_x = 1;
crop_unit_y = 2 - frame_mbs_only_flag;
}
info->width -= crop_unit_x * (frame_crop_left_offset + frame_crop_right_offset);
info->height -= crop_unit_y * (frame_crop_top_offset + frame_crop_bottom_offset);
}
//printf( "get_resolution:: Find SPS frame, Invalid nal unit type, nDataLen(%d), nal_unit_type(%d)", nspsDataLen, nal_unit_type);
return 0;
}
return -1;
}
static int parseAvccNalu(mdf_h264_codecpar_t* info, uint8_t* buf, int size)
{
//find sps pps
int64_t index = 0;
int ret = -1;
while (index < size)
{
index += 4;
if (index >= size)
{
// parse error, no nal len;
break;
}
uint32_t len = get_uint32(&buf[index - 4]);
if (index + len > size)
{
break;
}
int nal_type = buf[index] & 0x1f;
if (nal_type == FRAME_SPS)
{
// get sps
// parse sps
ret = get_resolution(info, &buf[index], len);
info->sps_ptr = buf + index;
info->sps_size = len;
if (info->pps_ptr) break;
}
else if (nal_type == FRAME_PPS)
{
// get pps
info->pps_ptr = buf + index;
info->pps_size = len;
if (info->sps_ptr) break;
}
index += len;
}
return ret;
}
// Find the first AnnexB start code (00 00 00 01 or 00 00 01) in data.
// On success returns TRUE, sets *front to the first byte of the start code
// and *latter to the first byte after it; otherwise returns FALSE.
static int searchStartCode(uint8_t* data, int size, uint8_t** front, uint8_t** latter)
{
    uint8_t* end = data + size;
    uint8_t* cur;
    for (cur = data; cur < end; ++cur)
    {
        ptrdiff_t remain = end - cur;
        if (remain >= 3 && cur[0] == 0 && cur[1] == 0)
        {
            // Prefer the 4-byte form when it fits.
            if (remain >= 4 && cur[2] == 0 && cur[3] == 1)
            {
                *front = cur;
                *latter = cur + 4;
                return TRUE;
            }
            if (cur[2] == 1)
            {
                *front = cur;
                *latter = cur + 3;
                return TRUE;
            }
        }
    }
    return FALSE;
}
int parseAnnexNalu(mdf_h264_codecpar_t* info, uint8_t* buf, int size)
{
uint8_t* p, * end;
int len;
uint8_t* last_end, * start = 0, * last_start;
int ret = -1;
len = size;
p = buf;
end = buf + size;
last_start = p;
while (searchStartCode(p, len, &last_end, &start))
{
if (last_end > last_start && last_end < end)
{
//nal unit type
if ((*last_start & 0x1f) == FRAME_SPS)
{
//sps
ret = get_resolution(info, last_start, (uint32_t)(last_end - last_start));
info->sps_ptr = last_start;
info->sps_size = (uint32_t)(last_end - last_start);
if (info->pps_ptr) break;
}
else if ((*last_start & 0x1f) == FRAME_PPS)
{
info->pps_ptr = last_start;
info->pps_size = (uint32_t)(last_end - last_start);
if (info->sps_ptr)break;
}
}
p = start;
len = (int)(end - p);
last_start = start;
}
if ((*start & 0x1f) == FRAME_SPS)
{
ret = get_resolution(info, start, (uint32_t)(end - start));
info->sps_ptr = start;
info->sps_size = (uint32_t)(end - start);
}
else if ((*start & 0x1f) == FRAME_PPS)
{
//parserPps(start, end - start);
info->pps_ptr = start;
info->pps_size = (uint32_t)(end - start);
}
return ret;
}
AVCC格式说明
AVCC格式 也叫AVC1格式、MPEG-4格式, NALU前带长度字段(length-prefixed)而非起始码。用于mp4/flv/mkv, VideoToolbox。(注: "Byte-Stream Format"指的是带起始码的AnnexB格式, 不是AVCC。)
例如:
01 42 C0 28 ff e1 00 18 67 64 00 29 ac b4 02 80 2d d0 80 00 00 03 00 80 0f 42 40 07 8c 19 50 01 00 04 68 ef 3c b0 fd f8 f8 00 00 00 00
前4个字节:
0x01: version
0x42: avc profile (首个SPS的第1个字节)
0xc0: avc compatibility (首个SPS的第2个字节)
0x28: avc level (首个SPS的第3个字节; 这3个字节应与SPS数据中NAL头之后的前3个字节对应)
第5个字节:
0xff:
6_bit: 保留位, 固定为 111111
2_bit: lengthSizeMinusOne, NALU长度字段的字节数减1 (11 表示长度字段占4字节)
第6个字节:
0xe1: [111 00001]
3_bit: 默认 111
5_bit: 接下来的sps或pps的个数::这里为1
第7 8个字节:
0x00 0x18: 表示接下来sps或者pps的长度为24
第9个字节:
0x67: [0110 0111] nalu_type为7,表示SPS,即从第9个字节开始的24个字节为SPS数据
第33个字节:9 (sps_pos) + 24(sps_size)
0x01: 接下来的sps或pps的个数::这里为1
第34 35字节:
0x00 0x04: 表示接下来sps或者pps的长度为4
第36个字节:
0x68: [0110 1000] nalu_type为8,表示PPS