文章目录
前言
我们通过编码得到的h264和aac数据通常需要封装成mp4文件,可以使用mp4v2实现这样的封装,封装的时候需要注意一些细节,比如读取sps、pps,判断idr、音频固定时间戳等。
一、视频封装
1.获取nalu
我们拿到的h264数据,需要先将其内部的nalu解析出来,解析方法参考《C++ 读取h264中的nalu》。
2.添加视频流
添加视频流,通常需要取得sps,时间基(timescale)一般设为90000,相对时间戳(duration)则是timescale除以framerate即可。
// NALU data; the low 5 bits of its first byte carry the NALU type.
unsigned char* pNalu;
unsigned char naluType;
naluType = pNalu[0] & 0x1F;
MP4TrackId video = MP4_INVALID_TRACK_ID;
// Video time base: 90000 ticks per second.
int timeScale = 90000;
switch (naluType)
{
case 7: // SPS
    // Create the video track once, when the first SPS is seen.
    if (video == MP4_INVALID_TRACK_ID)
    {
        // The three bytes after the NAL header are, in order, the profile
        // indication, the profile compatibility flags and the level indication.
        // The level must come from pNalu[3]; passing pNalu[2] twice would write
        // the compatibility byte as the level.
        // Last argument 3 = length-field size minus one (4-byte length prefix).
        video = MP4AddH264VideoTrack(pHandle, timeScale, timeScale / framerate, width, height, pNalu[1], pNalu[2], pNalu[3], 3);
    }
    break;
}
3.写入sps、pps
取得nalu后判断其naluType,通常h264首帧是一个idr,idr的第一个nalu是sps、第二个nalu是pps、第三个nalu才是idr帧。
//nalu数据
unsigned char* pNalu;
//nalu类型
unsigned char naluType;
//nalu数据长度
int len;
//获取nalu类型
naluType = pNalu[0] & 0x1F;
switch (naluType)
{
case 7: // SPS
MP4AddH264SequenceParameterSet(pHandle, videoId, pNalu, len);
break;
case 8: // PPS
MP4AddH264PictureParameterSet(pHandle, videoId, pNalu, len);
break;
}
4.写入视频帧
判断nalu的类型为视频帧时通过MP4WriteSample方法写入,需要注意的是判断是idr帧时MP4WriteSample最后一个参数设为true,否则为false。相对时间戳(duration)在实时流中通常是使用帧间时间差。
// NALU data
unsigned char* pNalu;
// NALU type
unsigned char naluType;
// Length of the NALU data in bytes
int len;
// Extract the NALU type from the low 5 bits of the first byte
naluType = pNalu[0] & 0x1F;
bool isIdr = true;
// Timestamp of the previous frame.
// NOTE(review): shown as a local for illustration; in real code this value has
// to outlive a single frame, otherwise every duration is measured from 0.
int64_t timestamp = 0;
// NALU types 1-5 are video frames; 5 is an IDR frame.
switch (naluType)
{
case 1:
case 2:
case 3:
case 4:
    isIdr = false;
    // fall through: non-IDR frames are written the same way, only the flag differs
case 5: // IDR
{
    // An MP4 sample needs a 4-byte big-endian length in front of the NALU.
    // NOTE(review): assumes the 4 bytes preceding pNalu are writable
    // (e.g. the start code that was there) - confirm against the parser.
    pNalu -= 4;
    pNalu[0] = (len >> 24) & 0xFF;
    pNalu[1] = (len >> 16) & 0xFF;
    pNalu[2] = (len >> 8) & 0xFF;
    pNalu[3] = (len >> 0) & 0xFF;
    // Real-time streams use the gap between consecutive frame timestamps.
    // The original declared `_duration` but passed the undeclared `duration`.
    auto duration = frame.timestamp - timestamp;
    timestamp = frame.timestamp;
    // Convert to timeScale ticks (the /1000 presumably means timestamps are in
    // milliseconds - confirm). The last argument marks IDR frames as sync samples.
    MP4WriteSample(pHandle, videoId, pNalu, len + 4, duration * timeScale / 1000, 0, isIdr);
}
    break;
}
二、音频封装
1.添加音频流
添加音频轨道需要注意时间基(timescale)和相对时间戳(duration)的设置,最方便的方法是timescale=samplerate,duration=1024。因为一个aac帧就包含1024个采样,音频1秒有samplerate个采样,所以一帧的时间就是1024。无论samplerate怎么变,timescale只要等于samplerate则duration就是1024。
// Audio track with timescale 44100 (equal to the sample rate) and a fixed
// per-sample duration of 1024 ticks (one AAC frame holds 1024 samples).
auto audio=MP4AddAudioTrack(pHandle, 44100, 1024, MP4_MPEG4_AUDIO_TYPE);
2.设置DecoderSpecificInfo
如果使用faac编码器,可以通过faacEncGetDecoderSpecificInfo方法获得DecoderSpecificInfo。它是一个16位的值,结构如下:
序号 | 字段 | 长度 | 说明 |
---|---|---|---|
1 | AAC Object Type | 5bit | AOT,比如: AAC_MAIN = 1, AAC_LC = 2, AAC_SSR = 3, AAC_LTP = 4, 略 |
2 | Sample Rate Index | 4bit | 采样率下标,下标对应的采样率如下 。 0: 96000 Hz 1: 88200 Hz 2 : 64000 Hz 3 : 48000 Hz 4 : 44100 Hz 5 : 32000 Hz 6 : 24000 Hz 7 : 22050 Hz 8 : 16000 Hz 9 : 12000 Hz 10 : 11025 Hz 11 : 8000 Hz 12 : 7350 Hz 13 : Reserved 14 : Reserved 15 : frequency is written explicitly |
3 | Channel Number | 4bit | 声道数。 0: Defined in AOT Specific Config 1: 1 channel : front - center 2 : 2 channels : front - left, front - right 3 : 3 channels : front - center, front - left, front - right 4 : 4 channels : front - center, front - left, front - right, back - center 5 : 5 channels : front - center, front - left, front - right, back - left, back - right 6 : 6 channels : front - center, front - left, front - right, back - left, back - right, LFE - channel 7 : 8 channels : front - center, front - left, front - right, side - left, side - right, back - left, back - right, LFE - channel 8 - 15 : Reserved |
4 | Don’t care | 3bit | 设0 |
计算方法如下:
#include<stdint.h>
/// Packs the 16-bit AAC decoder specific info.
///
/// Bit layout, most significant first: 5-bit audio object type, 4-bit sample
/// rate index, 4-bit channel number, 3 zero bits.
///
/// @param audioObjectType  AAC object type (e.g. 2 for AAC_LC).
/// @param sampleRateIndex  Index into the sample-rate table.
/// @param channelNumber    Channel configuration value.
/// @return A uint16_t whose two bytes, read in memory order, are the packed
///         config; callers pass its address as a 2-byte buffer.
uint16_t GetDecoderSpecificInfo(uint8_t audioObjectType, uint8_t sampleRateIndex, uint8_t channelNumber)
{
    // First byte: object type in the top 5 bits, then the upper 3 bits of the index.
    const uint8_t firstByte = static_cast<uint8_t>(((audioObjectType & 0x1f) << 3) | ((sampleRateIndex & 0x0e) >> 1));
    // Second byte: lowest index bit, then the 4-bit channel number, then 3 zero bits.
    const uint8_t secondByte = static_cast<uint8_t>(((sampleRateIndex & 0x01) << 7) | ((channelNumber & 0x0f) << 3));

    // Written byte by byte so the in-memory order does not depend on endianness.
    uint16_t packed = 0;
    uint8_t* bytes = reinterpret_cast<uint8_t*>(&packed);
    bytes[0] = firstByte;
    bytes[1] = secondByte;
    return packed;
}
获取到数据后通过如下方法设置:
// Object type 2 (AAC_LC), sample rate index 4 (44100 Hz), 2 channels.
auto config= GetDecoderSpecificInfo(2, 4, 2);
// Hand the two packed bytes to the audio track as its ES configuration.
MP4SetTrackESConfiguration(mp4, audio, (uint8_t*) &config, 2);
3.写入音频帧
音频通常使用恒定时间戳,写入时将duration参数设为MP4_INVALID_DURATION即可,此时使用添加音频轨道时设置的固定duration。
// Write one AAC frame. MP4_INVALID_DURATION is passed because audio uses a
// constant per-sample duration; the last argument (sync-sample flag) is 1.
MP4WriteSample(pHandle, audio, frame.data, frame.dataLength, MP4_INVALID_DURATION, 0, 1);
三、完整例子
1.将h264、aac文件封装成mp4
其中h264文件的解析对象NaluParse参考《C++ 读取h264中的nalu》,aac文件的解析对象AacADTSParse参考《C++ 解析aac-adts的头部信息》,GetDecoderSpecificInfo方法参考第二节第2小节。
#include<stdio.h>
#include<stdint.h>
#include<exception>
#include<stdexcept>
#include "mp4v2/mp4v2.h"
#include"AacADTSHeader.h"
#include"NaluParse.h"
int main(int argc, char* argv[])
{
MP4FileHandle mp4 = NULL;
FILE* h264= NULL;
FILE* aac= NULL;
MP4TrackId videoTrack = MP4_INVALID_TRACK_ID;
MP4TrackId audioTrack = MP4_INVALID_TRACK_ID;
try {
AC::NaluParse naluParse;
AC::Nalu nalu;
AC::AacADTSHeader adtsHeader;
unsigned char bufferFull[1028];
unsigned char* buf = bufferFull + 4;
int size;
int naluType;
unsigned char* aacData;
int aacDataLength;
int width = 1920;
int height = 1080;
int frameRate = 20;
int timeScale = 90000;
int sampling_frequency_set[] = { 196000 ,88200 ,64000 ,48000 ,44100 ,32000 ,24000 ,22050 ,16000 ,12000 , 11025 , 8000 , 7350 };
mp4 = MP4Create("test.mp4", 0);
if (mp4 == MP4_INVALID_FILE_HANDLE)
{
throw std::exception("Create mp4 handle fialed.\n");
}
h264 = fopen("test.h264", "rb+");
if (!h264)
{
throw std::exception("Opene h264 handle fialed.\n");
}
aac = fopen("test.aac", "rb+");
if (!aac)
{
throw std::exception("Opene aac handle fialed.\n");
}
//视频
while (1)
{
size = fread(buf, 1, 1024, h264);
if (size < 1)
break;
naluParse.SendH264Stream(buf, size);
while (naluParse.ReceiveNalu(nalu))
{
bool isIdr = true;
naluType = nalu.GetData()[0] & 0x1F;
switch (naluType)
{
case 01:
case 02:
case 03:
case 04:
isIdr = false;
case 05://idr
{
auto pNalu = nalu.GetData();
pNalu -= 4;
pNalu[0] = (nalu.GetDataLength() >> 24) & 0xFF;
pNalu[1] = (nalu.GetDataLength() >> 16) & 0xFF;
pNalu[2] = (nalu.GetDataLength() >> 8) & 0xFF;
pNalu[3] = (nalu.GetDataLength() >> 0) & 0xFF;
if (!MP4WriteSample(mp4, videoTrack, pNalu, nalu.GetDataLength() + 4, MP4_INVALID_DURATION, 0, isIdr))
{
printf("Error:Can't write sample.\n");
}
}
break;
case 7: // SPS
{
if (videoTrack == MP4_INVALID_TRACK_ID)
{
videoTrack = MP4AddH264VideoTrack
(mp4,
timeScale,
timeScale / frameRate,
width,
height,
nalu.GetData()[1],
nalu.GetData()[2],
nalu.GetData()[3],
3); // 4 bytes length before each NAL unit
if (videoTrack == MP4_INVALID_TRACK_ID)
{
printf("Error:Can't add track.\n");
return -1;
}
MP4SetVideoProfileLevel(mp4, 0x7F);
MP4AddH264SequenceParameterSet(mp4, videoTrack, nalu.GetData(), nalu.GetDataLength());
}
}
break;
case 8: // PPS
{
MP4AddH264PictureParameterSet(mp4, videoTrack, nalu.GetData(), nalu.GetDataLength());
}
break;
}
}
}
//音频
while (1)
{
int size;
size = fread(buf, 1, 7, aac);
if (size < 7)
break;
AC::AacADTSParse::BinaryToHeader(buf, adtsHeader);
size = fread(buf, 1, adtsHeader.aac_frame_length - 7, aac);
if (size != adtsHeader.aac_frame_length - 7)
{
throw std::exception("incorrect length!");
}
if (adtsHeader.protection_absent == 0)
//有校验位
{
aacData = buf + 2;
aacDataLength = size - 2;
//TODO:处理校验位buffer[0]、buffer[1]
}
else
{
aacData = buf;
aacDataLength = size;
}
//TODO:到此取得aac数据aacData、aacDataLength
if (audioTrack == MP4_INVALID_TRACK_ID)
{
audioTrack = MP4AddAudioTrack(mp4, sampling_frequency_set[adtsHeader.sampling_frequency_index], 1024, MP4_MPEG4_AUDIO_TYPE);
if (audioTrack == MP4_INVALID_TRACK_ID)
{
throw std::exception("Add audio track failed!");
}
MP4SetAudioProfileLevel(mp4, 0x02);
auto config = GetDecoderSpecificInfo(adtsHeader.profile + 1, adtsHeader.sampling_frequency_index, adtsHeader.channel_configuration);
if (!MP4SetTrackESConfiguration(mp4, audioTrack, (uint8_t*)&config, 2))
{
throw std::exception("set config failed!");
}
}
MP4WriteSample(mp4, audioTrack, aacData, aacDataLength, MP4_INVALID_DURATION, 0, 1);
}
}
catch (const std::exception& e)
{
printf("%s,\n", e.what());
}
if (aac)
fclose(aac);
if (h264)
fclose(h264);
if (mp4)
MP4Close(mp4);
return 0;
}
四、下载
vs2022项目完整代码下载:https://download.csdn.net/download/u013113678/85313965
总结
以上就是今天要讲的内容,对于MP4的封装需要注意的就是一些参数的获取和设置,以及时间戳的计算。视频通常都需要实时时间戳即间隔时间不是绝对按照帧率,而是根据实际采集时间来设置的。而音频的话一般情况下是采用固定帧率,这个与音频数据播放特点有关。