MP4音频解码信息

最新推荐文章于 2024-03-11 13:07:43 发布

flyingqr

最新推荐文章于 2024-03-11 13:07:43 发布

阅读量1.7k

点赞数

分类专栏： media framework 文章标签： audio compression struct codec descriptor standards

media framework 专栏收录该内容

25 篇文章 0 订阅

订阅专栏

MP4文件格式分为头部和数据两部分，头部是由许多被称作Atom的结构单元嵌套或排列而成，数据部分则完全为实际数据不包含元信息，因此具体解码时音视频帧的位置和大小都要在头部获取。详细内容见以下链接：
http://wqyuwss.52rd.net
这里总结下音频解码信息获取的一些经验，当然详细内容需要查看quick time file format的文档。
MP4的音频解码信息保存在如下嵌套的Atom中：{moov{trak{mdia{minf{stbl{stsd}}}}}}，其中minf下含有smhd（与stbl同级）的trak即为音频轨。
stsd可能包括多个音频信息的描述，结构如下：

/*
 * One audio entry of the stsd atom, laid out as the bytes appear in the file.
 * Multi-byte fields are filled straight from the file and must be
 * byte-swapped before use on a little-endian host.
 */
typedef struct stsdtable {
    unsigned int size;        /* atom size */
    char format[4];           /* audio coding format (four characters, not NUL-terminated) */
    int res1;
    int ref;
    short version;            /* version */
    short pad1;
    int pad2;
    short channels;           /* channels */
    short bitspersample;
    short compress_id;
    short res2;
    short samplerate1;        /* sample rate */
    short samplerate2;
    /* The four fields below are present only if version == 1. */
    int sampleperpacket;
    int bytesperpacket;
    int bytesperframe;
    int bytespersample;
} stsdtable;

其中format对应音频编码格式：
PCM_S32BE, in32
PCM_S32LE, in32
PCM_S24BE, in24
PCM_S24LE, in24
PCM_S16BE, twos // 16 bits //
PCM_S16LE, sowt //
PCM_S16LE, lpcm
PCM_F32BE, fl32
PCM_F64BE, fl64
PCM_S8,     sowt
PCM_U8,     raw // 8 bits unsigned
PCM_U8,     NONE // uncompressed
PCM_MULAW, ulaw //
PCM_ALAW,   alaw //
ADPCM_IMA_QT, ima4 // IMA-4 ADPCM //
MACE3,      MAC3 // Macintosh Audio Compression and Expansion 3:1 ///
MACE6,      MAC6 // Macintosh Audio Compression and Expansion 6:1 //
MP3,        .mp3 // MPEG layer 3; sample files at http://www.3ivx.com/showcase.html use this tag
MP3,        0x6D730055 // MPEG layer 3 //
OGG_VORBIS, OggS // sample files at http://heroinewarrior.com/xmovie.php3 use this tag
AAC,        mp4a // MPEG-4 AAC //
AC3,        ac-3 // ETSI TS 102 366 Annex F //
AMR_NB,     samr // AMR-NB 3gp //
AMR_WB,     sawb // AMR-WB 3gp//
GSM,        agsm
ALAC,       alac // Apple Lossless //
QCELP,      Qclp
QCELP,      sqcp // ISO Media fourcc //
QDM2,       QDM2 // QDM2 //
DVAUDIO,    vdva
DVAUDIO,    dvca
WMAV2,      WMA2
这个获取比较简单，下面是解码私有数据的获取：
这些解码私有数据也保存在Atom中，通常在上面结构体的后面，有esds、frma、mp4a、wave。AAC的私有数据保存在esds的0x05标签的数据，QDM2的则是"wave"Atom的数据部分(以下按顺序分析)：
   4字节长度
   4字节 "esds" or "m4ds" 标志
   4字节版本标识

   1字节 ES描述类型标签 0x03
   --3字节扩展描述类型标签可能没有
   1字节描述类型长度
   2字节 ES ID
   1字节流优先级

   1字节解码配置描述类型标签 0x04
   --3字节扩展描述类型标签可能没有
   1字节描述类型长度
   1字节描述对象ID
   1字节流类型（例中为0x15）
   3字节缓冲区大小
   4字节最大码率
   4字节平均码率

   1字节解码特定信息(DecoderSpecificInfo)描述类型标签 0x05
   --3字节扩展描述类型标签可能没有
   1字节长度，其后紧跟该长度的解码私有数据

   1字节 0x06
   0x06不再分析
下面是一个例子：
长度标签
00015218h: 00 00 00 10 73 6D 68 64 00 00 00 00 00 00 00 00 ; ....smhd........
00015228h: 00 00 00 24 64 69 6E 66 00 00 00 1C 64 72 65 66 ; ...$dinf....dref
00015238h: 00 00 00 00 00 00 00 01 00 00 00 0C 75 72 6C 20 ; ............url
00015248h: 00 00 00 01 00 02 C0 97 73 74 62 6C 00 00 00 5B ; ........stbl...[
00015258h: 73 74 73 64 00 00 00 00 00 00 00 01 00 00 00 4B ; stsd...........K
00015268h: 6D 70 34 61 00 00 00 00 00 00 00 01 00 00 00 00 ; mp4a............
00015278h: 00 00 00 00 00 01 00 10 00 00 00 00 7D 00 00 00 ; ............}...
00015288h: 00 00 00 27 65 73 64 73 00 00 00 00 03 19 00 00 ; ...'esds........
00015298h: 00 04 11 40 15 00 00 D2 00 00 BB 88 00 00 7D 00 ; ...@..........}.
000152a8h: 05 02 12 88 06 01 02                            ; .......

0x12 0x88即私有数据(对应ffmpeg中AVCodecContext.extradata)
下面是mp4音频部分分析的代码：

//MP4Analyze.h #define uint8_t unsigned char /******atom tag*******/ uint8_t moov[] = "moov"; uint8_t trak[] = "trak"; uint8_t mdia[] = "mdia"; uint8_t minf[] = "minf"; uint8_t stbl[] = "stbl"; uint8_t stsd[] = "stsd"; uint8_t stsc[] = "stsc"; uint8_t stsz[] = "stsz"; uint8_t stco[] = "stco"; uint8_t ftyp[] = "ftyp"; uint8_t mdat[] = "mdat"; typedef struct Atom { unsigned int size; uint8_t tag[4]; int ver_flag; unsigned int num_of_entries; unsigned int pos; uint8_t *data; } Atom; /****audio format****/ uint8_t kmp3[] = {0x6D,0x73,0x00,0x55}; uint8_t fmp3[] = ".mp3"; uint8_t raw[] = "raw "; uint8_t wave[] = "wave"; uint8_t mp4a[] = "mp4a"; uint8_t enca[] = "enca";//encrypted to ISO/IEC 14496-12 or 3GPP standards uint8_t smar[] = "smar";//encoded to 3GPP GSM 6.10 AMR narrowband standards uint8_t sawb[] = "sawb";//encoded to 3GPP GSM 6.10 AMR wideband standards uint8_t m4ds[] = "m4ds";//encoded to ISO/IEC 14496-10 AVC standards uint8_t esds[] = "esds"; uint8_t fram[] = "fram"; /*** We may not need these ***/ #define MKTAG(a,b,c,d) (a | (b << 8) | (c << 16) | (d << 24)) typedef struct AVCodecTag { int id; unsigned int tag; } AVCodecTag; typedef struct stsdtable { unsigned int size; char format[4]; int res1; int ref; short version; short pad1; int pad2; short channels; short bitspersample; short compress_id; short res2; short samplerate1; short samplerate2; //{if(version==1) int sampleperpacket; int bytesperpacket; int bytesperframe; int bytespersample; //} } stsdtable; /***** result is stored here ******/ typedef struct sampletable { unsigned int size; unsigned int id_of_sd; } sampletable; //MP4Analyze.cpp #include "MP4Analyze.h" #include <vector> #include <map> #include <iostream> #include <string> #ifdef WIN32 #include <winsock2.h> #pragma comment(lib, "Ws2_32.lib") #pragma warning (disable:4786) #endif #ifdef __GNUG__ #include <netinet/in.h> #endif using namespace std; /** *** mp4存在宽度为8字节的wide atom tag，需要注意，这里暂未考虑 **/ /* * check if a mov/mp4/3gp type */ int 
check_format(uint8_t *data, int size) { if(strncmp((char*)moov,(char*)(data+4),4)==0 || strncmp((char*)ftyp,(char*)(data+4),4)==0 ||strncmp((char*)mdat,(char*)(data+4),4)==0 ) return 0; return -1; } unsigned int get_size(const uint8_t *data,int size) { unsigned int tmp = 0; for(int i=0; i<size; ++i) { tmp <<= 8; tmp += *data++; } return tmp; } /* if found,return the offset from the data[0]*/ int seek_tag(uint8_t tag[],uint8_t *data, unsigned int size1,uint8_t **pos,unsigned int *size2) { if(data == NULL || size1 == 0) return -1; unsigned int tag_size = get_size(data,4); if(tag_size >size1 + 8) return -1; unsigned int tmp = 0; while(strncmp((char*)data+4,(char*)tag,4) != 0) { // printf("%s/n",data+4); if(tag_size==0) return -1; if(tag_size < size1 + 8) { data += tag_size; tmp += tag_size; } else return -1; tag_size = get_size(data,4); } printf("find :%c%c%c%c/n",tag[0],tag[1],tag[2],tag[3]); if(tmp + tag_size > size1 ) printf("warning: the atom may be not complete!/n"); *pos = data+8; *size2 = tag_size -8; return tmp; } /*** elementary stream descriptor analyse ***/ /* unsigned int codec_get_tag(const AVCodecTag *tags, int id) { while (tags->id != CODEC_ID_NONE) { if (tags->id == id) return tags->tag; tags++; } return 0; } /* may not need analyse int esds_analyze(uint8_t *data, unsigned int size) { return 0; } */ /*version == 2 ??? 
reffer to ffmpeg source mov.c line 943 if (format == MKTAG('l','p','c','m')) st->codec->codec_id = mov_get_lpcm_codec_id(st->codec->bits_per_coded_sample, flags); */ vector<stsdtable>& get_audio_info(uint8_t *data, unsigned int size, vector<stsdtable>& stable)//stsd { uint8_t * datapos = data; Atom *stsd_audio =(Atom *)data; int tmp_size = 16; printf("size : %u/n",ntohl(stsd_audio->size)); printf("num_entr: %u/n",ntohl(stsd_audio->num_of_entries)); for(int i=0; i < ntohl(stsd_audio->num_of_entries); ++i) { if(tmp_size > size)//注意 return stable; datapos += tmp_size; stsdtable * audio_entry = (stsdtable *)(datapos); stable.push_back(*audio_entry);//这里存入的是网络序的数据，使用时需进行转换 tmp_size += ntohl(audio_entry->size); /***************/ printf("--tablesize: %d/n",ntohl(audio_entry->size)); printf("--format : %s/n",audio_entry->format); printf("--version : %d/n",ntohs(audio_entry->version)); printf("--channels: %d/n",ntohs(audio_entry->channels)); printf("--bitpersam: %d/n",ntohs(audio_entry->bitspersample)); printf("--IDcompress: %d/n",ntohs(audio_entry->compress_id)); printf("--samplerate: %d.%d/n",ntohs(audio_entry->samplerate1),ntohs(audio_entry->samplerate2)); /**************/ tmp_size = sizeof(stsdtable); if(ntohs(audio_entry->version)==0) { tmp_size -= 16; } datapos += tmp_size; //if(ntohs(audio_entry->compress_id)==-2)//此处尚需考证 if(ntohl(audio_entry->size) > sizeof(stsdtable)) { printf("----atom size:%d/n",get_size(datapos,4)); printf("----atom name:%c%c%c%c/n",datapos[4],datapos[5],datapos[6],datapos[7]); if(strncmp((char*)datapos,(char*)esds,4)==0) { //handle esds } } } return stable; } map<unsigned int,sampletable> & get_packet_offset(uint8_t *STBL[], map<unsigned int,sampletable>& table) { //table.insert(pair<long,sampletable>(1,sample)); unsigned int num_sam_to_chunk = get_size(STBL[0]-4,4);//stsc unsigned int num_sample = get_size(STBL[1]-4,4);//stsz unsigned int num_chunk = get_size(STBL[2]-4,4);//stco unsigned int chunk_index = 0; unsigned int next_chunk_index = 0; 
uint8_t *cur_sam_to_chunk = STBL[0]; uint8_t *cur_sam_size = STBL[1]; uint8_t *cur_chunk_offset = STBL[2]; sampletable sample; printf("number of stsc entries:%d /nnumber of sample size:%d /nnumber of chunk offset:%d/n",num_sam_to_chunk,num_sample,num_chunk); for(unsigned int i = 0; i < num_sam_to_chunk; ++i)//对所有的entries { chunk_index = get_size(cur_sam_to_chunk,4); next_chunk_index = get_size(cur_sam_to_chunk+12,4); sample.id_of_sd = get_size(cur_sam_to_chunk+8,4); if(i == num_sam_to_chunk -1)//最后一个 { next_chunk_index = num_chunk+1; } printf("chunk_index:(%d---%d)/n",chunk_index,next_chunk_index); for(unsigned int k=chunk_index; k < next_chunk_index; ++k)//当前chunk序号到下一个chunk序号之间的chunk {//处理所有重复的chunk printf("chunk_index:%d sample num:%d/n",chunk_index,get_size(cur_sam_to_chunk+4,4)); unsigned int offset = get_size(cur_chunk_offset+(chunk_index-1)*4,4); for(unsigned int j=0; j < get_size(cur_sam_to_chunk+4,4); ++j)//chunk内地sample数目 {//处理该chunk中的sample sample.size = get_size(cur_sam_size,4); printf("--sample offset:%d %x size:%d/n",offset,offset,sample.size); table.insert(pair<unsigned int,sampletable>(offset,sample)); offset = offset + sample.size; cur_sam_size += 4; } system("pause"); chunk_index++; } cur_sam_to_chunk += 12; } return table; } int seek_audio_atom( uint8_t *data1, unsigned int size1) { uint8_t tag[] = "mdiaminfsmhd"; uint8_t *datapos; unsigned int tag_size; uint8_t *data; unsigned int size; int offset_of_atom = 0; if((offset_of_atom = seek_tag(moov, data1, size1, &data, &size)) == -1) return -1; if(offset_of_atom + size >size1) { //some handles printf("moov atom is not complete,need more data"); } data1 = data; size1 = size; uint8_t *nexttrak = data; unsigned int traksize = size; int i=0; while(1) { printf("-----/n"); if(seek_tag(trak, nexttrak, traksize, &datapos, &tag_size) != -1) { nexttrak = datapos + tag_size; if(size1 < (nexttrak - data1)) return -1; traksize = size1 - (nexttrak - data1); data = datapos; size = tag_size; } else { return -1; } 
i=0; while(i<3) { if(seek_tag(tag+i*4, data, size, &datapos, &tag_size) != -1) { if(i==2) break; data = datapos; size = tag_size; ++i; } else { break; } } if(strncmp("smhd",(char*)(datapos-4),4) == 0) { if(seek_tag(stbl, data, size, &datapos, &tag_size)!= -1) { printf("—find audio stbl—!/n"); data = datapos; size = tag_size; if(seek_tag(stsd, data, size, &datapos, &tag_size) != -1) { vector<stsdtable> stable; //音频信息 get_audio_info(datapos-8, tag_size,stable); } uint8_t *STBL[3] ={NULL,NULL,NULL};// uint8_t *datapos1; unsigned int tag_size1;// if(seek_tag(stsc, data, size, &datapos1, &tag_size1) != -1) { STBL[0] = datapos1 + 8; } uint8_t *datapos2; unsigned int tag_size2; if(seek_tag(stsz, data, size, &datapos2, &tag_size2) != -1) { STBL[1] = datapos2 + 12; } uint8_t *datapos3; unsigned int tag_size3; if(seek_tag(stco, data, size, &datapos3, &tag_size3) != -1) { STBL[2] = datapos3 + 8; } if(STBL[0] && STBL[1] && STBL[2] ) { map<unsigned int,sampletable> postable;//音频帧信息 get_packet_offset(STBL,postable); } } return 0; } } return -1; } int main(char arg, char *argv[]) { FILE *mp4; cout<<"please input the file name :"<<endl; string filename; cin>>filename; mp4 = fopen(filename.c_str(),"rb"); uint8_t buffer[300000]; fread(buffer,1,300000,mp4); seek_audio_atom((uint8_t*)buffer,300000); fclose(mp4); return 0; }