
这里总结一下获取音频解码信息的一些经验;详细内容请参阅 QuickTime File Format 规范文档。

/*
 * One audio sample-description entry as it is laid out inside an 'stsd' atom.
 * The parser overlays this struct on the raw file bytes and converts the
 * multi-byte fields with ntohl()/ntohs() before use.
 */
typedef struct stsdtable
{
    unsigned int size;      // size of this entry (atom size)
    char format[4];         // audio codec FourCC; four raw bytes, no terminating NUL
    int res1;
    int ref;
    short version;          // version of the sound description
    short pad1;
    int pad2;
    short channels;         // number of audio channels
    short bitspersample;
    short compress_id;
    short res2;
    short samplerate1;      // sample rate; printed by the parser as "samplerate1.samplerate2"
    short samplerate2;

    // NOTE(review): presumably the extra fields of a version-1 sound
    // description -- confirm against the QuickTime File Format specification.
    int sampleperpacket;
    int bytesperpacket;
    int bytesperframe;
    int bytespersample;

} stsdtable;

PCM_S32BE,  in32
PCM_S32LE,  in32
PCM_S24BE,  in24
PCM_S24LE,  in24
PCM_S16BE,  twos // 16 bits //
PCM_S16LE,  sowt // 
PCM_S16LE,  lpcm
PCM_F32BE,  fl32
PCM_F64BE,  fl64
PCM_S8,     sowt
PCM_U8,     raw  // 8 bits unsigned
PCM_U8,     NONE // uncompressed
PCM_MULAW,  ulaw //
PCM_ALAW,   alaw //
ADPCM_IMA_QT, ima4 // IMA-4 ADPCM //
MACE3,      MAC3 // Macintosh Audio Compression and Expansion 3:1 ///
MACE6,      MAC6 // Macintosh Audio Compression and Expansion 6:1 //
MP3,        .mp3 // MPEG layer 3; some sample files use this tag
MP3,        0x6D730055  // MPEG layer 3
OGG_VORBIS, OggS // some sample files use this tag
AAC,        mp4a // MPEG-4 AAC //
AC3,        ac-3 // ETSI TS 102 366 Annex F //
AMR_NB,     samr // AMR-NB 3gp //
AMR_WB,     sawb // AMR-WB 3gp//
GSM,        agsm
ALAC,       alac // Apple Lossless //
QCELP,      Qclp
QCELP,      sqcp // ISO Media fourcc //
QDM2,       QDM2 // QDM2 //
DVAUDIO,    vdva
DVAUDIO,    dvca
WMAV2,      WMA2
   4字节 长度
   4字节 "esds" or "m4ds" 标志
   4字节 版本标识

   1字节 ES描述类型标签 0x03
   --3字节 扩展描述类型标签 可能没有
   1字节 描述类型长度
   2字节 ES ID
   1字节 流优先级

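   1字节 解码配置描述类型标签 0x04
   --3字节 扩展描述类型标签 可能没有
   1字节 描述类型长度
   1字节 描述对象ID(对象类型,例如 0x40 表示 MPEG-4 音频)
   1字节 流类型
   3字节 缓冲区大小
   4字节 最大码率
   4字节 平均码率

   1字节 解码器特定信息描述类型标签 0x05
   --3字节 扩展描述类型标签 可能没有
   1字节 长度
   N字节 解码器特定信息(即私有数据)

   1字节 SL配置描述类型标签 0x06
   --3字节 扩展描述类型标签 可能没有
   1字节 长度
   1字节 SL配置值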
长度 标签
00015218h: 00 00 00 10 73 6D 68 64 00 00 00 00 00 00 00 00 ; ....smhd........
00015228h: 00 00 00 24 64 69 6E 66 00 00 00 1C 64 72 65 66 ; ...$dinf....dref
00015238h: 00 00 00 00 00 00 00 01 00 00 00 0C 75 72 6C 20 ; ............url
00015248h: 00 00 00 01 00 02 C0 97 73 74 62 6C 00 00 00 5B ; ........stbl...[
00015258h: 73 74 73 64 00 00 00 00 00 00 00 01 00 00 00 4B ; stsd...........K
00015268h: 6D 70 34 61 00 00 00 00 00 00 00 01 00 00 00 00 ; mp4a............
00015278h: 00 00 00 00 00 01 00 10 00 00 00 00 7D 00 00 00 ; ............}...
00015288h: 00 00 00 27 65 73 64 73 00 00 00 00 03 19 00 00 ; ...'esds........
00015298h: 00 04 11 40 15 00 00 D2 00 00 BB 88 00 00 7D 00 ; ...@..........}.
000152a8h: 05 02 12 88 06 01 02                            ; .......

0x12 0x88即私有数据(对应ffmpeg中AVCodecContext.extradata)


/*
 * Byte type used throughout the parser.
 * A typedef is used instead of `#define uint8_t unsigned char`: the macro
 * rewrote the identifier inside every header included afterwards, so a system
 * header that itself declares uint8_t (for example via <stdint.h>) no longer
 * compiled.  Repeating a typedef with the same type is accepted by C11 and C++.
 */
typedef unsigned char uint8_t;

/****** atom tags: the four-character codes searched for in the file *******/
uint8_t moov[]= "moov";
uint8_t trak[]= "trak";
uint8_t mdia[]= "mdia";
uint8_t minf[]= "minf";
uint8_t stbl[]= "stbl";
uint8_t stsd[]= "stsd";
uint8_t stsc[]= "stsc";
uint8_t stsz[]= "stsz";
uint8_t stco[]= "stco";
uint8_t ftyp[]= "ftyp";
uint8_t mdat[]= "mdat";

/*
 * Leading fields of an atom that carries a version/flags word and an entry
 * count.  The 'stsd' parser overlays this struct on the raw atom bytes and
 * reads `size` and `num_of_entries` through ntohl().
 */
typedef struct Atom
{
    unsigned int size;            // atom size as stored in the file
    uint8_t tag[4];               // four-character atom type
    int ver_flag;                 // version/flags word
    unsigned int num_of_entries;  // number of table entries that follow
    unsigned int pos;
    uint8_t *data;
} Atom;

/**** audio format FourCCs ****/
uint8_t kmp3[]= {0x6D,0x73,0x00,0x55}; // binary MP3 tag 0x6D730055 (contains a NUL byte)
uint8_t fmp3[]= ".mp3";
uint8_t raw[]= "raw ";

uint8_t wave[]= "wave";
uint8_t mp4a[]= "mp4a";
uint8_t enca[]= "enca";//encrypted to ISO/IEC 14496-12 or 3GPP standards

// AMR narrowband.  The FourCC is "samr" (as in the codec table: AMR_NB, samr);
// the value used to be the misspelling "smar".  The variable keeps its
// historical name so existing references continue to resolve.
uint8_t smar[]= "samr";//encoded to 3GPP GSM 6.10 AMR narrowband standards

uint8_t sawb[]= "sawb";//encoded to 3GPP GSM 6.10 AMR wideband standards

uint8_t m4ds[]= "m4ds";//encoded to ISO/IEC 14496-10 AVC standards

uint8_t esds[]= "esds";
uint8_t fram[]= "fram";

/*** We may not need these ***/

/*
 * Packs four byte values into one tag, `a` in the lowest byte and `d` in the
 * highest.  Every argument is parenthesized so that expression arguments
 * expand correctly, and converted to unsigned before shifting so that a `d`
 * of 0x80 or more does not overflow a signed int.
 */
#define MKTAG(a,b,c,d) ((unsigned int)(a) | ((unsigned int)(b) << 8) | ((unsigned int)(c) << 16) | ((unsigned int)(d) << 24))

/* One row of a codec lookup table: codec id and its container tag. */
typedef struct AVCodecTag{
    int id;
    unsigned int tag;
} AVCodecTag;

/*
 * One audio sample-description entry as it is laid out inside an 'stsd' atom.
 * The parser overlays this struct on the raw file bytes and converts the
 * multi-byte fields with ntohl()/ntohs() before use.
 */
typedef struct stsdtable
{
    unsigned int size;      // size of this entry (atom size)
    char format[4];         // audio codec FourCC; four raw bytes, no terminating NUL
    int res1;
    int ref;
    short version;          // version of the sound description
    short pad1;
    int pad2;
    short channels;         // number of audio channels
    short bitspersample;
    short compress_id;
    short res2;
    short samplerate1;      // sample rate; printed by the parser as "samplerate1.samplerate2"
    short samplerate2;

    // NOTE(review): presumably the extra fields of a version-1 sound
    // description -- confirm against the QuickTime File Format specification.
    int sampleperpacket;
    int bytesperpacket;
    int bytesperframe;
    int bytespersample;

} stsdtable;

/***** result is stored here ******/
/* Description of one sample as computed from the sample tables. */
typedef struct sampletable
{
    unsigned int size;      // sample size, read from the 'stsz' table
    unsigned int id_of_sd;  // sample description ID, read from the 'stsc' entry
} sampletable;

#include "MP4Analyze.h"
#include <cstdio>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#ifdef WIN32
#include <winsock2.h>
#pragma comment(lib,"Ws2_32.lib")
#pragma warning(disable:4786)
#endif

#ifdef __GNUG__
#include <netinet/in.h>
#endif
using namespace std;

/*
 * NOTE: MP4 files may contain a "wide" atom tag that is 8 bytes wide;
 * keep this in mind -- it is not handled here yet.
 */

/* check if a mov/mp4/3gp type */

/*
 * NOTE(review): this copy of the function is incomplete -- the braces and the
 * condition that selects `return 0` are missing, so only the two return
 * statements remain.  From those, 0 appears to be the "accepted" result and
 * -1 the "rejected" result; restore the original test before relying on it.
 */
int check_format(uint8_t*data, int size)
        return 0;
    return -1;

/*
 * Reads `size` consecutive bytes starting at `data` as one big-endian
 * unsigned integer (first byte is the most significant).
 *
 *   data - first byte to read; must point to at least `size` readable bytes
 *   size - number of bytes to combine; 0 or a negative value yields 0
 *
 * Returns the combined value.  With more than sizeof(unsigned int) bytes the
 * oldest bytes are shifted out.
 */
unsigned int get_size(const uint8_t *data, int size)
{
    unsigned int value = 0;
    for (int i = 0; i < size; ++i) {
        value = (value << 8) | *data++;
    }
    return value;
}
/*
 * Searches a run of sibling atoms for the first one whose four-byte type
 * equals `tag`.
 *
 *   tag   - four bytes to look for (only tag[0..3] are used)
 *   data  - start of the first atom header of the run
 *   size1 - number of bytes available at `data`
 *   pos   - out: first byte after the found atom's 8-byte header
 *   size2 - out: size of the found atom minus its 8-byte header
 *
 * If found, returns the offset of the atom header from data[0] and prints a
 * "find :" line; a warning is printed when the atom claims to extend past
 * `size1`.  Returns -1 when the tag is not found or the input cannot be
 * walked.  `*pos` and `*size2` are written only on success.
 */
int seek_tag(uint8_t tag[], uint8_t *data, unsigned int size1, uint8_t **pos, unsigned int *size2)
{
    /* An atom header (4-byte size + 4-byte type) needs 8 readable bytes. */
    if (data == NULL || size1 < 8)
        return -1;

    unsigned int tag_size = get_size(data, 4);
    /* Same test as `tag_size > size1 + 8`, written so it cannot wrap around. */
    if (tag_size > size1 && tag_size - size1 > 8)
        return -1;

    unsigned int offset = 0;
    /* memcmp, not strncmp: atom types are raw bytes and may contain 0x00. */
    while (memcmp(data + 4, tag, 4) != 0) {
        const unsigned int remaining = size1 - offset;   /* always >= 8 here */

        /* An atom smaller than its own header cannot be stepped over, and the
         * next header has to lie completely inside the buffer. */
        if (tag_size < 8 || tag_size > remaining - 8)
            return -1;

        data += tag_size;
        offset += tag_size;
        tag_size = get_size(data, 4);
    }

    /* A matching atom whose size field is below 8 has no usable payload size
     * (tag_size - 8 would wrap around). */
    if (tag_size < 8)
        return -1;

    printf("find :%c%c%c%c\n", tag[0], tag[1], tag[2], tag[3]);
    if (tag_size > size1 - offset)
        printf("warning: the atom may be not complete!\n");

    *pos = data + 8;
    *size2 = tag_size - 8;
    return (int)offset;
}
/*** elementary stream descriptor analyse ***/
unsigned int codec_get_tag(const AVCodecTag *tags, int id)
    while (tags->id != CODEC_ID_NONE) {
        if (tags->id == id)
            return tags->tag;
    return 0;
/*
 * Placeholder for analysing an elementary stream descriptor; it may not be
 * needed.  No parsing is performed: the arguments are ignored and 0 is
 * always returned.
 */
int esds_analyze(uint8_t *data, unsigned int size)
{
    (void)data;   /* unused until an analysis is implemented */
    (void)size;
    return 0;
}

/* version == 2 ??? refer to ffmpeg source mov.c line 943
if (format == MKTAG('l','p','c','m'))
        st->codec->codec_id = mov_get_lpcm_codec_id(st->codec->bits_per_coded_sample, flags);
*/

/*
 * Prints the header of an 'stsd' atom and the fields of its audio
 * sample-description entries.
 *
 *   data   - start of the 'stsd' atom; it is overlaid with Atom, so it has to
 *            begin at the atom's size field
 *   size   - byte count that the running offset is checked against
 *   stable - returned as the result; the statements visible here never
 *            modify it
 *
 * NOTE(review): this copy has lost its braces and apparently some statements
 * (for example the condition around `tmp_size -= 16`), so the nesting shown by
 * the indentation is only a hint -- restore from the original before use.
 */
vector<stsdtable>& get_audio_info(uint8_t*data, unsigned int size,vector<stsdtable>& stable)// data is the stsd atom

    uint8_t * datapos= data;
    Atom *stsd_audio =(Atom*)data;
    // presumably the bytes taken by the first four Atom fields
    // (size, tag, ver_flag, num_of_entries) -- TODO confirm
    int tmp_size = 16;

    // size fields are converted with ntohl() before printing
    printf("size : %u\n",ntohl(stsd_audio->size));
    printf("num_entr: %u\n",ntohl(stsd_audio->num_of_entries));

    for(int i=0; i< ntohl(stsd_audio->num_of_entries);++i)
        if(tmp_size> size)// note: stop once the running offset exceeds the given size

            return stable;
        datapos += tmp_size;
        // overlay the entry layout on the raw bytes
        stsdtable * audio_entry = (stsdtable *)(datapos);

        tmp_size +=ntohl(audio_entry->size);

        printf("--tablesize: %d\n",ntohl(audio_entry->size));
        // NOTE(review): format is a 4-byte array without a terminating NUL, so
        // %s keeps reading into the following fields until it meets a zero byte
        printf("--format : %s\n",audio_entry->format);
        printf("--version : %d\n",ntohs(audio_entry->version));
        printf("--channels: %d\n",ntohs(audio_entry->channels));
        printf("--bitpersam: %d\n",ntohs(audio_entry->bitspersample));
        printf("--IDcompress: %d\n",ntohs(audio_entry->compress_id));    
        printf("--samplerate: %d.%d\n",ntohs(audio_entry->samplerate1),ntohs(audio_entry->samplerate2));

     // NOTE(review): tmp_size is overwritten here, which discards the running
     // offset accumulated above; the guard that originally surrounded the
     // "-= 16" line is not present in this copy
     tmp_size = sizeof(stsdtable);
            tmp_size -= 16;
        datapos += tmp_size;

        // an entry larger than the struct is followed by a child atom: print its size and name
        if(ntohl(audio_entry->size)> sizeof(stsdtable))
            printf("----atom size:%d\n",get_size(datapos,4));
            printf("----atom name:%c%c%c%c\n",datapos[4],datapos[5],datapos[6],datapos[7]);
                //handle esds

    return stable;
map<unsignedint,sampletable>& get_packet_offset(uint8_t*STBL[],map<unsignedint,sampletable>& table)

    unsigned int num_sam_to_chunk= get_size(STBL[0]-4,4);//stsc

    unsigned int num_sample= get_size(STBL[1]-4,4);//stsz

    unsigned int num_chunk= get_size(STBL[2]-4,4);//stco

    unsigned int chunk_index= 0;
    unsigned int next_chunk_index= 0;
    uint8_t *cur_sam_to_chunk= STBL[0];
    uint8_t *cur_sam_size= STBL[1];
    uint8_t *cur_chunk_offset= STBL[2];
    sampletable sample;
    printf("number of stsc entries:%d \nnumber of sample size:%d \nnumber of chunk offset:%d\n",num_sam_to_chunk,num_sample,num_chunk);
    for(unsignedint i = 0; i< num_sam_to_chunk;++i)//对所有的entries

        chunk_index = get_size(cur_sam_to_chunk,4);
        next_chunk_index = get_size(cur_sam_to_chunk+12,4);
        sample.id_of_sd = get_size(cur_sam_to_chunk+8,4);
        if(i == num_sam_to_chunk -1)//最后一个

            next_chunk_index = num_chunk+1;
        for(unsignedint k=chunk_index; k< next_chunk_index;++k)//当前chunk序号到下一个chunk序号之间的chunk


            printf("chunk_index:%d sample num:%d\n",chunk_index,get_size(cur_sam_to_chunk+4,4));
            unsigned int offset= get_size(cur_chunk_offset+(chunk_index-1)*4,4);
            for(unsignedint j=0; j< get_size(cur_sam_to_chunk+4,4);++j)//chunk内地sample数目


                sample.size = get_size(cur_sam_size,4);    
                printf("--sample offset:%d %x size:%d\n",offset,offset,sample.size);
                offset = offset + sample.size;
                cur_sam_size += 4;
        cur_sam_to_chunk += 12;
    return table;

/*
 * Searches a buffer for the sample tables of the audio track: looks up the
 * 'moov' atom, then 'trak', then the atoms named in `tag` ("mdia", "minf",
 * "smhd"), and finally 'stbl' with its 'stsd', 'stsc', 'stsz' and 'stco'
 * children.
 *
 *   data1 - start of the buffer to search
 *   size1 - number of bytes in the buffer
 *
 * Returns 0 or -1 (see the return statements below).
 *
 * NOTE(review): this copy has lost its braces, the loop headers (`i` is
 * declared as an index into `tag` but no loop survives), the else branches
 * and apparently the call that would use `postable`.  The nesting suggested
 * by the indentation is only a hint -- restore from the original before use.
 */
int seek_audio_atom(uint8_t *data1,unsigned int size1)
    // three 4-byte atom names stored back to back; indexed below as tag+i*4
    uint8_t tag[]= "mdiaminfsmhd";
    uint8_t *datapos;
    unsigned int tag_size;
    uint8_t *data;
    unsigned int size;
    int offset_of_atom = 0;
    // locate 'moov'; data/size receive its payload position and payload size
    if((offset_of_atom= seek_tag(moov, data1, size1,&data, &size)) == -1)
        return -1;
    if(offset_of_atom+ size >size1)
    { //some handles

        printf("moov atom is not complete,need more data");
    // from here on the search is limited to the moov payload
    data1 = data;
    size1 = size;
    uint8_t *nexttrak= data;
    unsigned int traksize= size;
    int i=0;
        // find the next 'trak' and remember where the following one may start
        if(seek_tag(trak, nexttrak, traksize,&datapos, &tag_size) !=-1)
            nexttrak = datapos + tag_size;
            if(size1< (nexttrak - data1))
                return -1;
            traksize = size1 - (nexttrak - data1);
            data = datapos;
            size = tag_size;
            return -1;
            // descend into the atom named by the i-th entry of `tag`
            if(seek_tag(tag+i*4, data, size, &datapos,&tag_size) != -1)
                data = datapos;
                size = tag_size;
        // datapos-4 is the type field of the atom found last; "smhd" is the atom this function takes as marking the audio track
        if(strncmp("smhd",(char*)(datapos-4),4)== 0)
            if(seek_tag(stbl, data, size,&datapos, &tag_size)!=-1)
                printf("—find audio stbl—!\n");
                data = datapos;
                size = tag_size;

                if(seek_tag(stsd, data, size,&datapos, &tag_size) !=-1)
                    vector<stsdtable> stable;// audio information

                    // datapos-8 steps back over the 8-byte header to the start of the stsd atom
                    get_audio_info(datapos-8, tag_size,stable);

                uint8_t *STBL[3]={NULL,NULL,NULL};// [0] stsc, [1] stsz, [2] stco

                uint8_t *datapos1;
                unsigned int tag_size1;// only written by seek_tag

                // each pointer skips a fixed number of leading payload bytes:
                // 8 for stsc and stco, 12 for stsz
                if(seek_tag(stsc, data, size,&datapos1, &tag_size1) !=-1)
                    STBL[0]= datapos1 + 8;
                uint8_t *datapos2;
                unsigned int tag_size2;
                if(seek_tag(stsz, data, size,&datapos2, &tag_size2) !=-1)
                    STBL[1]= datapos2 + 12;
                uint8_t *datapos3;
                unsigned int tag_size3;
                if(seek_tag(stco, data, size,&datapos3, &tag_size3) !=-1)
                    STBL[2]= datapos3 + 8;
                if(STBL[0]&& STBL[1]&& STBL[2])
                    map<unsignedint,sampletable> postable;// audio frame information

            return 0;
    return -1;
int main(chararg, char *argv[])
    FILE *mp4;
    cout<<"please input the file name :"<<endl;
    string filename;
    mp4 = fopen(filename.c_str(),"rb");
    uint8_t buffer[300000];


    return 0;


好的,以下是一个基于FFmpeg的简单的只播放MP4文件的音频解码demo,供您参考:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>

int main(int argc, char **argv) {
    AVFormatContext *fmt_ctx;
    AVCodecContext *codec_ctx;
    AVCodec *codec;
    AVPacket pkt;
    AVFrame *frame;
    int stream_index;
    int ret;

    // 打开输入文件
    ret = avformat_open_input(&fmt_ctx, argv[1], NULL, NULL);
    if (ret < 0) {
        printf("Error: Could not open input file.\n");
        return -1;
    }

    // 获取流信息
    ret = avformat_find_stream_info(fmt_ctx, NULL);
    if (ret < 0) {
        printf("Error: Could not find stream information.\n");
        return -1;
    }

    // 查找音频流
    stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
    if (stream_index < 0) {
        printf("Error: Could not find audio stream in input file.\n");
        return -1;
    }

    // 打开音频解码器
    codec_ctx = avcodec_alloc_context3(codec);
    avcodec_parameters_to_context(codec_ctx, fmt_ctx->streams[stream_index]->codecpar);
    ret = avcodec_open2(codec_ctx, codec, NULL);
    if (ret < 0) {
        printf("Error: Could not open audio codec.\n");
        return -1;
    }

    // 创建音频帧
    frame = av_frame_alloc();
    if (!frame) {
        printf("Error: Could not allocate audio frame.\n");
        return -1;
    }

    // 逐帧读取音频数据并解码
    while (av_read_frame(fmt_ctx, &pkt) >= 0) {
        if (pkt.stream_index == stream_index) {
            ret = avcodec_send_packet(codec_ctx, &pkt);
            if (ret < 0) {
                printf("Error: Could not send audio packet for decoding.\n");
                break;
            }
            while (ret >= 0) {
                ret = avcodec_receive_frame(codec_ctx, frame);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                    break;
                } else if (ret < 0) {
                    printf("Error: Could not receive decoded audio frame.\n");
                    break;
                }
                // 处理解码后的音频帧数据,这里可以将其写入到文件中
            }
        }
        av_packet_unref(&pkt);
    }

    // 释放资源
    av_frame_free(&frame);
    avcodec_free_context(&codec_ctx);
    avformat_close_input(&fmt_ctx);
    return 0;
}
```

您需要将以上代码保存到一个文件中,例如`audio_decoder.c`,然后使用以下命令编译生成可执行文件:

```sh
gcc -o audio_decoder audio_decoder.c -lavformat -lavcodec -lavutil
```

其中,`-lavformat`、`-lavcodec`、`-lavutil`参数用于链接FFmpeg库。最后,您可以使用以下命令运行该demo:

```sh
./audio_decoder input.mp4
```

其中,`input.mp4`为您想要解码的MP4音频文件。




当前余额3.43前往充值 >
领取后你会自动成为博主和红包主的粉丝 规则
钱包余额 0


