解析wav和mp3文件头以获得采样率和通道数

最新推荐文章于 2024-11-13 20:11:06 发布

Levi2o8

最新推荐文章于 2024-11-13 20:11:06 发布

阅读量1.8k

点赞数 23

文章标签： c语言

本文链接：https://blog.csdn.net/mike2o8/article/details/139258248

版权

解析wav文件头

解析MP3文件头

文章目录

解析wav文件头
解析MP3文件头
前言
一、解析wav文件头
- wav文件头格式
- 解析代码
二、解析MP3文件头
总结

前言

最近在工作中，会涉及到播放mp3文件和wav文件，会想要获得音频文件的采样率和通道数，所以去查了一下mp3文件头和wav文件头的格式，写代码来解析。

一、解析wav文件头

wav文件头格式

/*
 * Layout of a WAV file header as read straight from the start of the file.
 * The struct is packed with 1-byte alignment, so the fields add up to 44 bytes
 * with no padding between them.
 */
typedef struct
{
    uint8_t  ChunkID[4];    // Document tag: the upper-case string "RIFF", marking a valid RIFF document.
    uint32_t ChunkSize;     // Data length: bytes from the start of the next field to the end of the file; this value plus 8 is the real file length.
    uint8_t  Format[4];     // File format type: the string "WAVE" for every WAV file.
    uint8_t  FmtChunkID[4]; // Format chunk tag: the lower-case string "fmt " (with a trailing space).
    uint32_t FmtChunkSize;  // Format chunk length: not fixed, depends on the encoding; may be 16, 18, 20, 40, ...
    uint16_t AudioFormat;   // Encoding format code: usually 1 (PCM) for common WAV files.
    uint16_t NumChannels;   // Channel count: 1 for mono, 2 for stereo / dual channel.
    uint32_t SampleRate;    // Sampling frequency: samples per second for each channel; common values are 11025, 22050 and 44100 Hz.
    uint32_t ByteRate;      // Data rate: channels x sample rate x bits per sample / 8; players can use it to estimate buffer sizes.
    uint16_t BlockAlign;    // Block alignment (size of one sample frame): channels x bits per sample / 8; players process data in multiples of this size.
    uint16_t BitsPerSample; // Bits used to store one sample value; common values are 4, 8, 12, 16, 24, 32.
    uint8_t  DataChunkID[4]; // Data chunk tag (presumably the string "data" -- not documented in the original, confirm).
    uint32_t DataChunkSize;  // Data chunk length (presumably the number of audio data bytes that follow -- confirm).
}__attribute__((packed, aligned(1))) WaveHeader;

解析代码

/**
 * Read the header of a WAV file and report its sample rate, channel count and
 * bits per sample.
 *
 * The first 36 bytes of the file are read and decoded by hand as little-endian
 * values, at the offsets used by the WaveHeader layout. Decoding byte by byte
 * keeps the result independent of host byte order and struct packing.
 * The "fmt " chunk must directly follow the RIFF/WAVE preamble; files that put
 * another chunk first are rejected rather than misread.
 *
 * @param filename     path of the WAV file; must not be NULL
 * @param sample_rate  out: sampling frequency in Hz
 * @param channel_num  out: number of channels
 * @param bit_width    out: bits per sample
 * @return 0 on success; -1 if an argument is NULL, the file cannot be opened or
 *         is too short, or the header is not a RIFF/WAVE header with a leading
 *         "fmt " chunk. The out parameters are written only on success.
 */
static int audio_parse_wav_file(char *filename, int *sample_rate, int *channel_num, int *bit_width)
{
    /* Byte offsets inside the header, following the WaveHeader field order. */
    enum
    {
        WAV_OFF_CHUNK_ID     = 0,
        WAV_OFF_FORMAT       = 8,
        WAV_OFF_FMT_CHUNK_ID = 12,
        WAV_OFF_NUM_CHANNELS = 22,
        WAV_OFF_SAMPLE_RATE  = 24,
        WAV_OFF_BITS         = 34,
        WAV_PARSED_BYTES     = 36  /* everything up to and including BitsPerSample */
    };

    unsigned char raw[WAV_PARSED_BYTES];
    unsigned long rate;
    int channels;
    int bits;
    FILE *file;

    if(filename == NULL)
    {
        SAMPLE_PRT("wav filename is NULL!\n");
        return -1;
    }
    if(sample_rate == NULL || channel_num == NULL || bit_width == NULL)
    {
        SAMPLE_PRT("wav output pointer is NULL!\n");
        return -1;
    }

    file = fopen(filename, "rb");
    if(file == NULL)
    {
        SAMPLE_PRT("open wav file failed!\n");
        return -1;
    }

    // Read the part of the header that holds the format fields
    if(fread(raw, 1, sizeof(raw), file) != sizeof(raw))
    {
        SAMPLE_PRT("wav file is too short to contain a header!\n");
        fclose(file);
        return -1;
    }
    fclose(file);

    // Reject anything that is not "RIFF" .... "WAVE" "fmt "
    if(raw[WAV_OFF_CHUNK_ID] != 'R' || raw[WAV_OFF_CHUNK_ID + 1] != 'I' ||
        raw[WAV_OFF_CHUNK_ID + 2] != 'F' || raw[WAV_OFF_CHUNK_ID + 3] != 'F' ||
        raw[WAV_OFF_FORMAT] != 'W' || raw[WAV_OFF_FORMAT + 1] != 'A' ||
        raw[WAV_OFF_FORMAT + 2] != 'V' || raw[WAV_OFF_FORMAT + 3] != 'E' ||
        raw[WAV_OFF_FMT_CHUNK_ID] != 'f' || raw[WAV_OFF_FMT_CHUNK_ID + 1] != 'm' ||
        raw[WAV_OFF_FMT_CHUNK_ID + 2] != 't' || raw[WAV_OFF_FMT_CHUNK_ID + 3] != ' ')
    {
        SAMPLE_PRT("not a RIFF/WAVE file with a leading fmt chunk!\n");
        return -1;
    }

    // Multi-byte fields are stored least significant byte first
    channels = raw[WAV_OFF_NUM_CHANNELS] | (raw[WAV_OFF_NUM_CHANNELS + 1] << 8);
    bits     = raw[WAV_OFF_BITS] | (raw[WAV_OFF_BITS + 1] << 8);
    rate     = (unsigned long)raw[WAV_OFF_SAMPLE_RATE]
             | ((unsigned long)raw[WAV_OFF_SAMPLE_RATE + 1] << 8)
             | ((unsigned long)raw[WAV_OFF_SAMPLE_RATE + 2] << 16)
             | ((unsigned long)raw[WAV_OFF_SAMPLE_RATE + 3] << 24);

    // A zero field is meaningless, and the rate must fit the int out parameter
    if(channels == 0 || bits == 0 || rate == 0 || rate > 0x7FFFFFFFUL)
    {
        SAMPLE_PRT("wav header contains invalid format fields!\n");
        return -1;
    }

    *sample_rate = (int)rate;// out
    *channel_num = channels;// out
    *bit_width   = bits;// out

    // Print the parsed result
    printf("%s Number of channels: %d\n", filename, *channel_num);
    printf("%s Sample rate: %d Hz\n", filename, *sample_rate);
    printf("%s Bits per sample: %d bits\n", filename, *bit_width);

    return 0;
}

二、解析MP3文件头

mp3文件结构

mp3文件结构大致结构如下：
在这里插入图片描述
注意：并不是所有mp3文件都包含ID3V2部分，有的MP3文件包含ID3V2部分，有的MP3文件不包含ID3V2部分。

ID3V2：

ID3V2 到现在一共有 4 个版本,但流行的播放软件一般只支持第 3 版,即 ID3v2.3。由于
ID3V1 记录在 MP3 文件的末尾,ID3V2 就只好记录在 MP3 文件的首部了。
每个 ID3V2.3 的标签都由一个标签头和若干个标签帧(以及可选的扩展标签头)组成。关于曲目的信息如标题、作者等都存放在不同的标签帧中,扩展标签头并不是必要的,但每个标
签至少要有一个标签帧。标签头和标签帧一起顺序存放在 MP3 文件的首部。

(一)、标签头

在文件的首部顺序记录 10 个字节的 ID3V2.3 的头部。数据结构如下:

/*
 * The 10-byte ID3v2 tag header found at the very start of a tagged MP3 file.
 * Packed with 1-byte alignment so it can be read directly from the file.
 */
typedef struct
{
    char Header[3]; /* Must be "ID3"; otherwise the tag is considered absent */
    char Ver; /* Version number; 3 for ID3v2.3 */
    char Revision; /* Revision number; 0 for this version */
    char Flag; /* Flag byte; this version defines only three of its bits */
    char Size[4]; /* Tag size, 7 significant bits per byte. The original note says it covers the 10-byte tag header plus all tag frames; NOTE(review): the ID3v2 specification defines it as excluding the header -- confirm */
}__attribute__((packed, aligned(1))) Mp3ID3V2TagHeader;

注:对这里我有疑惑。按照 ID3v2 规范,Size 字段表示的标签大小并不包含标签头的 10 个字节;
但在实际寻找首帧的过程中,我发现有的 mp3 文件的标签大小是不包含标签头的,有的又是包含的,
可能是某些 mp3 编码器写标签的 BUG。所以为了兼容,只好先认为其是包含的,如果按这个大小找不到,
再按不包含标签头的情况重新计算,或者继续向后搜索,直到找到首帧为止。

(1).标志字节

标志字节一般为 0,定义如下:
abc00000
a – 表示是否使用 Unsynchronisation(非同步化处理:在标签数据中可能被误判为帧同步的 0xFF 字节之后插入一个 0x00,
避免播放器把标签内容当成 MP3 帧头;一般不设置)
b – 表示是否有扩展头部,一般没有(至少 Winamp 没有记录),所以一般也不设置
c – 表示是否为测试标签(99.99%的标签都不是测试用的啦,所以一般也不设置)

(2).标签大小

一共四个字节,但每个字节只用 7 位,最高位不使用恒为 0。所以格式如下
0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx
计算大小时要将每个字节最高位的 0 去掉,得到一个 28 位的二进制数,就是标签大小。
(之所以每个字节只用 7 位,是为了保证大小字段中不会出现 0xFF,从而不会被误认为 MP3 的帧同步。)
计算公式如下:

/* Rebuild the 28-bit tag size from four bytes that each carry 7 payload bits,
 * most significant group first. */
int total_size;
total_size = (Size[0]&0x7F)*0x200000   /* bits 27..21 */
			+(Size[1]&0x7F)*0x4000     /* bits 20..14 */
			+(Size[2]&0x7F)*0x80       /* bits 13..7  */
			+(Size[3]&0x7F);           /* bits  6..0  */

Frame帧头结构如下：

使用字符 A 到 M 表示不同的区域。在表格中你可以看到每一区域
的详细内容。
AAAAAAAA AAABBCCD EEEEFFGH IIJJKLMM
在这里插入图片描述

帧头定义

/*
 * The 4-byte MPEG audio frame header expressed as bit-fields (32 bits in total).
 * The sync pattern is split into sync1/sync2 and the fields inside each byte are
 * listed in reverse of the "AAAAAAAA AAABBCCD EEEEFFGH IIJJKLMM" picture.
 * NOTE(review): this presumably relies on the compiler allocating bit-fields
 * from the least significant bit of each byte; bit-field order is
 * implementation-defined in C, so confirm for the target toolchain.
 */
typedef struct
{
    unsigned int sync1: 8; // Sync pattern, part 1
    unsigned int error_protection: 1; // CRC protection
    unsigned int layer: 2; // Layer
    unsigned int version: 2; // Version
    unsigned int sync2: 3; // Sync pattern, part 2
    unsigned int extension: 1; // Labelled "copyright" in the original comment; NOTE(review): by position this looks like the private bit (letter H), the copyright flag is the separate field below -- confirm
    unsigned int padding: 1; // Padding bit
    unsigned int sample_rate_index: 2; // Sample rate index
    unsigned int bit_rate_index: 4; // Bit rate index
    unsigned int emphasis: 2; // Emphasis
    unsigned int original: 1; // Original media
    unsigned int copyright: 1; // Copyright flag
    unsigned int mode_extension: 2; // Mode extension, only used for joint stereo
    unsigned int channel_mode: 2; // Channel mode
}__attribute__((packed, aligned(1))) Mp3FrameHeader;

请注意这里同步信息分成了两个部分,而且其他的位的顺序也和上表列出的有所差别。这主要是因为常见的小端平台编译器(如 GCC)
是从每个字节的低位开始分配位域的,而这个帧头是需要从高位来读取的。需要注意:位域的分配顺序由编译器实现决定,这种写法不具备可移植性。

解析帧头，获得采样率和通道数示例代码

/*
 * Decode one 4-byte MPEG audio frame header (bit picture
 * "AAAAAAAA AAABBCCD EEEEFFGH IIJJKLMM").
 * Returns 0 and fills the outputs when the bytes carry the 11-bit frame sync
 * and a usable version / layer / bit rate / sample rate combination;
 * returns -1 (outputs untouched) otherwise.
 */
static int mp3_decode_frame_header(const unsigned char hdr[4], int *sample_rate, int *channel_num)
{
    /* Sample rate in Hz, indexed by [version ID][sample rate index]; 0 = reserved. */
    static const int rate_table[4][4] =
    {
        { 11025, 12000,  8000, 0 },  /* version ID 0: MPEG 2.5 */
        {     0,     0,     0, 0 },  /* version ID 1: reserved */
        { 22050, 24000, 16000, 0 },  /* version ID 2: MPEG 2   */
        { 44100, 48000, 32000, 0 }   /* version ID 3: MPEG 1   */
    };
    unsigned int version;
    unsigned int layer;
    unsigned int bit_rate_index;
    unsigned int rate_index;
    unsigned int channel_mode;
    int rate;

    // Frame sync: all 8 bits of byte 0 plus the top 3 bits of byte 1
    if(hdr[0] != 0xFF || (hdr[1] & 0xE0) != 0xE0)
    {
        return -1;
    }

    version        = (hdr[1] >> 3) & 0x03;
    layer          = (hdr[1] >> 1) & 0x03;
    bit_rate_index = (hdr[2] >> 4) & 0x0F;
    rate_index     = (hdr[2] >> 2) & 0x03;
    channel_mode   = (hdr[3] >> 6) & 0x03;

    // Layer 0 and bit rate index 15 are invalid; rejecting them filters out
    // random bytes that merely look like a sync pattern
    rate = rate_table[version][rate_index];
    if(layer == 0 || bit_rate_index == 0x0F || rate == 0)
    {
        return -1;
    }

    *sample_rate = rate;// out
    // Modes 0 (stereo), 1 (joint stereo) and 2 (dual channel) carry two
    // channels; mode 3 is mono
    *channel_num = (channel_mode == 3) ? 1 : 2;// out
    return 0;
}

/*
 * Seek to the absolute file offset and try to decode a frame header there.
 * Returns 0 on success, -1 if the seek or read fails or the bytes are not a
 * valid frame header.
 */
static int mp3_probe_frame_at(FILE *file, long offset, int *sample_rate, int *channel_num)
{
    unsigned char hdr[4];

    if(offset < 0 || fseek(file, offset, SEEK_SET) != 0)
    {
        return -1;
    }
    if(fread(hdr, 1, sizeof(hdr), file) != sizeof(hdr))
    {
        return -1;
    }
    return mp3_decode_frame_header(hdr, sample_rate, channel_num);
}

/**
 * Find the first MPEG audio frame header of an MP3 file and report the sample
 * rate and channel count stored in it.
 *
 * If the file starts with an ID3v2 tag, the frame header is looked for right
 * after the tag. Two offsets are tried: first 10 + size (the size field
 * excludes the 10-byte tag header, as the ID3v2 specification defines it), then
 * size alone (for files whose writer counted the tag header in the size).
 * Without an ID3v2 tag the frame header is expected at offset 0.
 * The header bytes are decoded with shifts and masks, so the result does not
 * depend on how the compiler lays out bit-fields.
 *
 * @param filename     path of the MP3 file; must not be NULL
 * @param sample_rate  out: sampling frequency in Hz
 * @param channel_num  out: 1 for mono, 2 for every other channel mode
 * @return 0 on success; -1 if an argument is NULL, the file cannot be opened,
 *         or no valid frame header is found. The out parameters are written
 *         only on success.
 */
static int audio_parse_mp3_file(char *filename, int *sample_rate, int *channel_num)
{
    enum { ID3V2_HEADER_SIZE = 10 };

    unsigned char id3[ID3V2_HEADER_SIZE];
    size_t got;
    FILE *file;
    int ret;

    if(filename == NULL)
    {
        SAMPLE_PRT("mp3 filename is NULL!\n");
        return -1;
    }
    if(sample_rate == NULL || channel_num == NULL)
    {
        SAMPLE_PRT("mp3 output pointer is NULL!\n");
        return -1;
    }

    file = fopen(filename, "rb");
    if(file == NULL)
    {
        SAMPLE_PRT("open mp3 file failed!\n");
        return -1;
    }

    // Read what may be an ID3v2 tag header (the tag is optional)
    got = fread(id3, 1, sizeof(id3), file);

    if(got == sizeof(id3) && id3[0] == 'I' && id3[1] == 'D' && id3[2] == '3')
    {
        // The size is stored in bytes 6..9, 7 payload bits per byte, most
        // significant group first
        long tag_total_size = ((long)(id3[6] & 0x7F) << 21)
                            | ((long)(id3[7] & 0x7F) << 14)
                            | ((long)(id3[8] & 0x7F) << 7)
                            |  (long)(id3[9] & 0x7F);
        printf("tag_total_size = 0x%x\n", (unsigned int)tag_total_size);

        // Size excludes the tag header (specification behaviour)
        ret = mp3_probe_frame_at(file, ID3V2_HEADER_SIZE + tag_total_size, sample_rate, channel_num);
        if(ret != 0)
        {
            // Size already includes the tag header (some writers do this)
            ret = mp3_probe_frame_at(file, tag_total_size, sample_rate, channel_num);
        }
        if(ret != 0)
        {
            SAMPLE_PRT("found mp3 ID3V2 tag header, but can not find mp3 frame header!");
        }
    }
    else
    {
        // No ID3v2 tag: the file should start with a frame header
        ret = mp3_probe_frame_at(file, 0, sample_rate, channel_num);
        if(ret != 0)
        {
            SAMPLE_PRT("can not find mp3 frame header at the start of the file!\n");
        }
    }

    if(ret == 0)
    {
        printf("%s Sample rate: %d Hz\n", filename, *sample_rate);
        printf("%s Number of channels: %d\n", filename, *channel_num);
    }

    // Close the file
    fclose(file);
    return ret;
}