wav文件格式解析
概述
wav文件支持多种比特率和采样率,也支持多声道音频。
wav是一种RIFF(resource interchange file format 资源互换文件格式,以chunk为单位组织文件)格式文件。在windows上,大部分多媒体文件都是RIFF文件。
wav文件由RIFF chunk构成,fmt /data chunk是其必须的两个子chunk,除此以外还可以有fact等可选chunk,我们在做parse的时候,主要是分析fmt chunk的数据。
wav标准结构
1.wav文件都是由chunk组成,chunk的格式如下:
size | 内容 | 解释 |
---|---|---|
4 bytes | ID | 如“RIFF” |
4 bytes | chunk size N | 如标准fmt chunk为16字节 |
N bytes | data | chunk的内容 |
2.只有“fmt ”、“data”两个子chunk的wav文件格式:
size | 内容 | 解释 |
---|---|---|
4 bytes | RIFF | 文件类型格式 |
4 bytes | filesize | 其值为filesize字段后的大小,真正的文件大小为filesize + 8 bytes |
4 bytes | WAVE | 文件格式 |
4 bytes | "fmt " | chunk id,fmt即format,这个chunk保存了音频的采样率、声道数、采样位数等关键信息 |
4 bytes | fmt chunk size | fmt chunk的大小,一般有16/18/20/22/40字节(也有超过40字节的情况,如果不知道后面部分的含义,直接跳过即可),超过16字节部分为扩展块 |
2 bytes | AudioFormat | 编码格式代码,其值见 《常见编码格式》 表 |
2 bytes | Num channels | 声道数,一般有1-8 |
4 bytes | Sample Rate | 采样率,常见取值为8/11.025/12/16/22.05/24/32/44.1/48/64/88.2/96/176.4/192 kHz |
4 bytes | Byte Rate | 传输速率,每秒的字节数,计算公式为:采样率*channels*采样位数/8 |
2 bytes | Block Align | 块对齐,告知播放软件一次性需处理多少字节,公式为: 采样位数*声道数/8 |
2 bytes | Bits Per Sample | 采样位数,一般有8/16/24/32/64,值越大,对声音的还原度越高 |
4 bytes | “data” | data chunk id |
4 bytes | Data Size N | 原始音频数据的大小 |
N bytes | Data | 原始音频数据,也是我们最终想获得的部分,数据保存方式见 pcm存储 表 |
3.fmt 扩展块
fmt chunk的大小一般为16/18/20/22/40字节,当其大小大于16字节时,多出的内容都保存在扩展块中。
size | 内容 | 解释 |
---|---|---|
2bytes | cbsize | 扩展块长度,为0时,fmt chunk size为18;为2时,fmt chunk size为20;为22时,fmt chunk size为40 |
2bytes | 有效采样位数 | 最大值为采样字节数*8 |
4bytes | 声道掩码 | 声道号与扬声器位置的映射掩码 |
2bytes | codec | 真正的编码格式代码(相对于extension 的fmt codec为0xFFFE而言) |
14bytes | SubFormat其余部分 | 与前面2字节的codec一起组成16字节的SubFormat GUID(在mediainfo中显示为subformat);对标准编码格式,这14字节固定为 00 00 00 00 10 00 80 00 00 AA 00 38 9B 71 |
示例
以下代码为解析wave格式文件的示例,其功能为将wave文件中的数据部分取出并输出为文件,同时打印相关信息。
output文件可命名为xxx.pcm。
命令行使用:xxx.exe [input] [output]
wav_type.h
/*****************************************************
File Name : wav_type.h
Date : 2020/10/27
Author : l.hua
Descriptions: all the declarations used by wav_to_pcm.c
Change log :
Date Mender Cause
******************************************************/
#ifndef _WAV_TYPE_H_
#define _WAV_TYPE_H_
/*
 * Standard and platform headers needed by the declarations below.
 * They are included directly instead of inside an extern "C" { } wrapper:
 * a bare extern "C" is a syntax error when this header is compiled as C,
 * and system headers already handle their own linkage for C++ callers.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include <direct.h>
#include <stdint.h>
/*
 * Failure codes (decode failure / copy failure, going by their names).
 * The negative literals are parenthesized so each macro always expands to a
 * single operand; unparenthesized, "5 DECODE_FAIL" would compile as "5 - 1".
 */
#define DECODE_FAIL (-1)
#define COPY_FAIL (-2)
/* Buffer size in bytes: 4 * 1024 * 1024 (presumably the file read buffer). */
#define READ_BUF_SIZE (1024 * 1024 * 4)
/*
 * WAVE format tags: 16-bit codec identifiers, each paired with a *_DES
 * string that spells out the tag's name.
 */
#define WAVE_FORMAT_UNKNOWN             0x0000
#define WAVE_FORMAT_UNKNOWN_DES         "WAVE_FORMAT_UNKNOWN"
#define WAVE_FORMAT_PCM                 0x0001
#define WAVE_FORMAT_PCM_DES             "WAVE_FORMAT_PCM"
#define WAVE_FORMAT_ADPCM               0x0002
#define WAVE_FORMAT_ADPCM_DES           "WAVE_FORMAT_ADPCM"
#define WAVE_FORMAT_IEEE_FLOAT          0x0003
#define WAVE_FORMAT_IEEE_FLOAT_DES      "WAVE_FORMAT_IEEE_FLOAT"
#define WAVE_FORMAT_ALAW                0x0006
#define WAVE_FORMAT_ALAW_DES            "WAVE_FORMAT_ALAW"
#define WAVE_FORMAT_MULAW               0x0007
#define WAVE_FORMAT_MULAW_DES           "WAVE_FORMAT_MULAW"
#define WAVE_FORMAT_DTS_MS              0x0008
#define WAVE_FORMAT_DTS_MS_DES          "WAVE_FORMAT_DTS_MS"
#define WAVE_FORMAT_WMAS                0x000A
#define WAVE_FORMAT_WMAS_DES            "WAVE_FORMAT_WMAS"
#define WAVE_FORMAT_IMA_ADPCM           0x0011
#define WAVE_FORMAT_IMA_ADPCM_DES       "WAVE_FORMAT_IMA_ADPCM"
#define WAVE_FORMAT_TRUESPEECH          0x0022
#define WAVE_FORMAT_TRUESPEECH_DES      "WAVE_FORMAT_TRUESPEECH"
#define WAVE_FORMAT_GSM610              0x0031
#define WAVE_FORMAT_GSM610_DES          "WAVE_FORMAT_GSM610"
#define WAVE_FORMAT_MSNAUDIO            0x0032
#define WAVE_FORMAT_MSNAUDIO_DES        "WAVE_FORMAT_MSNAUDIO"
#define WAVE_FORMAT_G726                0x0045
#define WAVE_FORMAT_G726_DES            "WAVE_FORMAT_G726"
#define WAVE_FORMAT_MPEG                0x0050
#define WAVE_FORMAT_MPEG_DES            "WAVE_FORMAT_MPEG"
#define WAVE_FORMAT_MPEGLAYER3          0x0055
#define WAVE_FORMAT_MPEGLAYER3_DES      "WAVE_FORMAT_MPEGLAYER3"
#define WAVE_FORMAT_UNKNOWN2            0x0070
#define WAVE_FORMAT_UNKNOWN2_DES        "WAVE_FORMAT_UNKNOWN2"
#define WAVE_FORMAT_UNKNOWN3            0x0072
#define WAVE_FORMAT_UNKNOWN3_DES        "WAVE_FORMAT_UNKNOWN3"
#define WAVE_FORMAT_DOLBY_AC3_SPDIF     0x0092
#define WAVE_FORMAT_DOLBY_AC3_SPDIF_DES "WAVE_FORMAT_DOLBY_AC3_SPDIF"
#define WAVE_FORMAT_A52                 0x2000
#define WAVE_FORMAT_A52_DES             "WAVE_FORMAT_A52"
#define WAVE_FORMAT_DTS                 0x2001
#define WAVE_FORMAT_DTS_DES             "WAVE_FORMAT_DTS"
#define WAVE_FORMAT_WMA1                0x0160
#define WAVE_FORMAT_WMA1_DES            "WAVE_FORMAT_WMA1"
#define WAVE_FORMAT_WMA2                0x0161
#define WAVE_FORMAT_WMA2_DES            "WAVE_FORMAT_WMA2"
#define WAVE_FORMAT_WMAP                0x0162
#define WAVE_FORMAT_WMAP_DES            "WAVE_FORMAT_WMAP"
#define WAVE_FORMAT_WMAL                0x0163
#define WAVE_FORMAT_WMAL_DES            "WAVE_FORMAT_WMAL"
#define WAVE_FORMAT_DIVIO_AAC           0x4143
#define WAVE_FORMAT_DIVIO_AAC_DES       "WAVE_FORMAT_DIVIO_AAC"
#define WAVE_FORMAT_AAC                 0x00FF
#define WAVE_FORMAT_AAC_DES             "WAVE_FORMAT_AAC"
/* FFmpeg tags AAC in WAV as 0x706D ('m','p' little-endian); was 0x7060. */
#define WAVE_FORMAT_FFMPEG_AAC          0x706D
#define WAVE_FORMAT_FFMPEG_AAC_DES      "WAVE_FORMAT_FFMPEG_AAC"
/* Extensible format: the real codec tag is carried in the fmt extension. */
#define WAVE_FORMAT_EXTENSIBLE          0xFFFE
#define WAVE_FORMAT_EXTENSIBLE_DES      "WAVE_FORMAT_EXTENSIBLE"
/******************************
STRUCTURE DECLARATIONS
******************************/
/*
 * Mandatory 16-byte body of the "fmt " chunk, in on-disk field order.
 */
typedef struct {
    uint16_t u16FormatTag;      /* encoding format code (WAVE_FORMAT_*) */
    uint16_t u16Channels;       /* number of channels */
    uint32_t u32SamplesPerSec;  /* sample rate */
    uint32_t u32BytePerSec;     /* byte rate: sample rate * channels * bits / 8 */
    uint16_t u16BlockAlign;     /* block align: bits per sample * channels / 8 */
    uint16_t u16BitsPerSample;  /* bits per sample */
} WavFmtCommTex_S;
/*
 * Full "fmt " chunk body: the mandatory 16-byte part followed by the
 * optional extension fields present when the chunk is larger than 16 bytes.
 */
typedef struct {
    WavFmtCommTex_S sFmtComm;           /* mandatory fmt fields */
    uint16_t u16ExtraSize;              /* extension length (cbSize) */
    uint16_t u16EffectiveBitsPerSample; /* valid bits per sample */
    uint32_t u32Loudspeaker;            /* channel-to-speaker position mask */
    uint16_t u16RealCodecTag;           /* actual codec tag when the format tag is 0xFFFE */
    /* Trailing extension bytes.
     * NOTE(review): sized 24, although only 14 bytes follow the codec tag
     * in a 40-byte fmt chunk - confirm the extra headroom is intended. */
    uint8_t u8Occupy[24];
} WavFmtTex_S;
/*
 * Leading 20 bytes of a WAV file: the RIFF header plus the id and size
 * of the chunk that follows it.
 */
typedef struct {
    char cRiff[4];       /* "RIFF" file type id */
    uint32_t lFileSize;  /* size of everything after this field (file size - 8) */
    char cWave[4];       /* "WAVE" format id */
    char cFmtChunk[4];   /* chunk id, "fmt " in the basic layout */
    uint32_t lChunkLen;  /* size in bytes of that chunk's body */
} WavComTex_S;
/*
 * Parsed WAV header: the RIFF/chunk preamble followed by the fmt chunk body.
 */
typedef struct {
    WavComTex_S sComm;  /* RIFF header and fmt chunk id/size */
    WavFmtTex_S sFmt;   /* fmt chunk contents, including the extension */
} WavHeaderTex_S;
/******************************
FUNCTION DECLARATIONS
******************************/
int WavDecode(FILE* FSrcFile, WavHeaderTex_