1 简介
WAV文件是以RIFF(Resource Interchange File Format)格式为标准的音频文件。
PCM 数据:原始音频数据,没有文件格式
WAV数据:存储了PCM或压缩型,数据以小端方式存储
WAVE文件是由若干个Chunk组成的。按照在文件中的出现位置包括:RIFF WAVE Chunk, Format Chunk, Fact Chunk(可选), Data Chunk。具体见下图:
2 格式详解
2.1 RIFF块
字段 | 长度 | 内容 |
---|---|---|
ckID | 4 Bytes | “RIFF” |
cksize | 4 Bytes | 文件大小=cksize +8 |
Type | 4 Bytes | “WAVE” |
2.2 FORMAT块
字段 | 长度 | 内容 |
---|---|---|
ckID | 4 Bytes | “fmt ”(注意末尾有一个空格) |
cksize | 4 Bytes | 数值为16、18或40;为18或40时,块的末尾带有附加信息。该大小不包含ckID和cksize本身 |
FormatTag | 2 Bytes | 编码方式,一般为0x0001 |
Channels | 2 Bytes | 声道数目,1–单声道;2–双声道 |
SamplesPerSec | 4 Bytes | 采样频率 |
AvgBytesPerSec | 4 Bytes | 每秒数据字节数 |
BlockAlign | 2 Bytes | 数据块对齐(每个采样所需的字节数),BlockAlign=Channels *BitsPerSample/8 |
BitsPerSample | 2 Bytes | 采样位数 |
cbSize | 2 Bytes | 附加信息的长度(可选,通过cksize来判断有无;取值为0或22) |
2.3 DATA块
字段 | 长度 | 内容 |
---|---|---|
ckID | 4 Bytes | ‘data’ |
cksize | 4 Bytes | 音频数据的长度 |
Data | N | 音频数据 |
wav文件示例
3 源码分析
参考Android中源码,路径:frameworks/av/media/extractors/wav/WAVExtractor.cpp
基本流程:
(1)sniff函数:判断音频格式是否是wav
(2)init:对文件初步解析,获取format信息
(3)read:读取音频数据单元
3.1 类
// Extractor for RIFF/WAVE files. The constructor parses the container via
// init() and caches the format description and the location of the audio
// payload in the members below.
class WAVExtractor : public MediaExtractor {
public:
// Parses `source` immediately; the outcome of that parse is kept in mInitCheck.
explicit WAVExtractor(DataSourceBase *source);
// Number of tracks exposed by this file. Sniff() treats a result of 0 as
// "not a usable WAV file".
virtual size_t countTracks();
virtual MediaTrack *getTrack(size_t index);
virtual status_t getTrackMetaData(MetaDataBase& meta, size_t index, uint32_t flags);
virtual status_t getMetaData(MetaDataBase& meta);
virtual const char * name() { return "WAVExtractor"; }
virtual ~WAVExtractor();
private:
// Source the file is read from (via readAt()).
DataSourceBase *mDataSource;
// Return value of init(), assigned in the constructor.
status_t mInitCheck;
// True once init() has accepted a "fmt " chunk.
bool mValidFormat;
// Format tag from the "fmt " chunk; for WAVE_FORMAT_EXTENSIBLE it is replaced
// by the first two bytes of the sub-format GUID.
uint16_t mWaveFormat;
// Channel count from the "fmt " chunk; init() accepts 1..8.
uint16_t mNumChannels;
// CHANNEL_MASK_USE_CHANNEL_ORDER by default; overwritten with the mask from a
// WAVE_FORMAT_EXTENSIBLE header.
uint32_t mChannelMask;
// Sample rate from the "fmt " chunk; init() rejects 0.
uint32_t mSampleRate;
// Bits per sample from the "fmt " chunk.
uint16_t mBitsPerSample;
// File offset of the first payload byte of the "data" chunk.
off64_t mDataOffset;
// Size in bytes of the "data" chunk payload, as declared in its header.
size_t mDataSize;
// Track metadata filled in by init(): MIME type, channel count/mask, sample
// rate, PCM encoding and duration.
MetaDataBase mTrackMeta;
// Parses the RIFF header and the "fmt "/"data" chunks.
status_t init();
// Copy operations are declared private; no definition is visible here —
// presumably this is to disallow copying.
WAVExtractor(const WAVExtractor &);
WAVExtractor &operator=(const WAVExtractor &);
};
} // namespace android
3.2 sniff
对格式进行判断,不满足以下条件,将返回null,判断文件不是WAV格式。
(1)格式信息:读取文件前12个字节,判断是否前4个字节是“RIFF”,header[8]~header[11]是否是“WAVE”。
(2)轨道信息:WAVExtractor构造函数中会调用init()函数,获取轨道信息,通过countTracks计算轨道个数,个数需大于0。
// Decides whether `source` looks like a WAV file.
//
// The first 12 bytes must be "RIFF" <size> "WAVE", and a temporary
// WAVExtractor built on the source must report at least one track.
// On success *confidence is set to 0.3 and the extractor factory is returned;
// otherwise NULL is returned and *confidence is left untouched.
static MediaExtractor::CreatorFunc Sniff(
        DataSourceBase *source,
        float *confidence,
        void **,
        MediaExtractor::FreeMetaFunc *) {
    char riffHeader[12];
    if (source->readAt(0, riffHeader, sizeof(riffHeader))
            < (ssize_t)sizeof(riffHeader)) {
        // File is shorter than a RIFF header.
        return NULL;
    }

    // Bytes 0..3 carry the container id, bytes 8..11 the form type.
    const bool looksLikeWav =
            memcmp(riffHeader, "RIFF", 4) == 0
            && memcmp(&riffHeader[8], "WAVE", 4) == 0;
    if (!looksLikeWav) {
        return NULL;
    }

    // Let a throw-away extractor parse the chunks; only its track count matters.
    MediaExtractor *probe = new WAVExtractor(source);
    const int trackCount = probe->countTracks();
    delete probe;

    if (trackCount == 0) {
        return NULL;
    }

    *confidence = 0.3f;
    return CreateExtractor;
}
3.3 WAVExtractor
WAVExtractor调用init()函数
// Stores the data source, sets the defaults (no valid format seen yet, channel
// mask = CHANNEL_MASK_USE_CHANNEL_ORDER) and then parses the file.
WAVExtractor::WAVExtractor(DataSourceBase *source)
: mDataSource(source),
mValidFormat(false),
mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
// init() runs in the body, i.e. after the initializers above, because it reads
// mDataSource and mValidFormat. Its status is kept in mInitCheck.
mInitCheck = init();
}
init函数,对各个字段进行校验
(1)对RIFF块校验
(2)对fmt块校验
(3)如果fmt中各字段有效,将一些信息填入metadata中,包括通道数、采样率、采样位宽、编码方式、duration等
// Parses the RIFF/WAVE container read from mDataSource.
//
// Validates the 12-byte RIFF header, then walks the chunk list:
//   * "fmt "  - validates the format description and stores it in
//               mWaveFormat / mNumChannels / mChannelMask / mSampleRate /
//               mBitsPerSample; sets mValidFormat on success.
//   * "data"  - once a valid "fmt " chunk has been seen, records the payload
//               location (mDataOffset / mDataSize), fills mTrackMeta (MIME
//               type, channel count and mask, sample rate, PCM encoding,
//               duration) and returns.
//   * other chunks are skipped.
//
// Returns:
//   OK                 a "data" chunk was found after a valid "fmt " chunk.
//   NO_INIT            short read, bad RIFF/WAVE signature, a chunk larger than
//                      the remaining size, a "fmt " chunk shorter than 16
//                      bytes, or no usable "data" chunk.
//   ERROR_UNSUPPORTED  unsupported format tag, channel count, bit depth,
//                      valid-bits value or sub-format GUID.
//   ERROR_MALFORMED    zero sample rate, invalid channel mask, or a truncated
//                      WAVE_FORMAT_EXTENSIBLE header.
status_t WAVExtractor::init() {
    // RIFF chunk: "RIFF" <cksize> "WAVE".
    uint8_t header[12];
    if (mDataSource->readAt(
                0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
        return NO_INIT;
    }

    if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
        return NO_INIT;
    }

    // RIFF cksize, stored little-endian.
    size_t totalSize = U32_LE_AT(&header[4]);

    // Skip the RIFF header and parse the chunks that follow it.
    off64_t offset = 12;
    size_t remainingSize = totalSize;

    while (remainingSize >= 8) {
        uint8_t chunkHeader[8];
        if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
            return NO_INIT;
        }

        remainingSize -= 8;
        offset += 8;

        uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);

        // NOTE(review): remainingSize is only ever reduced by the 8-byte chunk
        // headers, never by chunkSize, so this bound is looser than the name
        // suggests. Confirm whether that leniency is intentional before
        // tightening it.
        if (chunkSize > remainingSize) {
            return NO_INIT;
        }

        if (!memcmp(chunkHeader, "fmt ", 4)) {
            // "fmt " chunk: format description.
            if (chunkSize < 16) {
                return NO_INIT;
            }

            uint8_t formatSpec[40];
            if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
                return NO_INIT;
            }

            // Format tag.
            mWaveFormat = U16_LE_AT(formatSpec);
            if (mWaveFormat != WAVE_FORMAT_PCM
                    && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT
                    && mWaveFormat != WAVE_FORMAT_ALAW
                    && mWaveFormat != WAVE_FORMAT_MULAW
                    && mWaveFormat != WAVE_FORMAT_MSGSM
                    && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
                return ERROR_UNSUPPORTED;
            }

            // The basic header is 16 bytes; the extensible one is 40.
            uint8_t fmtSize = 16;
            if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
                fmtSize = 40;
            }

            // Never parse header fields from beyond the end of the chunk: an
            // extensible header in a chunk shorter than 40 bytes would
            // otherwise be filled with bytes of the following chunk.
            if (chunkSize < fmtSize) {
                ALOGE("fmt chunk too small (%u) for WAVE_FORMAT_EXTENSIBLE", chunkSize);
                return ERROR_MALFORMED;
            }

            if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
                return NO_INIT;
            }

            mNumChannels = U16_LE_AT(&formatSpec[2]);

            if (mNumChannels < 1 || mNumChannels > 8) {
                ALOGE("Unsupported number of channels (%d)", mNumChannels);
                return ERROR_UNSUPPORTED;
            }

            if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
                if (mNumChannels != 1 && mNumChannels != 2) {
                    ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
                            mNumChannels);
                }
            }

            mSampleRate = U32_LE_AT(&formatSpec[4]);

            if (mSampleRate == 0) {
                return ERROR_MALFORMED;
            }

            mBitsPerSample = U16_LE_AT(&formatSpec[14]);

            if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
                uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
                if (validBitsPerSample != mBitsPerSample) {
                    if (validBitsPerSample != 0) {
                        ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
                                validBitsPerSample, mBitsPerSample);
                        return ERROR_UNSUPPORTED;
                    } else {
                        // we only support validBitsPerSample == bitsPerSample but some WAV_EXT
                        // writers don't correctly set the valid bits value, and leave it at 0.
                        ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
                    }
                }

                mChannelMask = U32_LE_AT(&formatSpec[20]);
                ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
                // Only the low 18 bits of the mask are accepted.
                if ((mChannelMask >> 18) != 0) {
                    ALOGE("invalid channel mask 0x%x", mChannelMask);
                    return ERROR_MALFORMED;
                }

                if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
                        && (popcount(mChannelMask) != mNumChannels)) {
                    ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
                            popcount(mChannelMask), mChannelMask);
                    return ERROR_MALFORMED;
                }

                // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
                // the sample format, using the same definitions as a regular WAV header
                mWaveFormat = U16_LE_AT(&formatSpec[24]);
                if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14) &&
                        memcmp(&formatSpec[26], AMBISONIC_SUBFORMAT, 14)) {
                    ALOGE("unsupported GUID");
                    return ERROR_UNSUPPORTED;
                }
            }

            // Per-format bit-depth validation.
            if (mWaveFormat == WAVE_FORMAT_PCM) {
                if (mBitsPerSample != 8 && mBitsPerSample != 16
                        && mBitsPerSample != 24 && mBitsPerSample != 32) {
                    return ERROR_UNSUPPORTED;
                }
            } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
                if (mBitsPerSample != 32) {  // TODO we don't support double
                    return ERROR_UNSUPPORTED;
                }
            } else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
                if (mBitsPerSample != 0) {
                    return ERROR_UNSUPPORTED;
                }
            } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) {
                if (mBitsPerSample != 8) {
                    return ERROR_UNSUPPORTED;
                }
            } else {
                return ERROR_UNSUPPORTED;
            }

            mValidFormat = true;
        } else if (!memcmp(chunkHeader, "data", 4)) {
            // "data" chunk: only usable once a valid "fmt " chunk has been
            // seen; otherwise it is skipped like any other chunk.
            if (mValidFormat) {
                mDataOffset = offset;
                mDataSize = chunkSize;

                mTrackMeta.clear();

                switch (mWaveFormat) {
                    case WAVE_FORMAT_PCM:
                    case WAVE_FORMAT_IEEE_FLOAT:
                        mTrackMeta.setCString(
                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
                        break;
                    case WAVE_FORMAT_ALAW:
                        mTrackMeta.setCString(
                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
                        break;
                    case WAVE_FORMAT_MSGSM:
                        mTrackMeta.setCString(
                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
                        break;
                    default:
                        // The "fmt " validation above leaves mu-law as the
                        // only remaining possibility.
                        CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
                        mTrackMeta.setCString(
                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
                        break;
                }

                mTrackMeta.setInt32(kKeyChannelCount, mNumChannels);
                mTrackMeta.setInt32(kKeyChannelMask, mChannelMask);
                mTrackMeta.setInt32(kKeySampleRate, mSampleRate);
                mTrackMeta.setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);

                int64_t durationUs = 0;
                if (mWaveFormat == WAVE_FORMAT_MSGSM) {
                    // 65 bytes decode to 320 8kHz samples.
                    // Widen to 64 bits before multiplying: with a 32-bit
                    // size_t, (mDataSize / 65 * 320) wraps for data chunks
                    // larger than roughly 872 MB.
                    durationUs =
                        1000000LL * (static_cast<int64_t>(mDataSize) / 65 * 320) / 8000;
                } else {
                    size_t bytesPerSample = mBitsPerSample >> 3;

                    if (!bytesPerSample || !mNumChannels)
                        return ERROR_MALFORMED;

                    // Sample frames = payload bytes / bytes per frame.
                    size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);

                    if (!mSampleRate)
                        return ERROR_MALFORMED;

                    durationUs =
                        1000000LL * num_samples / mSampleRate;
                }

                mTrackMeta.setInt64(kKeyDuration, durationUs);

                return OK;
            }
        }

        // Step over this chunk's payload to the next chunk header.
        offset += chunkSize;
    }

    // Ran out of chunks without finding a usable "data" chunk.
    return NO_INIT;
}
duration计算:
//总样本数 = 音频数据大小(data块的cksize)/(通道数*每个样本字节数)
//时长 = 总样本数/采样率
size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
durationUs = 1000000LL * num_samples / mSampleRate;
对于WAVE_FORMAT_MSGSM格式,即GSM610的duration,进行了特殊处理
// 65 bytes decode to 320 8kHz samples
durationUs = 1000000LL * (mDataSize / 65 * 320) / 8000;