1. wave文件
wave文件采用RIFF文件格式存储,所以wave文件遵循RIFF文件结构。
1.1 RIFF文件
先简单介绍下什么是RIFF文件,RIFF是Microsoft提出的一种多媒体文件的存储方式,不同编码的音频、视频文件,可以按照它定义的存储规则保存、记录各自不同的数据,如:数据内容、采集信息、显示尺寸、编码方式等。
在RIFF的文件存储规范中,主要有几个重要的概念需要理解,它们是FOURCC、CHUNK、LIST。RIFF格式是一种树状的结构,其基本组成单元为LIST和CHUNK。
1.2 wave结构
wave文件是由多个chunk嵌套组成的,wave文件就是一个chunk1,结构如下:
chunk1结构如下
字段 | 长度 | 字节序 | 说明 |
---|---|---|---|
chunkId | 4bytes | 大端 | 第一个chunk的标识始终是"RIFF" |
chunkSize | 4bytes | 小端 | data + chunkType的长度,注意不包含chunkId和chunkSize |
chunkType | 4bytes | 大端 | 对于Wave文件chunk的类型为"WAVE" |
data | chunkSize - 4 bytes | 无 | 包含chunk2和chunk3 |
chunk2结构如下
字段 | 长度 | 字节序 | 说明 |
---|---|---|---|
chunkId | 4bytes | 大端 | wave文件的第二个chunk标识为"fmt "(注意末尾有一个空格) |
chunkSize | 4bytes | 小端 | chunk的数据大小,不包含该chunk的chunkSize和chunkId |
wFormatTag | 2bytes | 小端 | 音频数据格式,0x0001表示PCM数据 |
nChannels | 2bytes | 小端 | 声道数 |
nSamplesPerSec | 4bytes | 小端 | 采样率,每秒采样次数 |
nAvgBytesPerSec | 4bytes | 小端 | 每秒的音频数据大小(B),声道数x采样率x每个采样点的比特数/8 |
nBlockAlign | 2bytes | 小端 | 每个时间点的音频数据块大小(B),声道数x每个采样点的比特数/8 |
wBitsPerSample | 2bytes | 小端 | 采样深度,每个采样点用多少比特编码(8/16) |
chunk3结构如下
字段 | 长度 | 字节序 | 说明 |
---|---|---|---|
chunkId | 4bytes | 大端 | Wave文件的第三个chunk标识为"data" |
chunkSize | 4bytes | 小端 | chunk的数据大小,不包含该chunk的chunkSize和chunkId |
data | chunkSize | 无 | PCM数据部分 |
2. wave文件读写
wave读写类定义
// Description of an audio stream handed to wavefile::write(): the values that
// go into the "fmt " chunk plus the sample bytes for the "data" chunk.
struct waveinfo_ {
    uint16_t audioformat;     // format tag written to the fmt chunk (0x0001 = PCM)
    uint16_t numchannels;     // number of channels
    uint32_t sample_rate;     // samples per second
    uint32_t byte_rate;       // bytes of audio per second
    uint16_t blockalign;      // bytes per sample frame (all channels)
    uint16_t bits_persample;  // bits used to encode one sample
    char *data;               // start of the sample bytes
    uint64_t size;            // number of bytes available at `data`
};
typedef struct waveinfo_ waveinfo;
// Reads and writes a wave (RIFF/WAVE) file located at a given path.
//
// Usage for reading: construct, call open() to load the file into memory,
// then call read() to parse the header. write() creates a wave file from a
// waveinfo description.
//
// The object owns the in-memory buffer and the file descriptor; both are
// released in the destructor.
class wavefile{
public:
    // Remembers the path only; no file is touched until open() or write().
    // Every member is given a defined value so that nothing is read
    // uninitialized if read() fails or was never called.
    wavefile(string path)
        : buffer_(NULL),
          path_(path),
          fd_(-1),
          chunk_size(0),
          sub_chunk1_size(0),
          audioformat(0),
          numchannels(0),
          sample_rate(0),
          byte_rate(0),
          blockalign(0),
          bits_persample(0),
          sub_chunk2_size(0),
          data_buffer(NULL),
          data_len(0){
    }
    ~wavefile(){
        if(buffer_){
            // The byte array and the Buffer wrapper are allocated separately
            // in open(), so both have to be freed here.
            char *data = buffer_->data();
            delete [] data;
            delete buffer_;
        }
        if(fd_ != -1){
            ::close(fd_);
        }
    }
    // Opens the wave file and loads its content into memory.
    int open();
    // Parses the wave header from the loaded content.
    int read();
    // Writes a wave file described by `info`.
    int write(waveinfo &info);
private:
    // Not copyable: a copy would share buffer_ and fd_ with the original and
    // both destructors would free/close them. Declared but never defined.
    wavefile(const wavefile &);
    wavefile &operator=(const wavefile &);
private:
    Buffer *buffer_;   // whole file content, filled by open()
    string path_;      // path given to the constructor
    int fd_;           // file descriptor, -1 when no file is open
private:
    // Header fields filled in by read().
    uint32_t chunk_size;        // size field of the RIFF chunk
    uint32_t sub_chunk1_size;   // size field of the "fmt " chunk
    uint16_t audioformat;
    uint16_t numchannels;
    uint32_t sample_rate;
    uint32_t byte_rate;
    uint16_t blockalign;
    uint16_t bits_persample;
    uint32_t sub_chunk2_size;   // size field of the "data" chunk
    char *data_buffer;          // start of the sample data inside buffer_
    uint64_t data_len;          // length of the sample data in bytes
};
2.1 读wave文件
int wavefile::open(){
size_t ret = 0;
size_t count = 1024;
char read_buf[count+1];
size_t raw_buffer_size = 0;
char *raw_buffer = NULL;
// 1. 打开文件,返回文件句柄
if((fd_ = ::open(path_.c_str(), O_RDONLY)) < 0){
cout<<"open error!"<<endl;
return -1;
}
// 2. 分配buffer用于读wave文件
raw_buffer_size = ::lseek(fd_, 0, SEEK_END);
raw_buffer = new char[raw_buffer_size];
SrsBuffer buffer(raw_buffer, raw_buffer_size);
buffer_ = buffer.copy();
cout<<"raw_buffer_size:"<<raw_buffer_size<<endl;
// 3. 读wave文件
::lseek(fd_, 0, SEEK_SET);
while(1){
ret = ::read(fd_, read_buf, count);
if(ret == -1){
cout<<"read error!"<<endl;
close(fd_);
return -1;
}else if(ret == 0){
break;
}
buffer_->write_bytes(read_buf, ret);
}
buffer_->reset();
return 0;
}
// Parses the wave header from buffer_ (filled by open()).
//
// Expected layout: RIFF chunk header, "WAVE" type, a "fmt" chunk, an optional
// single "LIST" chunk, then the "data" chunk. On success the header fields,
// data_buffer and data_len are set and 0 is returned; -1 is returned on any
// format error or when the buffer is too short.
//
// NOTE(review): require(n) is used as "at least n bytes remain in the
// buffer", matching how the existing chunk_size check uses it - confirm
// against the buffer class.
int wavefile::read(){
    string chunk_id;
    string format;
    string sub_chunk1_id;
    string sub_chunk2_id;
    size_t list_chunk_size = 0;

    if(buffer_ == NULL){
        cout<<"wave buffer is empty, call open() first"<<endl;
        return -1;
    }

    // 1. RIFF chunk: id (4) + size (4), then the "WAVE" type.
    if(!buffer_->require(8)){
        cout<<"wave data truncated"<<endl;
        return -1;
    }
    chunk_id = buffer_->read_string(4);
    if(chunk_id.compare(0, 4, "RIFF") != 0){
        cout<<"chunk id != RIFF"<<endl;
        return -1;
    }
    chunk_size = buffer_->read_le4bytes();
    if(!buffer_->require(chunk_size)){
        cout<<"no engouth require:"<<chunk_size \
            <<" actually require:"<<buffer_->left()<<endl;
        return -1;
    }
    // "WAVE" (4) + fmt chunk id (4) + fmt chunk size (4).
    if(!buffer_->require(12)){
        cout<<"wave data truncated"<<endl;
        return -1;
    }
    format = buffer_->read_string(4);
    if(format.compare(0, 4, "WAVE") != 0){
        cout<<"format != WAVE"<<endl;
        return -1;
    }

    // 2. fmt chunk. Only the first three id characters are compared.
    sub_chunk1_id = buffer_->read_string(4);
    if(sub_chunk1_id.compare(0, 3, "fmt") != 0){
        cout<<"sub chunk1 id != fmt"<<endl;
        return -1;
    }
    sub_chunk1_size = buffer_->read_le4bytes();
    // The six fixed fields below occupy 16 bytes. A smaller size is invalid,
    // and "size - 16" on an unsigned value would wrap around to a huge number.
    if(sub_chunk1_size < 16 || !buffer_->require(sub_chunk1_size)){
        cout<<"invalid fmt chunk size:"<<sub_chunk1_size<<endl;
        return -1;
    }
    audioformat = buffer_->read_le2bytes();
    numchannels = buffer_->read_le2bytes();
    sample_rate = buffer_->read_le4bytes();
    byte_rate = buffer_->read_le4bytes();
    blockalign = buffer_->read_le2bytes();
    bits_persample = buffer_->read_le2bytes();
    if(sub_chunk1_size > 16){
        // Skip any extension bytes after the fixed fields.
        buffer_->read_string(sub_chunk1_size - 16);
    }

    // 3. data chunk, optionally preceded by one LIST chunk that is skipped.
    if(!buffer_->require(8)){
        cout<<"wave data truncated"<<endl;
        return -1;
    }
    sub_chunk2_id = buffer_->read_string(4);
    if(sub_chunk2_id.compare(0, 4, "LIST") == 0){
        list_chunk_size = buffer_->read_le4bytes();
        if(!buffer_->require(list_chunk_size)){
            cout<<"invalid LIST chunk size:"<<list_chunk_size<<endl;
            return -1;
        }
        buffer_->read_string(list_chunk_size);
        // Id (4) + size (4) of the chunk that follows the LIST chunk.
        if(!buffer_->require(8)){
            cout<<"wave data truncated"<<endl;
            return -1;
        }
        sub_chunk2_id = buffer_->read_string(4);
    }
    if(sub_chunk2_id.compare(0, 4, "data") != 0){
        cout<<"sub chunk2 id != data"<<endl;
        return -1;
    }
    sub_chunk2_size = buffer_->read_le4bytes();

    // The RIFF size must cover at least "WAVE" plus both sub-chunks. Computed
    // in 64 bits so that large size fields cannot wrap around.
    const uint64_t min_chunk_size = static_cast<uint64_t>(4)
        + 8 + sub_chunk1_size
        + 8 + sub_chunk2_size;
    if(chunk_size < min_chunk_size){
        cout<<"wav format error chunksize:"<<chunk_size \
            <<" sub_chunk1_size:"<<sub_chunk1_size \
            <<" list_chunk_size:"<<list_chunk_size \
            <<" sub_chunk2_size:"<<sub_chunk2_size \
            <<endl;
        return -1;
    }
    // The check above does not account for a skipped LIST chunk, so make sure
    // the sample data really is inside the buffer.
    if(!buffer_->require(sub_chunk2_size)){
        cout<<"data chunk truncated, sub_chunk2_size:"<<sub_chunk2_size<<endl;
        return -1;
    }

    cout<<"audioformat:"<<audioformat<<"\r\n" \
        <<"numchannels:"<<numchannels<<"\r\n" \
        <<"sample_rate:"<<sample_rate<<"\r\n" \
        <<"byte_rate:"<<byte_rate<<"\r\n" \
        <<"blockalign:"<<blockalign<<"\r\n" \
        <<"bits_persample:"<<bits_persample<<"\r\n" \
        <<endl;

    // The sample data starts at the current buffer position.
    data_buffer = buffer_->head();
    data_len = sub_chunk2_size;
    return 0;
}
2.2 写wave文件
// On-disk layout of the wave header written by write(): 12 + 24 + 8 = 44
// bytes. The structs are packed to 1 byte so they can be written to the file
// as-is, and use fixed-width integers so the layout does not depend on the
// platform's `int`/`short` widths. push/pop restores whatever packing was in
// effect before, instead of resetting it to the compiler default.
#pragma pack(push, 1)
typedef struct{
    char ChunkID[4];          // "RIFF"
    uint32_t ChunkSize;       // file size in bytes, excluding ChunkID and ChunkSize (8 bytes)
    char Format[4];           // "WAVE"
}WAVE_HEADER;
typedef struct{
    char Subchunk1ID[4];      // "fmt " (with a trailing space)
    uint32_t Subchunk1Size;   // size of this sub-chunk, excluding Subchunk1ID and Subchunk1Size (8 bytes)
    uint16_t AudioFormat;     // encoding of the audio data; 1 means PCM, other values are non-PCM formats
    uint16_t NumChannels;     // channel count: 1 = mono, 2 = stereo, ...
    uint32_t SampleRate;      // sample rate, e.g. 8000 or 44100
    uint32_t ByteRate;        // bytes per second = SampleRate * NumChannels * BitsPerSample / 8
    uint16_t BlockAlign;      // bytes per sample frame = NumChannels * BitsPerSample / 8
    uint16_t BitsPerSample;   // bits per sample, typically 8, 16 or 32
}WAVE_FMT;
typedef struct{
    char Subchunk2ID[4];      // "data"
    uint32_t Subchunk2Size;   // bytes of sample data that follow = NumSamples * NumChannels * BitsPerSample / 8
}WAVE_DATA;
#pragma pack(pop)
int write(waveinfo &info){
WAVE_HEADER wav_header;
WAVE_FMT wav_format;
WAVE_DATA wav_data;
wav_format.AudioFormat = channel.audioformat;
wav_format.NumChannels = channel.numchannels;
wav_format.SampleRate = channel.sample_rate;
wav_format.ByteRate = channel.byte_rate;
wav_format.BlockAlign = channel.blockalign;
wav_format.BitsPerSample = channel.bits_persample;
wav_format.Subchunk1Size = 16;
memcpy(wav_format.Subchunk1ID, "fmt ", 4);
memcpy(wav_data.Subchunk2ID, "data", 4);
wav_data.Subchunk2Size = channel.size;
memcpy(wav_header.ChunkID, "RIFF", 4);
memcpy(wav_header.Format, "WAVE", 4);
wav_header.ChunkSize = 36 + channel.size;
// 1. 创建新的wave文件
if((fd_ = ::open(path_.c_str(), O_WRONLY | O_CREAT)) < 0){
cout<<"open error!"<<endl;
return -1;
}
cout<<"wav_header size:"<<sizeof(wav_header)<<"\r\n";
cout<<"wav_format size:"<<sizeof(wav_format)<<"\r\n";
cout<<"wav_data size:"<<sizeof(wav_data)<<"\r\n";
// 2. 将wave信息写入到wave文件
::write(fd_, &wav_header, sizeof(wav_header));
::write(fd_, &wav_format, sizeof(wav_format));
::write(fd_, &wav_data, sizeof(wav_data));
::write(fd_, channel.data, channel.size);
::close(fd_);
}