一直以来都想把自己的代码打磨成工业级别的,但是奈何实力有限,憋了两天,还是这样一个烂烂的代码。还是有很多硬编码的地方,而且没经过充分测试。
马上就放假回家了,想着一定要有一个结尾,不能把事情拖到寒假或者明年,就先把这个烂代码放上。算是一个总结。
主要实现了从mp3中提取一段时长的pcm数据和wav头。这个需求是在做基于内容的音乐检索的时候遇到的。上代码。
#include <limits.h>
#include <malloc.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libavutil/avutil.h>
#include <libavutil/attributes.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/file.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#include "AudioFormatUtils.h"
/* Alternative logging backend (Android logcat), kept disabled: */
//#define LOG_TAG "GETSTREAM"
//#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG,LOG_TAG,__VA_ARGS__) // defines the LOGD level
//#define LOGI(...) __android_log_print(ANDROID_LOG_INFO,LOG_TAG,__VA_ARGS__) // defines the LOGI level
//#define LOGW(...) __android_log_print(ANDROID_LOG_WARN,LOG_TAG,__VA_ARGS__) // defines the LOGW level
//#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR,LOG_TAG,__VA_ARGS__) // defines the LOGE level
//#define LOGF(...) __android_log_print(ANDROID_LOG_FATAL,LOG_TAG,__VA_ARGS__) // defines the LOGF level
/*
 * Active logging backend: every level expands to the same fprintf call on
 * stderr, so the level name is informational only. The first macro argument
 * is used as a printf-style format string.
 */
#define LOG_TAG stderr
#define LOGD(...) fprintf(LOG_TAG, __VA_ARGS__)
#define LOGI(...) fprintf(LOG_TAG, __VA_ARGS__)
#define LOGW(...) fprintf(LOG_TAG, __VA_ARGS__)
#define LOGE(...) fprintf(LOG_TAG, __VA_ARGS__)
#define LOGF(...) fprintf(LOG_TAG, __VA_ARGS__)
/* Global output buffer for the decoded PCM bytes; get_wav_stream() sizes it with realloc(). */
uint8_t *resdatas = NULL;
/* Global storage for the 44-byte WAV header that get_wav_stream() fills through get_wav_header(). */
unsigned char header[44];
/* Copy a 4-character chunk tag to p and return the position just after it. */
static unsigned char *wav_put_tag(unsigned char *p, const char *tag) {
	memcpy(p, tag, 4);
	return p + 4;
}

/* Store the low 16 bits of v at p in little-endian order; returns p + 2. */
static unsigned char *wav_put_le16(unsigned char *p, unsigned long v) {
	*p++ = (unsigned char) (v >> 0);
	*p++ = (unsigned char) (v >> 8);
	return p;
}

/* Store the low 32 bits of v at p in little-endian order; returns p + 4. */
static unsigned char *wav_put_le32(unsigned char *p, unsigned long v) {
	*p++ = (unsigned char) (v >> 0);
	*p++ = (unsigned char) (v >> 8);
	*p++ = (unsigned char) (v >> 16);
	*p++ = (unsigned char) (v >> 24);
	return p;
}

/**
 * Serialize a canonical 44-byte PCM WAV header.
 *
 * The payload size written into the header is
 * ((bits_per_sample + 7) / 8) * total_samples bytes, i.e. total_samples is
 * treated as the number of individual sample values (the channel count is
 * not multiplied in). Only the low 32 bits of each size field are stored.
 *
 * The size is computed with integer arithmetic; the previous float
 * computation lost precision once the payload exceeded 2^24 bytes.
 *
 * @param header     Destination buffer, at least 44 bytes long.
 * @param wavHeader  Stream description (sample rate, bit depth, channels,
 *                   sample count).
 * @return 0 on success, -1 if either pointer is NULL.
 */
int get_wav_header(unsigned char header[], WavHeader *wavHeader) {
	unsigned char *p = header;
	unsigned long bytes_per_sample;
	unsigned long data_size;

	if (header == NULL || wavHeader == NULL) {
		return -1;
	}

	bytes_per_sample = (wavHeader->bits_per_sample + 7) / 8;
	data_size = bytes_per_sample * wavHeader->total_samples;

	/* RIFF chunk descriptor (bytes 0-11). */
	p = wav_put_tag(p, "RIFF");
	p = wav_put_le32(p, data_size + (44 - 8)); /* file size minus the first 8 bytes */
	p = wav_put_tag(p, "WAVE");

	/* "fmt " sub-chunk (bytes 12-35). */
	p = wav_put_tag(p, "fmt ");
	p = wav_put_le32(p, 0x10);                  /* size of the fmt payload: 16 */
	p = wav_put_le16(p, 0x01);                  /* format tag 1 */
	p = wav_put_le16(p, wavHeader->channels);   /* channel count */
	p = wav_put_le32(p, wavHeader->samplerate); /* sample rate */
	p = wav_put_le32(p,
			wavHeader->samplerate * bytes_per_sample * wavHeader->channels); /* bytes per second */
	p = wav_put_le16(p, bytes_per_sample * wavHeader->channels); /* block align: bytes per sample frame */
	p = wav_put_le16(p, wavHeader->bits_per_sample);             /* bits per sample, usually 8 or 16 */

	/* "data" sub-chunk header (bytes 36-43); the payload length excludes the header. */
	p = wav_put_tag(p, "data");
	p = wav_put_le32(p, data_size);

	return 0;
}
/**
* 获取一定时长的pcm数据.
* @param inFile 输入文件.
* @param start_pos_s 起始位置,单位为秒.
* @param fmt len_s 持续长度,单位为秒.
* @param channel_num 声道数.
* @param bit_rate 比特率.
* @param sample_rate 采样率.
* @return 成功时获取的数据长度,失败时返回-1.
*/
int get_wav_stream(const char * inFile, double start_pos_s, double len_s,
int channel_num, int bit_rate, int sample_rate) {
int res_data_size = 0;
FILE *fout;
AVPacket packet;
AVFrame* frame = NULL;
AVFormatContext* formatContext = NULL;
AVStream* audioStream = NULL;
AVCodecContext* codecContext = NULL;
SwrContext* swrContext = NULL;
frame = av_frame_alloc();
if (!frame) {
LOGE("Error allocating the frame");
goto end;
}
if (avformat_open_input(&formatContext, inFile, NULL, NULL) != 0) {
LOGE("Error opening the file");
goto end;
}
if (avformat_find_stream_info(formatContext, NULL) < 0) {
LOGE("Error finding the stream info");
goto end;
}
double end_pos_s = (start_pos_s + len_s); //请求的时长,单位为内部时长
double duration = formatContext->duration / AV_TIME_BASE; // AV_TIME_BASE; //文件总时长
if (start_pos_s > duration) {
LOGE("Start time is biger than the total time.");
goto end;
}
if ((end_pos_s > duration) || len_s == 0) {
end_pos_s = duration;
len_s = duration - start_pos_s;
}
LOGE("len_s= %f\n", len_s);
unsigned int i;
for (i = 0; i < formatContext->nb_streams; ++i) {
if (formatContext->streams[i]->codec->codec_type
== AVMEDIA_TYPE_AUDIO) {
audioStream = formatContext->streams[i];
break;
}
}
if (audioStream == NULL) {
LOGE("Could not find any audio stream in the file");
goto end;
}
codecContext = audioStream->codec;
codecContext->codec = avcodec_find_decoder(codecContext->codec_id);
if (codecContext->codec == NULL) {
LOGE("Couldn't find a proper decoder");
goto end;
} else if (avcodec_open2(codecContext, codecContext->codec, NULL) != 0) {
LOGE("Couldn't open the context with the decoder");
goto end;
}
/*START*************************START***********************START*/
/*设置输出格式,为了未来jni调用的简单,所以都传的int,反正是自己用的,直观一些*/
int64_t outChannelLayout;
if (channel_num == 1) {
outChannelLayout = AV_CH_LAYOUT_MONO;
} else {
outChannelLayout = AV_CH_LAYOUT_STEREO;
}
enum AVSampleFormat outSampleFormat;
if (bit_rate == 32) {
outSampleFormat = AV_SAMPLE_FMT_S32;
} else if (bit_rate == 8) {
outSampleFormat = AV_SAMPLE_FMT_U8;
} else {
outSampleFormat = AV_SAMPLE_FMT_S16;
}
int outSampleRate = sample_rate;
swrContext = swr_alloc_set_opts(NULL, outChannelLayout, outSampleFormat,
outSampleRate,
av_get_default_channel_layout(codecContext->channels),
codecContext->sample_fmt, codecContext->sample_rate, 0, NULL);
if (swrContext == NULL) {
LOGE( "Couldn't create the SwrContext");
goto end;
}
if (swr_init(swrContext) != 0) {
LOGE("Couldn't initialize the SwrContext");
goto end;
}
/*END**************************END*************************END*/
/*START**************************Seek to pos*************************START*/
int seek_pos = start_pos_s * AV_TIME_BASE;
LOGE("AV_TIME_BASE=%d seek_pos=%d\n", AV_TIME_BASE, seek_pos);
if (formatContext->start_time != AV_NOPTS_VALUE)
seek_pos += formatContext->start_time;
if (av_seek_frame(formatContext, -1, seek_pos, AVSEEK_FLAG_BACKWARD) < 0) {
LOGE(
"%s,av_seek_frame() seek to %.3f failed!", __FUNCTION__, (double)seek_pos/AV_TIME_BASE);
goto end;
}
/*END**************************seek END*************************END*/
int total_samples = len_s * sample_rate;
LOGE("total_samples=%d\n", total_samples);
int res_bufsize = av_samples_get_buffer_size(0, channel_num, total_samples,
outSampleFormat, 1);
if(!res_bufsize){
LOGE("av_samples_get_buffer_size error");
goto end;
}
LOGE("res_bufsize=%d\n", res_bufsize);
//for transfout = fopen("fwrite.wav", "w");
//fseek(fout, 44, SEEK_SET);
WavHeader h;
h.bits_per_sample = bit_rate;
h.channels = channel_num;
h.samplerate = sample_rate;
h.total_samples = res_bufsize;
get_wav_header(header, &h);
av_init_packet(&packet);
while (av_read_frame(formatContext, &packet) == 0) {
if (packet.stream_index == audioStream->index) {
AVPacket decodingPacket = packet;
while (decodingPacket.size > 0) {
// Try to decode the packet into a frame
int frameFinished = 0;
int result = avcodec_decode_audio4(codecContext, frame,
&frameFinished, &decodingPacket);
if (result < 0 || frameFinished == 0) {
break;
}
int dst_data_size = (outSampleRate / frame->sample_rate)
* frame->nb_samples * channel_num * (bit_rate + 7) / 8
+ (bit_rate + 7) / 8;
uint8_t buffer[dst_data_size];
uint8_t* pointers[dst_data_size];
pointers[0] = &buffer[0];
int numSamplesOut = swr_convert(swrContext, pointers,
dst_data_size / channel_num,
(const unsigned char**) frame->extended_data,
frame->nb_samples);
int dst_bufsize = av_samples_get_buffer_size(0, channel_num,
numSamplesOut, outSampleFormat, 1);
///filefwrite(pointers[0], 1, dst_bufsize, fout);
int tmp = res_data_size + dst_bufsize;
int sign = 1;
if (tmp > res_bufsize) {
dst_bufsize -= (tmp - res_bufsize);
sign = 0;
}
resdatas = realloc(resdatas, res_data_size + dst_bufsize);
int i = 0;
for (i = res_data_size; i < res_data_size + dst_bufsize; i++) {
resdatas[i] = buffer[i - res_data_size];
}
if (!sign) {
break;
}
res_data_size += dst_bufsize;
decodingPacket.size -= result;
decodingPacket.data += result;
}
}
av_free_packet(&packet);
}
fprintf(stderr, "count=%d\n", res_data_size);
//fseek(fout, 0, SEEK_SET);
///filefwrite(&header, 1, 44, fout);
end: if (frame) {
av_free(frame);
}
if (codecContext) {
avcodec_close(codecContext);
}
if (formatContext) {
avformat_close_input(&formatContext);
}
if (swrContext) {
swr_free(&swrContext);
}
return res_data_size > 0 ? res_data_size : -1;
}
/**
 * Demo driver: decodes the first 12 seconds of "ping.mp3" to 16-bit stereo
 * PCM at 44100 Hz and writes the raw samples (no WAV header) to "out.pcm".
 *
 * @return 0 on success, 1 if decoding or writing the output failed.
 */
int main(int argc, char** argv) {
	const char *file = "ping.mp3";
	double start = 0;
	double len = 12;
	int channel = 2;
	int bit = 16;
	int sample = 44100;
	int status = 0;
	int res_size;
	FILE *out_file;

	(void) argc;
	(void) argv;

	res_size = get_wav_stream(file, start, len, channel, bit, sample);
	if (res_size <= 0) {
		/* -1 must never reach fwrite(): as a size_t it would be a huge length. */
		fprintf(stderr, "Failed to decode %s\n", file);
		free(resdatas);
		resdatas = NULL;
		return 1;
	}

	/* "wb": the payload is binary, text mode would corrupt it on some platforms. */
	out_file = fopen("out.pcm", "wb");
	if (out_file == NULL) {
		perror("out.pcm");
		status = 1;
	} else {
		if (fwrite(resdatas, 1, (size_t) res_size, out_file)
				!= (size_t) res_size) {
			perror("out.pcm");
			status = 1;
		}
		/* fclose flushes buffered data, so its result matters for writes. */
		if (fclose(out_file) != 0) {
			perror("out.pcm");
			status = 1;
		}
	}

	free(resdatas);
	resdatas = NULL;
	return status;
}
对了,还有一个数据结构。
/* Description of a PCM stream, consumed by get_wav_header() to build a 44-byte WAV header. */
typedef struct {
unsigned long samplerate; /* sample rate; also used to derive the bytes-per-second field */
unsigned int bits_per_sample; /* bits per sample; rounded up to whole bytes for size computations */
unsigned int channels; /* channel count */
unsigned long total_samples; /* sample count; get_wav_header() multiplies it by bytes per sample to get the data size */
} WavHeader;
ok,就酱紫了先。