在音视频直播中,一般都需要对数据做规范化处理,以保证对更多机器的兼容性和直播效果。对于音频来说,最常做的处理就是重采样。那么怎么对音频进行重采样呢?最常用的方法有两种:一种是使用现成的库,如 FFmpeg、opus、webrtc、sox 等;另一种是使用插值算法——音频重采样本质上就是插值,与图像方面的插值算法没有太大的区别。
1.使用FFmpeg进行音频重采样
使用FFmpeg解码音频数据时,往往需要改变原音频的采样率(即改变文件原有的采样率),这就是重采样。
比如一音乐文件的采样率22050,而播放端往往是固定的采样率,比如44100。在这种情况下,如果把解码出来的数据直接播放,会产生快进的效果。这个时候就需要对解码出来的数据作一次重采样,将数据转化为44100采样率下的数据,才能正确播放。
(1).FFmpeg原生API:
ffmpeg提供了一组用来重采样的API,主要如下:
/**
* Initialize audio resampling context.
*
* NOTE(review): this looks like the legacy libavcodec resampling API;
* presumably superseded by libswresample in current FFmpeg releases —
* confirm against the FFmpeg version actually in use.
*
* @param output_channels number of output channels
* @param input_channels number of input channels
* @param output_rate output sample rate
* @param input_rate input sample rate
* @param sample_fmt_out requested output sample format
* @param sample_fmt_in input sample format
* @param filter_length length of each FIR filter in the filterbank relative to the cutoff frequency
* @param log2_phase_count log2 of the number of entries in the polyphase filterbank
* @param linear if 1 then the used FIR filter will be linearly interpolated
* between the 2 closest, if 0 the closest will be used
* @param cutoff cutoff frequency, 1.0 corresponds to half the output sampling rate
* @return allocated ReSampleContext, NULL if error occurred
*/
ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
int output_rate, int input_rate,
enum AVSampleFormat sample_fmt_out,
enum AVSampleFormat sample_fmt_in,
int filter_length, int log2_phase_count,
int linear, double cutoff);
/**
* Resample a block of audio.
*
* @param s resample context created with av_audio_resample_init()
* @param output destination buffer for the resampled samples
* @param input source samples
* @param nb_samples number of input samples (a sample count, not a byte count)
* @return number of samples produced after resampling
*/
int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);
/**
* Free resample context.
*
* @param s a non-NULL pointer to a resample context previously
* created with av_audio_resample_init()
*/
void audio_resample_close(ReSampleContext *s);
函数av_audio_resample_init()用来初始化重采样的参数,前4个参数很好理解;后6个参数基本上是使用缺省参数,分别为:
AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16,16, 10, 0, 1
函数audio_resample()用来重采样,前3个参数都好理解,最后一个参数是指“原数据的采样个数”,而不是input的bytes数。该函数的返回值也是采样个数,不过是重采样之后的。
函数audio_resample_close()用来清理重采样时分配的资源。
相关代码如下:
初始化
// Create the resample engine only when the decoder's sample rate differs
// from the desired output rate; otherwise this fragment leaves
// m_resample_engine untouched.
if (m_codec_ctx->sample_rate != m_out_samplerate)
{
LOGW("%s, need re-sample, initialize re-sample engine! out channels:%d, out sample rate:%d hz, in channels:%d, in sample rate:%d",__FUNCTION__, 2, m_out_samplerate, m_codec_ctx->channels, m_codec_ctx->sample_rate);
// Output is hard-coded to 2 channels, S16 in and out; the trailing
// arguments (16, 10, 0, 1) are filter_length, log2_phase_count, linear and
// cutoff.
// NOTE(review): the result is not checked for NULL here; the decode path
// only resamples when m_resample_engine is non-zero — confirm that falling
// back to un-resampled output is the intended behaviour on failure.
m_resample_engine = av_audio_resample_init( 2, m_codec_ctx->channels, m_out_samplerate, m_codec_ctx->sample_rate, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16, 16, 10, 0, 1);
}
(2).重采样代码:
// Decode one chunk of the current packet into m_audio_buff, optionally
// resample it, and append the result to the caller's output buffer.
// frame_size is in/out: capacity of m_audio_buff on entry; after the call it
// is used below as the number of decoded bytes.
frame_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
memset(m_audio_buff, 0, AVCODEC_MAX_AUDIO_FRAME_SIZE);
decoded_len = avcodec_decode_audio3(m_codec_ctx, (short *)m_audio_buff, &frame_size, &m_avpkt);
LOGI("%s, current decoded size:%d", __FUNCTION__, decoded_len);
if (decoded_len > 0)
{
// Advance the packet past the bytes the decoder consumed.
m_avpkt.size -= decoded_len;
m_avpkt.data += decoded_len;
// Default: hand out the decoded data as-is.
decoded_audio_len = frame_size;
valid_data_pointer = m_audio_buff;
// Resample only when an engine was created during initialization.
if (m_resample_engine)
{
// Convert the decoded byte count into a per-channel sample count
// (2 bytes per sample).
int after_resampled_len = 0;
int before_resampled_len = frame_size/(2 * m_codec_ctx->channels);
// NOTE(review): the resampled buffer is cleared with the same size as the
// decode buffer; presumably it must be large enough for up-sampled output —
// verify its allocation.
memset(m_audio_resampled_buff, 0, AVCODEC_MAX_AUDIO_FRAME_SIZE);
after_resampled_len = audio_resample(m_resample_engine, (short *)m_audio_resampled_buff, (short *)m_audio_buff, before_resampled_len);
LOGI("%s, re-sampled! length in:%d, length out:%d", __FUNCTION__, before_resampled_len, after_resampled_len);
// Back to bytes: samples * 2 bytes per sample * 2 output channels.
decoded_audio_len = after_resampled_len * 2 * 2; //convert short to byte, and 2 channels
valid_data_pointer = m_audio_resampled_buff;
}
// NOTE(review): no capacity check on buff is visible in this fragment —
// confirm the caller guarantees enough room.
memcpy(buff+copied_len, valid_data_pointer, decoded_audio_len);
copied_len += decoded_audio_len;
LOGI("%s, copy1, %d bytes has copied to output buff, total:%d!", __FUNCTION__, decoded_audio_len, copied_len); }
(3).释放代码:
// Release the resample engine if one was created, then clear the member so
// the `if (m_resample_engine)` checks see that no engine exists.
if (m_resample_engine)
{
audio_resample_close(m_resample_engine);
m_resample_engine = 0;
}
2.使用插值算法
不多说了,直接上代码,纯C的,可移植性高。
#ifdef __cplusplus
extern "C" {
#endif
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"
#define DR_MP3_IMPLEMENTATION
#include "dr_mp3.h"
#include "timing.h"
/**
 * Write 32-bit IEEE float PCM samples to a RIFF/WAVE file.
 *
 * @param filename         path of the file to create
 * @param buffer           samples to write
 * @param sampleRate       sample rate stored in the file header
 * @param totalSampleCount number of samples handed to drwav_write()
 * @param channels         channel count stored in the file header
 *
 * The process is terminated with exit(1) when the file cannot be opened for
 * writing or when fewer samples than requested were written.
 */
void wavWrite_f32(char *filename, float *buffer, int sampleRate, uint32_t totalSampleCount, uint32_t channels) {
    drwav_data_format format;
    format.container = drwav_container_riff;
    format.format = DR_WAVE_FORMAT_IEEE_FLOAT;
    format.channels = channels;
    format.sampleRate = (drwav_uint32) sampleRate;
    format.bitsPerSample = 32;

    drwav *pWav = drwav_open_file_write(filename, &format);
    if (pWav == NULL) {
        /* Previously this case returned silently and no output was produced. */
        fprintf(stderr, "open file [%s] for writing error.\n", filename);
        exit(1);
    }

    drwav_uint64 samplesWritten = drwav_write(pWav, totalSampleCount, buffer);
    drwav_uninit(pWav);
    if (samplesWritten != totalSampleCount) {
        fprintf(stderr, "write file [%s] error.\n", filename);
        exit(1);
    }
}
/**
 * Decode an audio file to 32-bit float PCM.
 *
 * The file is first tried as WAV; if that fails it is tried as MP3.
 *
 * @param filename    path of the file to read
 * @param sampleRate  [out] sample rate reported by the decoder
 * @param sampleCount [out] decoded count reported by the decoder multiplied
 *                    by the channel count
 * @param channels    [out] channel count reported by the decoder
 * @return decoded samples; never NULL, because the process is terminated
 *         with exit(1) when neither decoder can read the file. The caller
 *         in this file releases the buffer with free().
 */
float *wavRead_f32(const char *filename, uint32_t *sampleRate, uint64_t *sampleCount, uint32_t *channels) {
    drwav_uint64 totalSampleCount = 0;
    float *input = drwav_open_file_and_read_pcm_frames_f32(filename, channels, sampleRate, &totalSampleCount);
    if (input == NULL) {
        /* Zero-initialize so the decoder never sees indeterminate
         * configuration values. */
        drmp3_config config = {0};
        input = drmp3_open_file_and_read_f32(filename, &config, &totalSampleCount);
        if (input != NULL) {
            *channels = config.outputChannels;
            *sampleRate = config.outputSampleRate;
        }
    }
    if (input == NULL) {
        fprintf(stderr, "read file [%s] error.\n", filename);
        exit(1);
    }
    *sampleCount = totalSampleCount * (*channels);
    return input;
}
/**
 * Split a path into drive, directory, base name and extension.
 *
 * Any output pointer may be NULL, in which case that component is skipped.
 * No sizes are passed in, so each non-NULL buffer must be large enough for
 * its component plus the terminating NUL (drv needs 3 bytes).
 *
 * @param path input path; both '\\' and '/' are accepted as separators
 * @param drv  [out] "X:" when the path starts with a drive prefix, else ""
 * @param dir  [out] directory part including the trailing separator
 * @param name [out] file name without the extension
 * @param ext  [out] extension including the leading '.', or ""
 */
void splitpath(const char *path, char *drv, char *dir, char *name, char *ext) {
    const char *end;
    const char *p;
    const char *s;

    /* The drive prefix must be skipped even when the caller does not want
     * it; otherwise the ':' scan below stops at the drive colon and every
     * other component comes out wrong. */
    if (path[0] && path[1] == ':') {
        if (drv) {
            drv[0] = path[0];
            drv[1] = path[1];
            drv[2] = '\0';
        }
        path += 2;
    } else if (drv) {
        *drv = '\0';
    }

    /* Find the end of the path proper: end of string or a ':' . */
    for (end = path; *end && *end != ':';) {
        end++;
    }

    /* Walk back to the last '.' of the final component, if there is one. */
    for (p = end; p > path && *--p != '\\' && *p != '/';) {
        if (*p == '.') {
            end = p;
            break;
        }
    }

    if (ext) {
        /* Copies everything from the dot up to and including the NUL. */
        for (s = end; (*ext = *s++);) {
            ext++;
        }
    }

    /* Find where the final component starts (just after the last separator). */
    for (p = end; p > path;) {
        if (*--p == '\\' || *p == '/') {
            p++;
            break;
        }
    }

    if (name) {
        for (s = p; s < end;) {
            *name++ = *s++;
        }
        *name = '\0';
    }

    if (dir) {
        for (s = path; s < p;) {
            *dir++ = *s++;
        }
        *dir = '\0';
    }
}
/**
 * Resample interleaved float PCM by linear interpolation.
 *
 * The read position is tracked in 32.32 fixed point: the integer part
 * selects the input frame, the fractional part is the interpolation weight
 * between that frame and the next one.
 *
 * @param input         interleaved input samples
 * @param output        destination for outputSize * channels samples, or
 *                      NULL to only query the output size
 * @param inSampleRate  sample rate of the input, must be > 0
 * @param outSampleRate desired sample rate, must be > 0
 * @param inputSize     number of input frames (one sample per channel)
 * @param channels      number of interleaved channels
 * @return inputSize * outSampleRate / inSampleRate, or 0 when input is NULL
 *         or a sample rate is not positive
 */
uint64_t Resample_f32(const float *input, float *output, int inSampleRate, int outSampleRate, uint64_t inputSize,
                      uint32_t channels
) {
    if (input == NULL || inSampleRate <= 0 || outSampleRate <= 0)
        return 0;
    uint64_t outputSize = inputSize * (uint64_t) outSampleRate / (uint64_t) inSampleRate;
    if (output == NULL)
        return outputSize;

    const uint64_t fixedFraction = (1ULL << 32);
    const uint64_t fractionMask = fixedFraction - 1;
    const double normFixed = (1.0 / (double) fixedFraction);
    const double stepDist = ((double) inSampleRate / (double) outSampleRate);
    const uint64_t step = ((uint64_t) (stepDist * (double) fixedFraction + 0.5));

    uint64_t framePos = 0;  /* integer part: index of the current input frame */
    uint64_t fraction = 0;  /* fractional part, in units of 1 / 2^32 */

    /* 64-bit counter: a 32-bit one would never reach a larger outputSize. */
    for (uint64_t i = 0; i < outputSize; i += 1) {
        /* Rounding of `step` can drift; never start past the last frame. */
        if (framePos >= inputSize)
            framePos = inputSize - 1;
        const float *cur = input + framePos * channels;
        /* At the last frame there is no successor: hold the value instead
         * of reading past the end of the input. */
        const float *next = (framePos + 1 < inputSize) ? cur + channels : cur;
        const double weight = (double) fraction * normFixed;
        for (uint32_t c = 0; c < channels; c += 1) {
            *output++ = (float) (cur[c] + (next[c] - cur[c]) * weight);
        }
        fraction += step;
        framePos += fraction >> 32;
        fraction &= fractionMask;
    }
    return outputSize;
}
/**
 * Resample interleaved signed 16-bit PCM by linear interpolation.
 *
 * The read position is tracked in 32.32 fixed point: the integer part
 * selects the input frame, the fractional part is the interpolation weight
 * between that frame and the next one. Interpolated values are truncated
 * when converted back to int16_t.
 *
 * @param input         interleaved input samples
 * @param output        destination for outputSize * channels samples, or
 *                      NULL to only query the output size
 * @param inSampleRate  sample rate of the input, must be > 0
 * @param outSampleRate desired sample rate, must be > 0
 * @param inputSize     number of input frames (one sample per channel)
 * @param channels      number of interleaved channels
 * @return inputSize * outSampleRate / inSampleRate, or 0 when input is NULL
 *         or a sample rate is not positive
 */
uint64_t Resample_s16(const int16_t *input, int16_t *output, int inSampleRate, int outSampleRate, uint64_t inputSize,
                      uint32_t channels
) {
    if (input == NULL || inSampleRate <= 0 || outSampleRate <= 0)
        return 0;
    uint64_t outputSize = inputSize * (uint64_t) outSampleRate / (uint64_t) inSampleRate;
    if (output == NULL)
        return outputSize;

    const uint64_t fixedFraction = (1ULL << 32);
    const uint64_t fractionMask = fixedFraction - 1;
    const double normFixed = (1.0 / (double) fixedFraction);
    const double stepDist = ((double) inSampleRate / (double) outSampleRate);
    const uint64_t step = ((uint64_t) (stepDist * (double) fixedFraction + 0.5));

    uint64_t framePos = 0;  /* integer part: index of the current input frame */
    uint64_t fraction = 0;  /* fractional part, in units of 1 / 2^32 */

    /* 64-bit counter: a 32-bit one would never reach a larger outputSize. */
    for (uint64_t i = 0; i < outputSize; i += 1) {
        /* Rounding of `step` can drift; never start past the last frame. */
        if (framePos >= inputSize)
            framePos = inputSize - 1;
        const int16_t *cur = input + framePos * channels;
        /* At the last frame there is no successor: hold the value instead
         * of reading past the end of the input. */
        const int16_t *next = (framePos + 1 < inputSize) ? cur + channels : cur;
        const double weight = (double) fraction * normFixed;
        for (uint32_t c = 0; c < channels; c += 1) {
            *output++ = (int16_t) (cur[c] + (next[c] - cur[c]) * weight);
        }
        fraction += step;
        framePos += fraction >> 32;
        fraction &= fractionMask;
    }
    return outputSize;
}
/**
 * Print the command-line usage text to stdout, then block until one
 * character has been read from stdin.
 */
void printUsage() {
    static const char *const usageLines[] = {
        "usage:\n",
        "./Resampler input.wav 48000\n",
        "./Resampler input.mp3 16000\n",
        "or\n",
        "./Resampler input.wav output.wav 8000\n",
        "./Resampler input.mp3 output.wav 44100\n",
        "press any key to exit.\n",
    };
    const size_t lineCount = sizeof usageLines / sizeof usageLines[0];

    for (size_t idx = 0; idx < lineCount; ++idx) {
        fputs(usageLines[idx], stdout);
    }
    getchar();
}
/**
 * Decode in_file, resample it to targetSampleRate and write the result to
 * out_file as a float WAV file.
 *
 * @param in_file          path of the WAV/MP3 file to read
 * @param out_file         path of the WAV file to write
 * @param targetSampleRate desired output sample rate; 0 (or a value that
 *                         does not fit the int parameter of Resample_f32)
 *                         prints the usage text and returns
 */
void resampler(char *in_file, char *out_file, uint32_t targetSampleRate) {
    if (targetSampleRate == 0 || targetSampleRate > (uint32_t) INT32_MAX) {
        printUsage();
        return;
    }
    uint32_t sampleRate = 0;
    uint64_t sampleCount = 0;
    uint32_t channels = 0;
    float *input = wavRead_f32(in_file, &sampleRate, &sampleCount, &channels);
    if (input == NULL)
        return;
    if (channels == 0 || sampleRate == 0) {
        /* Guard the divisions below against a malformed header. */
        fprintf(stderr, "read file [%s] error.\n", in_file);
        free(input);
        return;
    }

    /* Work in frames for both the size query and the real pass, so the
     * allocated, filled and written sample counts are all the same number.
     * (Querying with the total sample count could report up to
     * channels - 1 more samples than the resampler actually writes.) */
    uint64_t frameCount = sampleCount / channels;
    uint64_t targetFrameCount = Resample_f32(input, NULL, sampleRate, targetSampleRate, frameCount, channels);
    uint64_t targetSampleCount = targetFrameCount * channels;

    float *output = (float *) malloc(targetSampleCount * sizeof(float));
    if (output) {
        double startTime = now();
        Resample_f32(input, output, sampleRate, targetSampleRate, frameCount, channels);
        double time_interval = calcElapsed(startTime, now());
        printf("time interval: %f ms\n ", (time_interval * 1000));
        wavWrite_f32(out_file, output, targetSampleRate, (uint32_t) targetSampleCount, channels);
        free(output);
    } else {
        fprintf(stderr, "alloc output buffer error.\n");
    }
    free(input);
}
/* Parse a strictly positive decimal sample rate; returns 0 when the text is
 * not a plain number or is out of range. */
static uint32_t parseSampleRate(const char *text) {
    char *end = NULL;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || value <= 0 || value > INT32_MAX)
        return 0;
    return (uint32_t) value;
}

/**
 * Entry point.
 *
 *   Resampler <input> <rate>            output goes to "<input-stem>_out.wav"
 *   Resampler <input> <output> <rate>
 *
 * @return 0 on success, -1 on invalid arguments
 */
int main(int argc, char *argv[]) {
    printf("Audio Processing\n");
    printf("blog:http://cpuimage.cnblogs.com/\n");
    printf("Audio Resampler\n");
    if (argc < 3) {
        printUsage();
        return -1;
    }
    char *in_file = argv[1];
    uint32_t targetSampleRate = parseSampleRate(argc > 3 ? argv[3] : argv[2]);
    if (targetSampleRate == 0) {
        printUsage();
        return -1;
    }

    if (argc > 3) {
        char *out_file = argv[2];
        resampler(in_file, out_file, targetSampleRate);
    } else {
        char drive[3];
        char dir[256];
        char fname[256];
        char out_file[1024];

        /* splitpath() takes no buffer sizes: every component is at most as
         * long as the whole path, so bounding the path bounds them all. */
        if (strlen(in_file) >= sizeof fname) {
            fprintf(stderr, "input path [%s] is too long.\n", in_file);
            return -1;
        }
        /* The extension is not needed to build the output name. */
        splitpath(in_file, drive, dir, fname, NULL);
        snprintf(out_file, sizeof out_file, "%s%s%s_out.wav", drive, dir, fname);
        resampler(in_file, out_file, targetSampleRate);
    }
    return 0;
}
#ifdef __cplusplus
}
#endif
参考博客: