iOS 视频直播开发笔记(五)

最新推荐文章于 2022-05-03 17:48:30 发布

大王算法

最新推荐文章于 2022-05-03 17:48:30 发布

阅读量324

点赞数

本文链接：https://blog.csdn.net/fanyun_01/article/details/108890121

版权

C++ SDK开发实战宝典专栏收录该内容

62 篇文章 8 订阅

订阅专栏

在我们的音视频直播中，一般都需要做规范化处理，保证更多机器的兼容性和直播效果。对于音频来说，做得最多的就是重采样了。那么怎么对音频进行重采样呢？一般来说，最常用的有两种方式：一种是使用现成的库，如 FFmpeg、opus、webrtc、sox 等；另一种是使用插值采样算法——音频重采样本质上就是插值，与图像方面的插值算法没有太大的区别。

1.使用FFmpeg进行音频重采样

使用 FFmpeg 解码音频数据时，往往需要改变原音频的采样率，这个过程就是重采样。

比如一音乐文件的采样率22050，而播放端往往是固定的采样率，比如44100。在这种情况下，如果把解码出来的数据直接播放，会产生快进的效果。这个时候就需要对解码出来的数据作一次重采样，将数据转化为44100采样率下的数据，才能正确播放。

(1).FFmpeg原生API：

ffmpeg提供了一组用来重采样的API，主要如下：

/**
 * Initialize audio resampling context.
 *
 * @param output_channels  number of output channels
 * @param input_channels   number of input channels
 * @param output_rate      output sample rate
 * @param input_rate       input sample rate
 * @param sample_fmt_out   requested output sample format
 * @param sample_fmt_in    input sample format
 * @param filter_length    length of each FIR filter in the filterbank relative to the cutoff frequency
 * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
 * @param linear           if 1 then the used FIR filter will be linearly interpolated
 *                         between the 2 closest, if 0 the closest will be used
 * @param cutoff           cutoff frequency, 1.0 corresponds to half the output sampling rate
 * @return allocated ReSampleContext, NULL if error occurred
 */
ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
                                        int output_rate, int input_rate,
                                        enum AVSampleFormat sample_fmt_out,
                                        enum AVSampleFormat sample_fmt_in,
                                        int filter_length, int log2_phase_count,
                                        int linear, double cutoff);

/**
 * Resample a block of audio.
 *
 * @param s          resample context created with av_audio_resample_init()
 * @param output     destination buffer for the resampled data
 * @param input      source buffer
 * @param nb_samples number of samples in the source data (a sample count,
 *                   not the byte size of input)
 * @return number of samples after resampling
 */
int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);

/**
 * Free resample context.
 *
 * @param s a non-NULL pointer to a resample context previously
 *          created with av_audio_resample_init()
 */
void audio_resample_close(ReSampleContext *s);

函数av_audio_resample_init()用来初始化重采样的参数，前4个参数很好理解；后6个参数基本上是使用缺省参数，分别为：

AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16,16, 10, 0, 1

函数audio_resample()用来重采样，前3个参数都好理解，最后一个参数是指“原数据的采样个数”，而不是input的bytes数。该函数的返回值也是采样个数，不过是重采样之后的。

函数audio_resample_close()用来清理重采样时分配的资源。

相关代码如下：

初始化

// Create the resample engine only when the decoder's sample rate differs from
// the requested output sample rate; otherwise nothing is assigned here.

if (m_codec_ctx->sample_rate != m_out_samplerate)

{

LOGW("%s, need re-sample, initialize re-sample engine! out channels:%d, out sample rate:%d hz, in channels:%d, in sample rate:%d",__FUNCTION__, 2, m_out_samplerate, m_codec_ctx->channels, m_codec_ctx->sample_rate);

// Output is requested as 2 channels of AV_SAMPLE_FMT_S16; the trailing
// arguments are filter_length 16, log2_phase_count 10, linear 0, cutoff 1.
// NOTE(review): the returned pointer is not checked in this excerpt, and
// av_audio_resample_init() is documented to return NULL on error - confirm
// the caller copes with a NULL engine.

m_resample_engine = av_audio_resample_init( 2, m_codec_ctx->channels, m_out_samplerate, m_codec_ctx->sample_rate, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16, 16, 10, 0, 1);

}

(2).重采样代码：

// Decode one chunk of the current packet into m_audio_buff and, when a
// resample engine exists, resample it before appending the result to buff.
// frame_size is passed by address to the decoder and is read back afterwards
// as the byte length of the decoded audio.

frame_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;

memset(m_audio_buff, 0, AVCODEC_MAX_AUDIO_FRAME_SIZE);

decoded_len = avcodec_decode_audio3(m_codec_ctx, (short *)m_audio_buff, &frame_size, &m_avpkt);

LOGI("%s, current decoded size:%d", __FUNCTION__, decoded_len);

if (decoded_len > 0)

{

// Advance the packet cursor past the decoded_len bytes just handled.

m_avpkt.size -= decoded_len;

m_avpkt.data += decoded_len;



// Default: hand the decoded data through unchanged.

decoded_audio_len = frame_size;

valid_data_pointer = m_audio_buff;



// Resample only if an engine was created during initialization.

if (m_resample_engine)

{

// Convert the decoded byte length into the sample count passed to
// audio_resample(): divide by 2 * channel count (the 2 is presumably the
// byte size of one 16-bit sample - confirm against the sample format).

int after_resampled_len = 0;

int before_resampled_len = frame_size/(2 * m_codec_ctx->channels);

// NOTE(review): the size of m_audio_resampled_buff is not visible here;
// verify it can hold the resampled output when the rate or the channel
// count is increased.

memset(m_audio_resampled_buff, 0, AVCODEC_MAX_AUDIO_FRAME_SIZE);

after_resampled_len = audio_resample(m_resample_engine, (short *)m_audio_resampled_buff, (short *)m_audio_buff, before_resampled_len);

LOGI("%s, re-sampled! length in:%d, length out:%d", __FUNCTION__, before_resampled_len, after_resampled_len);

// Convert the returned sample count back to bytes: 2 bytes per sample
// times the 2 output channels.

decoded_audio_len = after_resampled_len * 2 * 2; //convert short to byte, and 2 channels

valid_data_pointer = m_audio_resampled_buff;

}



// Append the selected data (decoded or resampled) to the output buffer.
// NOTE(review): no capacity check on buff is visible in this excerpt.

memcpy(buff+copied_len, valid_data_pointer, decoded_audio_len);

copied_len += decoded_audio_len;

LOGI("%s, copy1, %d bytes has copied to output buff, total:%d!", __FUNCTION__, decoded_audio_len, copied_len); }

(3).释放代码：

// Release the resample engine if one was created, then clear the member so
// the later `if (m_resample_engine)` checks see that no engine exists.

if (m_resample_engine)

{

audio_resample_close(m_resample_engine);

m_resample_engine = 0;

}

2.使用插值算法

不多说了，直接上代码，纯C的，可移植性高。

#ifdef __cplusplus

extern "C" {

#endif

#define _CRT_SECURE_NO_WARNINGS



#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>



#define DR_WAV_IMPLEMENTATION



#include "dr_wav.h"



#define DR_MP3_IMPLEMENTATION





#include "dr_mp3.h"



#include "timing.h"





/**
 * Write 32-bit IEEE float samples to a RIFF WAVE file.
 *
 * @param filename         path of the file to create
 * @param buffer           samples to write
 * @param sampleRate       sample rate stored in the file header
 * @param totalSampleCount number of float values handed to drwav_write()
 * @param channels         channel count stored in the file header
 *
 * Terminates the process with exit(1) when the file cannot be opened for
 * writing or when fewer samples than requested were written.
 */
void wavWrite_f32(char *filename, float *buffer, int sampleRate, uint32_t totalSampleCount, uint32_t channels) {
    drwav_data_format format;
    format.container = drwav_container_riff;
    format.format = DR_WAVE_FORMAT_IEEE_FLOAT;
    format.channels = channels;
    format.sampleRate = (drwav_uint32) sampleRate;
    format.bitsPerSample = 32;

    drwav *pWav = drwav_open_file_write(filename, &format);
    if (pWav == NULL) {
        /* Previously this case returned silently and no output was produced. */
        fprintf(stderr, "open file [%s] for writing error.\n", filename);
        exit(1);
    }

    drwav_uint64 samplesWritten = drwav_write(pWav, totalSampleCount, buffer);
    /* NOTE(review): if drwav_open_file_write() heap-allocates the drwav object
     * in the dr_wav version in use, drwav_close() may be the matching release
     * call - confirm against the bundled dr_wav.h. */
    drwav_uninit(pWav);

    if (samplesWritten != totalSampleCount) {
        fprintf(stderr, "write file [%s] error.\n", filename);
        exit(1);
    }
}



/**
 * Load an audio file as 32-bit float samples, trying WAV first and MP3 second.
 *
 * @param filename    path of the file to read
 * @param sampleRate  receives the sample rate reported by the decoder
 * @param sampleCount receives the decoder's frame count multiplied by *channels
 * @param channels    receives the channel count reported by the decoder
 * @return the decoded buffer; the caller releases it (resampler() uses free()).
 *
 * Terminates the process with exit(1) when neither decoder can read the file.
 */
float *wavRead_f32(const char *filename, uint32_t *sampleRate, uint64_t *sampleCount, uint32_t *channels) {
    drwav_uint64 totalFrameCount = 0;
    float *input = drwav_open_file_and_read_pcm_frames_f32(filename, channels, sampleRate, &totalFrameCount);
    if (input == NULL) {
        /* Zero-initialize the config: it was previously passed uninitialized,
         * so any field the MP3 decoder reads as a request held garbage. */
        drmp3_config pConfig = {0};
        input = drmp3_open_file_and_read_f32(filename, &pConfig, &totalFrameCount);
        if (input != NULL) {
            *channels = pConfig.outputChannels;
            *sampleRate = pConfig.outputSampleRate;
        }
    }
    if (input == NULL) {
        fprintf(stderr, "read file [%s] error.\n", filename);
        exit(1);
    }
    *sampleCount = totalFrameCount * (*channels);
    return input;
}





/**
 * Split a path into drive, directory, base name and extension.
 *
 * Any output pointer may be NULL, in which case that component is skipped.
 * No output sizes are passed, so each non-NULL buffer must be large enough
 * for its component plus the terminating NUL (drv needs 3 bytes).
 *
 * @param path input path; both '\\' and '/' are treated as separators
 * @param drv  receives "X:" when the path starts with a drive prefix, else ""
 * @param dir  receives everything from after the drive up to and including
 *             the last separator
 * @param name receives the file name without its extension
 * @param ext  receives the text from the last '.' of the file name to the end
 *             of the string (including any ':' suffix), or from the first ':'
 *             when the file name contains no '.'
 */
void splitpath(const char *path, char *drv, char *dir, char *name, char *ext) {
    const char *end;
    const char *p;
    const char *s;

    if (path[0] && path[1] == ':') {
        if (drv) {
            drv[0] = path[0];
            drv[1] = path[1];
            drv[2] = '\0';
        }
        /* Skip the drive prefix even when the caller does not want it;
         * otherwise the scan below stops at the drive's own ':' and every
         * other component comes out wrong. */
        path += 2;
    } else if (drv) {
        *drv = '\0';
    }

    /* The path proper ends at the NUL or at the first ':' after the drive. */
    for (end = path; *end && *end != ':';) {
        end++;
    }

    /* Walk back over the last component looking for its final '.'. */
    for (p = end; p > path && *--p != '\\' && *p != '/';) {
        if (*p == '.') {
            end = p;
            break;
        }
    }

    if (ext) {
        /* Copies through the terminating NUL. */
        for (s = end; (*ext = *s++);) {
            ext++;
        }
    }

    /* Find the start of the last component (just after the last separator). */
    for (p = end; p > path;) {
        if (*--p == '\\' || *p == '/') {
            p++;
            break;
        }
    }

    if (name) {
        for (s = p; s < end;) {
            *name++ = *s++;
        }
        *name = '\0';
    }

    if (dir) {
        for (s = path; s < p;) {
            *dir++ = *s++;
        }
        *dir = '\0';
    }
}





/**
 * Resample interleaved float audio by linear interpolation.
 *
 * The read position is tracked as a whole input-frame index plus a 32-bit
 * fixed-point fraction; each output frame blends the current input frame with
 * the following one.
 *
 * @param input         interleaved source samples (inputSize frames of
 *                      `channels` values each when output is non-NULL)
 * @param output        destination for outputSize * channels values, or NULL
 *                      to only query the output size
 * @param inSampleRate  source sample rate, must be > 0
 * @param outSampleRate target sample rate, must be > 0
 * @param inputSize     input length; the return value is in the same unit
 * @param channels      number of interleaved channels
 * @return inputSize * outSampleRate / inSampleRate, or 0 when input is NULL or
 *         a sample rate is not positive
 */
uint64_t Resample_f32(const float *input, float *output, int inSampleRate, int outSampleRate, uint64_t inputSize,
                      uint32_t channels) {
    /* A zero input rate would divide by zero; negative rates are meaningless. */
    if (input == NULL || inSampleRate <= 0 || outSampleRate <= 0)
        return 0;

    uint64_t outputSize = inputSize * (uint64_t) outSampleRate / (uint64_t) inSampleRate;
    if (output == NULL)
        return outputSize;
    if (inputSize == 0)
        return 0;

    const uint64_t fixedFraction = (1ULL << 32);
    const double normFixed = 1.0 / (double) fixedFraction;
    const double stepDist = (double) inSampleRate / (double) outSampleRate;
    const uint64_t step = (uint64_t) (stepDist * (double) fixedFraction + 0.5);
    const uint64_t lastFrame = inputSize - 1;

    uint64_t frame = 0;     /* whole part of the read position */
    uint64_t curOffset = 0; /* fractional part, always < 2^32 at loop top */

    /* 64-bit counter: outputSize is 64-bit, a 32-bit counter could never
     * reach values above UINT32_MAX. */
    for (uint64_t i = 0; i < outputSize; i++) {
        /* Clamp so neither the current nor the following frame is read
         * past the end of the input. */
        if (frame > lastFrame)
            frame = lastFrame;
        const uint64_t nextFrame = (frame < lastFrame) ? frame + 1 : lastFrame;
        const float *cur = input + frame * channels;
        const float *nxt = input + nextFrame * channels;
        const double frac = (double) curOffset * normFixed;

        for (uint32_t c = 0; c < channels; c++) {
            *output++ = (float) (cur[c] + (nxt[c] - cur[c]) * frac);
        }

        curOffset += step;
        frame += curOffset >> 32;
        curOffset &= (fixedFraction - 1);
    }
    return outputSize;
}





/**
 * Resample interleaved 16-bit PCM audio by linear interpolation.
 *
 * The read position is tracked as a whole input-frame index plus a 32-bit
 * fixed-point fraction; each output frame blends the current input frame with
 * the following one, and the result is truncated toward zero when converted
 * back to int16_t.
 *
 * @param input         interleaved source samples (inputSize frames of
 *                      `channels` values each when output is non-NULL)
 * @param output        destination for outputSize * channels values, or NULL
 *                      to only query the output size
 * @param inSampleRate  source sample rate, must be > 0
 * @param outSampleRate target sample rate, must be > 0
 * @param inputSize     input length; the return value is in the same unit
 * @param channels      number of interleaved channels
 * @return inputSize * outSampleRate / inSampleRate, or 0 when input is NULL or
 *         a sample rate is not positive
 */
uint64_t Resample_s16(const int16_t *input, int16_t *output, int inSampleRate, int outSampleRate, uint64_t inputSize,
                      uint32_t channels) {
    /* A zero input rate would divide by zero; negative rates are meaningless. */
    if (input == NULL || inSampleRate <= 0 || outSampleRate <= 0)
        return 0;

    uint64_t outputSize = inputSize * (uint64_t) outSampleRate / (uint64_t) inSampleRate;
    if (output == NULL)
        return outputSize;
    if (inputSize == 0)
        return 0;

    const uint64_t fixedFraction = (1ULL << 32);
    const double normFixed = 1.0 / (double) fixedFraction;
    const double stepDist = (double) inSampleRate / (double) outSampleRate;
    const uint64_t step = (uint64_t) (stepDist * (double) fixedFraction + 0.5);
    const uint64_t lastFrame = inputSize - 1;

    uint64_t frame = 0;     /* whole part of the read position */
    uint64_t curOffset = 0; /* fractional part, always < 2^32 at loop top */

    /* 64-bit counter: outputSize is 64-bit, a 32-bit counter could never
     * reach values above UINT32_MAX. */
    for (uint64_t i = 0; i < outputSize; i++) {
        /* Clamp so neither the current nor the following frame is read
         * past the end of the input. */
        if (frame > lastFrame)
            frame = lastFrame;
        const uint64_t nextFrame = (frame < lastFrame) ? frame + 1 : lastFrame;
        const int16_t *cur = input + frame * channels;
        const int16_t *nxt = input + nextFrame * channels;
        const double frac = (double) curOffset * normFixed;

        for (uint32_t c = 0; c < channels; c++) {
            *output++ = (int16_t) (cur[c] + (nxt[c] - cur[c]) * frac);
        }

        curOffset += step;
        frame += curOffset >> 32;
        curOffset &= (fixedFraction - 1);
    }
    return outputSize;
}



/**
 * Print the command-line usage text to stdout, then block on getchar()
 * until a character (or end of input) arrives.
 */
void printUsage() {
    static const char *const usageLines[] = {
        "usage:\n",
        "./Resampler input.wav 48000\n",
        "./Resampler input.mp3 16000\n",
        "or\n",
        "./Resampler input.wav output.wav 8000\n",
        "./Resampler input.mp3 output.wav 44100\n",
        "press any key to exit.\n",
    };
    const size_t lineCount = sizeof usageLines / sizeof usageLines[0];

    for (size_t k = 0; k < lineCount; k++) {
        fputs(usageLines[k], stdout);
    }
    getchar();
}



/**
 * Read an audio file, resample it to targetSampleRate and write the result
 * as a float WAV file, printing the time spent in the resampling call.
 *
 * @param in_file          path of the input file handed to wavRead_f32()
 * @param out_file         path of the output file handed to wavWrite_f32()
 * @param targetSampleRate desired output sample rate; 0 prints the usage text
 */
void resampler(char *in_file, char *out_file, uint32_t targetSampleRate) {
    if (targetSampleRate == 0) {
        printUsage();
        return;
    }

    uint32_t sampleRate = 0;
    uint64_t sampleCount = 0;
    uint32_t channels = 0;
    float *input = wavRead_f32(in_file, &sampleRate, &sampleCount, &channels);
    if (input == NULL)
        return;

    /* Both values are used as divisors below. */
    if (channels == 0 || sampleRate == 0) {
        fprintf(stderr, "file [%s] reports no channels or a zero sample rate.\n", in_file);
        free(input);
        return;
    }

    /* Size the output in whole frames and derive the sample count from that.
     * Scaling the total sample count directly can yield a count that is not a
     * multiple of `channels`, leaving trailing buffer entries that are never
     * written by the resampler but still end up in the file. */
    uint64_t frameCount = sampleCount / channels;
    uint64_t targetFrameCount = Resample_f32(input, NULL, sampleRate, targetSampleRate, frameCount, channels);
    uint64_t targetSampleCount = targetFrameCount * channels;

    float *output = (float *) malloc(targetSampleCount * sizeof(float));
    if (output) {
        double startTime = now();
        Resample_f32(input, output, sampleRate, targetSampleRate, frameCount, channels);
        double time_interval = calcElapsed(startTime, now());
        printf("time interval: %f ms\n ", (time_interval * 1000));
        /* wavWrite_f32() takes a 32-bit count, so larger totals are truncated. */
        wavWrite_f32(out_file, output, targetSampleRate, (uint32_t) targetSampleCount, channels);
        free(output);
    } else {
        fprintf(stderr, "failed to allocate the output buffer.\n");
    }
    free(input);
}





/**
 * Command-line entry point.
 *
 *   Resampler <input> <rate>           output goes to "<input name>_out.wav"
 *                                      in the input file's directory
 *   Resampler <input> <output> <rate>  output goes to <output>
 *
 * A rate that does not parse to a positive number is passed on as 0, which
 * makes resampler() print the usage text.
 *
 * @return 0 after dispatching to resampler(), -1 on too few arguments or an
 *         over-long input path
 */
int main(int argc, char *argv[]) {
    printf("Audio Processing\n");
    printf("blog:http://cpuimage.cnblogs.com/\n");
    printf("Audio Resampler\n");
    if (argc < 3) {
        printUsage();
        return -1;
    }

    char *in_file = argv[1];
    if (argc > 3) {
        char *out_file = argv[2];
        int rate = atoi(argv[3]);
        /* Negative input must not wrap around to a huge unsigned rate. */
        uint32_t targetSampleRate = (rate > 0) ? (uint32_t) rate : 0;
        resampler(in_file, out_file, targetSampleRate);
    } else {
        enum { PATH_CAP = 1024 };
        int rate = atoi(argv[2]);
        uint32_t targetSampleRate = (rate > 0) ? (uint32_t) rate : 0;
        char drive[3];
        char dir[PATH_CAP];
        char fname[PATH_CAP];
        char ext[PATH_CAP];
        char out_file[PATH_CAP + 16];

        /* splitpath() takes no buffer sizes, and no component can be longer
         * than the whole path, so bounding the path bounds every component. */
        if (strlen(in_file) >= PATH_CAP) {
            fprintf(stderr, "input path is too long (limit %d characters).\n", PATH_CAP - 1);
            return -1;
        }
        splitpath(in_file, drive, dir, fname, ext);
        snprintf(out_file, sizeof out_file, "%s%s%s_out.wav", drive, dir, fname);
        resampler(in_file, out_file, targetSampleRate);
    }

    return 0;
}



#ifdef __cplusplus

}

#endif

参考博客：

https://www.jianshu.com/p/bf5e54f553a4

大王算法

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
iOS 视频直播开发笔记(五)

在我们的音视频直播中，一般都需要做规范化处理，保证更多机器的兼容性和直播效果。对于音频来说，做的最多的就是重采样了。那么怎么对音频进行重采样呢，一般来说，最常用的就两种，一种是使用现成的库，如FFmpeg，opus，webrtc , sox等。另外一种就是使用差值采样算法，音频重采样其实也就是插值算法。与图像方面的插值算法没有太大的区别。1.使用FFmpeg进行音频重采样FFmpeg解码音频数据时，进行重采样（即改变文件原有的采样率），我们使用ffmpeg解码音频的时候，往往需要...
复制链接

扫一扫