什么是重采样?
所谓的重采样,就是改变⾳频的采样率、sampleformat、声道数等参数,使之按照我们期望的参数输出。
为什么要重采样?
为什么要重采样?当然是原有的⾳频参数不满⾜我们的需求,⽐如在FFmpeg解码⾳频的时候,不同的⾳源有不同的格式,采样率等,在解码后的数据中的这些参数也会不⼀致(最新FFmpeg解码⾳频后,⾳频格式为AV_SAMPLE_FMT_FLTP,这个参数应该是⼀致的),如果我们接下来需要使⽤解码后的⾳频数据做其他操作,⽽这些参数的不⼀致导致会有很多额外⼯作,此时直接对其进⾏重采样,获取我们制定的⾳频参数,这样就会⽅便很多。再⽐如在将⾳频进⾏SDL播放时候,因为当前的SDL2.0不⽀持planar格式,也不⽀持浮点型的,⽽最新的FFMPEG16年会将⾳频解码为AV_SAMPLE_FMT_FLTP格式,因此此时就需要我们对其重采样,使之可以在SDL2.0上进⾏播放。
怎样重采样?
利用ffmpeg内部的重采样器进行重采样
注意事项
1.关于音频质量损失问题
我们在进行如44.1khz采样率向48khz采样率的音频进行转换时 因为我们要求重采样后音频的时长不能发生变化 而在时长相同时 48khz采样率肯定比44.1khz采样率的采样点要多的多 所以必然会带来精度的损失 但是44.1khz转换为48khz 就算损失了 人耳还是听不出来的
2.ffmpeg中内部支持互相转换的采样格式和声道分布
enum AVSampleFormat { AV_SAMPLE_FMT_NONE = -1, AV_SAMPLE_FMT_U8, ///< unsigned 8 bits AV_SAMPLE_FMT_S16, ///< signed 16 bits AV_SAMPLE_FMT_S32, ///< signed 32 bits AV_SAMPLE_FMT_FLT, ///< float AV_SAMPLE_FMT_DBL, ///< double AV_SAMPLE_FMT_U8P, ///< unsigned 8 bits, planar AV_SAMPLE_FMT_S16P, ///< signed 16 bits, planar AV_SAMPLE_FMT_S32P, ///< signed 32 bits, planar AV_SAMPLE_FMT_FLTP, ///< float, planar AV_SAMPLE_FMT_DBLP, ///< double, planar AV_SAMPLE_FMT_S64, ///< signed 64 bits AV_SAMPLE_FMT_S64P, ///< signed 64 bits, planar AV_SAMPLE_FMT_NB ///< Number of sample formats. DO NOT USE if linking dynamically };
#define AV_CH_LAYOUT_MONO (AV_CH_FRONT_CENTER) #define AV_CH_LAYOUT_STEREO (AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT) #define AV_CH_LAYOUT_2POINT1 (AV_CH_LAYOUT_STEREO|AV_CH_LOW_FREQUENCY) #define AV_CH_LAYOUT_2_1 (AV_CH_LAYOUT_STEREO|AV_CH_BACK_CENTER) #define AV_CH_LAYOUT_SURROUND (AV_CH_LAYOUT_STEREO|AV_CH_FRONT_CENTER) #define AV_CH_LAYOUT_3POINT1 (AV_CH_LAYOUT_SURROUND|AV_CH_LOW_FREQUENCY) #define AV_CH_LAYOUT_4POINT0 (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_CENTER) #define AV_CH_LAYOUT_4POINT1 (AV_CH_LAYOUT_4POINT0|AV_CH_LOW_FREQUENCY) #define AV_CH_LAYOUT_2_2 (AV_CH_LAYOUT_STEREO|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT) #define AV_CH_LAYOUT_QUAD (AV_CH_LAYOUT_STEREO|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT) #define AV_CH_LAYOUT_5POINT0 (AV_CH_LAYOUT_SURROUND|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT) #define AV_CH_LAYOUT_5POINT1 (AV_CH_LAYOUT_5POINT0|AV_CH_LOW_FREQUENCY) #define AV_CH_LAYOUT_5POINT0_BACK (AV_CH_LAYOUT_SURROUND|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT) #define AV_CH_LAYOUT_5POINT1_BACK (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_LOW_FREQUENCY) #define AV_CH_LAYOUT_6POINT0 (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_CENTER) #define AV_CH_LAYOUT_6POINT0_FRONT (AV_CH_LAYOUT_2_2|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER) #define AV_CH_LAYOUT_HEXAGONAL (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_BACK_CENTER) #define AV_CH_LAYOUT_6POINT1 (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_CENTER) #define AV_CH_LAYOUT_6POINT1_BACK (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_BACK_CENTER) #define AV_CH_LAYOUT_6POINT1_FRONT (AV_CH_LAYOUT_6POINT0_FRONT|AV_CH_LOW_FREQUENCY) #define AV_CH_LAYOUT_7POINT0 (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT) #define AV_CH_LAYOUT_7POINT0_FRONT (AV_CH_LAYOUT_5POINT0|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER) #define AV_CH_LAYOUT_7POINT1 (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT) #define AV_CH_LAYOUT_7POINT1_WIDE (AV_CH_LAYOUT_5POINT1|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER) #define AV_CH_LAYOUT_7POINT1_WIDE_BACK (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER) #define AV_CH_LAYOUT_OCTAGONAL (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_CENTER|AV_CH_BACK_RIGHT) #define AV_CH_LAYOUT_HEXADECAGONAL (AV_CH_LAYOUT_OCTAGONAL|AV_CH_WIDE_LEFT|AV_CH_WIDE_RIGHT|AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT|AV_CH_TOP_BACK_CENTER|AV_CH_TOP_FRONT_CENTER|AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT) #define AV_CH_LAYOUT_STEREO_DOWNMIX (AV_CH_STEREO_LEFT|AV_CH_STEREO_RIGHT)
3.存储格式的不同 交错模式和平面模式
以双声道为例,带P(plane)的数据格式在存储时,其左声道和右声道的数据是分开存储的,左声道的数据存储在data[0],右声道的数据存储在data[1],每个声道的所占⽤的字节数为linesize[0]和linesize[1];不带P(packed)的⾳频数据在存储时,是按照LRLRLR...的格式交替存储在data[0]中,linesize[0]表示总的数据量。
4.linesize和nb_samples
linesize表示一个平面内总比特数(经过内存对齐) nb_samples表示一帧音频帧的采样点数
5.swr_get_delay
重采样器上下文中会缓冲一些还没有重采样的采样点 通过这个函数拿到
关于重采样的代码具体实现 ffmpeg官方例子加上中文注释
extern "C"
{
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
}
static int get_format_from_sample_fmt(const char** fmt,
enum AVSampleFormat sample_fmt)
{
int i;
struct sample_fmt_entry {
enum AVSampleFormat sample_fmt; const char* fmt_be, * fmt_le;
} sample_fmt_entries[] = {
{ AV_SAMPLE_FMT_U8, "u8", "u8" },
{ AV_SAMPLE_FMT_S16, "s16be", "s16le" },
{ AV_SAMPLE_FMT_S32, "s32be", "s32le" },
{ AV_SAMPLE_FMT_FLT, "f32be", "f32le" },
{ AV_SAMPLE_FMT_DBL, "f64be", "f64le" },
};
*fmt = NULL;
for (i = 0; i < FF_ARRAY_ELEMS(sample_fmt_entries); i++) {
struct sample_fmt_entry* entry = &sample_fmt_entries[i];
if (sample_fmt == entry->sample_fmt) {
*fmt = AV_NE(entry->fmt_be, entry->fmt_le);
return 0;
}
}
fprintf(stderr,
"Sample format %s not supported as output format\n",
av_get_sample_fmt_name(sample_fmt));
return AVERROR(EINVAL);
}
/**
* Fill dst buffer with nb_samples, generated starting from t. 交错模式的
*/
static void fill_samples(double* dst, int nb_samples, int nb_channels, int sample_rate, double* t)
{
int i, j;
double tincr = 1.0 / sample_rate, * dstp = dst;
const double c = 2 * M_PI * 440.0;
/* generate sin tone with 440Hz frequency and duplicated channels */
for (i = 0; i < nb_samples; i++) {
*dstp = sin(c * *t);
for (j = 1; j < nb_channels; j++)
dstp[j] = dstp[0];
dstp += nb_channels;
*t += tincr;
}
}
int main(int argc, char** argv)
{
// 输入参数
int64_t src_ch_layout = AV_CH_LAYOUT_STEREO;
int src_rate = 48000;
enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL;
int src_nb_channels = 0;
uint8_t** src_data = NULL; // 二级指针
int src_linesize;
int src_nb_samples = 1024;
// 输出参数
int64_t dst_ch_layout = AV_CH_LAYOUT_STEREO;
int dst_rate = 44100;
enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16;
int dst_nb_channels = 0;
uint8_t** dst_data = NULL; //二级指针
//为什么我们这里是二级指针呢 因为存储格式的不同可能导致存储有多个平面 所以我们需要到dst_data[0]和dst_data[1]拿数据
int dst_linesize;
int dst_nb_samples;
int max_dst_nb_samples;
// 输出文件
const char* dst_filename = NULL; // 保存输出的pcm到本地,然后播放验证
FILE* dst_file;
int dst_bufsize;
const char* fmt;
// 重采样实例
struct SwrContext* swr_ctx;
double t;
int ret;
//这里随便填 我填的是我本地的一个文件
dst_filename = "c://out.pcm";
dst_file = fopen(dst_filename, "wb");
if (!dst_file) {
fprintf(stderr, "Could not open destination file %s\n", dst_filename);
exit(1);
}
// 创建重采样器
/* create resampler context */
swr_ctx = swr_alloc();
if (!swr_ctx) {
fprintf(stderr, "Could not allocate resampler context\n");
ret = AVERROR(ENOMEM);
goto end;
}
// 设置重采样参数
/* set options */
// 输入参数
av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0);//源通道数
av_opt_set_int(swr_ctx, "in_sample_rate", src_rate, 0);//源采样率
av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);//源采样格式
// 输出参数
av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0);
av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);
// 初始化重采样
/* initialize the resampling context */
if ((ret = swr_init(swr_ctx)) < 0) {
fprintf(stderr, "Failed to initialize the resampling context\n");
goto end;
}
/* allocate source and destination samples buffers */
// 计算出输入源的通道数量
src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
// 给输入源分配内存空间 为什么会是三级指针呢 因为需要给src[0] src[1]分配内存
ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
src_nb_samples, src_sample_fmt, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate source samples\n");
goto end;
}
/* compute the number of converted samples: buffering is avoided
* ensuring that the output buffer will contain at least all the
* converted input samples */
// 计算一帧音频输出采样数量
max_dst_nb_samples = dst_nb_samples =
av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
/* buffer is going to be directly written to a rawaudio file, no alignment */
dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
// 分配输出缓存内存
ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
dst_nb_samples, dst_sample_fmt, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate destination samples\n");
goto end;
}
t = 0;
do {
/* generate synthetic audio */
// 生成输入源 这里是自定义的输入源 一个正弦波 你不用管 本来这里也可以用你读出来的AVFrame里的数据来填充
fill_samples((double*)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t);
/* compute destination number of samples */
int64_t delay = swr_get_delay(swr_ctx, src_rate);//拿到swr_ctx中缓存的采样点数量
dst_nb_samples = av_rescale_rnd(delay + src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);//重新计算输出音频帧一帧采样点数量
if (dst_nb_samples > max_dst_nb_samples) {//如果大于 释放内存后重新分配内存
av_freep(&dst_data[0]);
ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
dst_nb_samples, dst_sample_fmt, 1);
if (ret < 0)
break;
max_dst_nb_samples = dst_nb_samples;
}
ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t**)src_data, src_nb_samples);//真正的转换函数
if (ret < 0) {
fprintf(stderr, "Error while converting\n");
goto end;
}
//拿到输出字节数
dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
ret, dst_sample_fmt, 1);
if (dst_bufsize < 0) {
fprintf(stderr, "Could not get sample buffer size\n");
goto end;
}
printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);
fwrite(dst_data[0], 1, dst_bufsize, dst_file);
} while (t < 10);
//拿到swr中剩下的采样点 上文也提及到了 可能swr内部会有缓存的
ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, NULL, 0);
if (ret < 0) {
fprintf(stderr, "Error while converting\n");
goto end;
}
dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
ret, dst_sample_fmt, 1);
if (dst_bufsize < 0) {
fprintf(stderr, "Could not get sample buffer size\n");
goto end;
}
printf("flush in:%d out:%d\n", 0, ret);
fwrite(dst_data[0], 1, dst_bufsize, dst_file);
if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0)
goto end;
end:
fclose(dst_file);
if (src_data)
av_freep(&src_data[0]);
av_freep(&src_data);
if (dst_data)
av_freep(&dst_data[0]);
av_freep(&dst_data);
swr_free(&swr_ctx);
return ret < 0;
}