音视频实战---音频重采样

weixin_45673259

已于 2024-03-19 21:13:26 修改

阅读量599

点赞数 6

分类专栏：音视频　文章标签：音视频

于 2024-03-18 22:55:22 首次发布

本文链接：https://blog.csdn.net/weixin_45673259/article/details/136824612

版权

音视频专栏收录该内容

19 篇文章 1 订阅

订阅专栏

1、使用swr_alloc()创建重采样实例

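/* 作用：分配一个尚未设置任何参数的音频重采样上下文（SwrContext），它通常用于音频处理和转码过程中。
 * 分配之后需要先用 av_opt_set_int()/av_opt_set_sample_fmt() 设置下列输入输出参数，再调用 swr_init() 完成初始化；
 * 这些参数也可以通过 swr_alloc_set_opts() 在分配时一次性传入：
 * out_ch_layout：输出声道布局（channel layout）。
 * out_sample_fmt：输出采样格式（sample format）。
 * out_sample_rate：输出采样率。
 * in_ch_layout：输入声道布局。
 * in_sample_fmt：输入采样格式。
 * in_sample_rate：输入采样率。
 * 返回值：成功时返回上下文指针，失败时返回 NULL。
 */
struct SwrContext *swr_alloc(void);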

2、使用av_opt_set_int函数设置重采样输入输出参数

/* 作用：为支持 AVOption 的对象设置整型选项；本文用它设置重采样上下文（SwrContext）的声道布局、采样率等参数。
 * obj：要设置选项的对象，可以是 SwrContext、AVCodecContext、AVFormatContext 等支持 AVOption 的结构体。
 * name：选项的名称。
 * val：要设置的整型数值。
 * search_flags：搜索标志，用于指定选项的搜索行为，例如 AV_OPT_SEARCH_CHILDREN 表示也在子对象中搜索。
 */
int av_opt_set_int(void *obj, const char *name, int64_t val, int search_flags);

3、使用swr_init函数初始化重采样器

/* 作用：用于初始化音频重采样上下文，确保上下文已经准备好进行音频重采样操作。
 * s：指向要初始化的音频重采样上下文的指针。
 */ 
int swr_init(struct SwrContext *s);

4、使用av_get_channel_layout_nb_channels函数计算输入源的通道数

/* 作用：获取指定声道布局（channel layout）所包含的声道数量的函数。给定一个声道布局值，该函数会返回该声道布局中包含的声道数量。
 * channel_layout：要查询的声道布局值，通常以64位整数（uint64_t）表示。
 */ 
int av_get_channel_layout_nb_channels(uint64_t channel_layout);

5、给输入源分配内存空间–av_samples_alloc_array_and_samples

/* 作用：分配存储音频样本数据的内存空间的函数，通常在进行音频处理和编解码时使用。这个函数会为音频数据分配所需的内存空间，并返回一个指向音频数据指针数组的指针。
 * audio_data：指向指针数组的指针，用于存储音频样本数据的指针数组。
 * linesize：指向整数数组的指针，用于存储每个声道数据的行大小（即每行的字节数）。
 * nb_channels：声道数，表示音频数据包含的声道数量。
 * nb_samples：每个声道的样本数。
 * sample_fmt：音频样本的格式，使用 AVSampleFormat 枚举类型表示。
 * align：内存对齐方式，通常设置为0表示默认对齐。
 */ 
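int av_samples_alloc_array_and_samples(uint8_t ***audio_data, int *linesize, int nb_channels,
                                       int nb_samples, enum AVSampleFormat sample_fmt, int align);

/* 另：av_samples_alloc 只分配样本缓冲区、不分配指针数组；示例代码在输出缓冲区不够大时用它重新分配。 */
int av_samples_alloc(uint8_t **audio_data, int *linesize, int nb_channels,
                     int nb_samples, enum AVSampleFormat sample_fmt, int align);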

6、计算输出采样数量–av_rescale_rnd

/* 作用：计算 a * b / c，并按指定的舍入方式对结果取整；直接写 a * b / c 可能溢出，该函数用于避免这一问题。
 * a：要缩放的 64 位整数（本文中为输入采样数）。
 * b：乘数（本文中为输出采样率）。
 * c：除数（本文中为输入采样率）。
 * rnd：舍入方式，使用 AVRounding 枚举类型表示，可以选择不同的舍入模式，如向上舍入、向下舍入等。
 */ 
int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd) av_const;

7、分配输出缓存内存–av_samples_alloc_array_and_samples

/* 作用：为音频数据分配所需的内存空间，并返回一个指向音频数据指针数组的指针。
 * audio_data：指向指针数组的指针，用于存储音频样本数据的指针数组。
 * linesize：指向整数数组的指针，用于存储每个声道数据的行大小（即每行的字节数）。
 * nb_channels：声道数，表示音频数据包含的声道数量。
 * nb_samples：每个声道的样本数。
 * sample_fmt：音频样本的格式，使用 AVSampleFormat 枚举类型表示。
 * align：内存对齐方式，通常设置为0表示默认对齐。
 */
int av_samples_alloc_array_and_samples(uint8_t ***audio_data, int *linesize, int nb_channels,int nb_samples, enum AVSampleFormat sample_fmt, int align);

8、获取重采样器内部缓存造成的延迟（尚未输出的采样），以便正确计算输出缓冲区大小并进行同步。–swr_get_delay

/* 该函数可以获取由于音频重采样引入的延迟，返回的值以 base 为单位。这个延迟值可以帮助用户在处理音频数据时进行适当的时间调整，确保音视频同步等需要精确时间控制的场景得到正确的处理。
 * s：指向 SwrContext 结构体的指针，表示音频重采样上下文。
 * base：返回值所使用的时间基准：传入 1 时返回值以秒为单位，传入 1000 时以毫秒为单位，传入输入采样率时以输入采样数为单位（本文示例传入 src_rate）。
 */ 
int64_t swr_get_delay(struct SwrContext *s, int64_t base);

9、计算加上延迟采样数后的输出采样数量—av_rescale_rnd

10、将音频进行重采样转换–swr_convert

/* 作用：对音频数据进行重采样转换（采样率、采样格式、声道布局），通常在音频处理和转码过程中使用
 * s：swr 上下文，包含了执行重采样转换所需的信息。
 * out：输出音频数据缓冲区。
 * out_count：输出缓冲区中每个声道可容纳的采样数（不是字节数）。
 * in：输入音频数据缓冲区；传入 NULL 表示冲刷（flush）重采样器内部缓存的剩余数据。
 * in_count：每个声道的输入采样数（不是字节数）。
 * 返回值：每个声道实际输出的采样数，出错时返回负值。
*/
int swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
                                const uint8_t **in , int in_count);

11、计算重采样后音频数据占用的字节数–av_samples_get_buffer_size

/* 该函数可以根据给定的声道数、样本数、样本格式等信息，计算出存储音频样本数据所需的缓冲区大小，并返回计算得到的大小值。这个大小值可以帮助用户在分配内存时确保足够的空间来存储音频数据。
 * linesize：输出参数，用于返回计算得到的行大小（即每行的字节数），可以为 NULL。
 * nb_channels：声道数，表示音频数据包含的声道数量。
 * nb_samples：每个声道的样本数。
 * sample_fmt：音频样本的格式，使用 AVSampleFormat 枚举类型表示。
 * align：内存对齐方式，通常设置为0表示默认对齐。
 */ 
int av_samples_get_buffer_size(int *linesize, int nb_channels, int nb_samples,enum AVSampleFormat sample_fmt, int align);

/*
 * Copyright (c) 2012 Stefano Sabatini
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * @example resampling_audio.c
 * libswresample API use example.
 */

#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>

/**
 * Look up the raw-audio format name matching a sample format.
 *
 * The name is the one main() passes to "ffplay -f" in its closing hint.
 * Only the five sample formats listed in the table below are recognised.
 *
 * @param fmt        receives the format name; set to NULL when no entry matches
 * @param sample_fmt sample format to look up
 * @return 0 on success, AVERROR(EINVAL) if sample_fmt is not in the table
 */
static int get_format_from_sample_fmt(const char **fmt,
                                      enum AVSampleFormat sample_fmt)
{
    struct sample_fmt_entry {
        enum AVSampleFormat sample_fmt;
        const char *fmt_be;
        const char *fmt_le;
    } sample_fmt_entries[] = {
        { AV_SAMPLE_FMT_U8,  "u8",    "u8"    },
        { AV_SAMPLE_FMT_S16, "s16be", "s16le" },
        { AV_SAMPLE_FMT_S32, "s32be", "s32le" },
        { AV_SAMPLE_FMT_FLT, "f32be", "f32le" },
        { AV_SAMPLE_FMT_DBL, "f64be", "f64le" },
    };
    struct sample_fmt_entry *match = NULL;
    int idx = 0;

    *fmt = NULL;

    /* linear search; stop at the first entry with the requested format */
    while (!match && idx < FF_ARRAY_ELEMS(sample_fmt_entries)) {
        if (sample_fmt_entries[idx].sample_fmt == sample_fmt)
            match = &sample_fmt_entries[idx];
        idx++;
    }

    if (!match) {
        fprintf(stderr,
                "Sample format %s not supported as output format\n",
                av_get_sample_fmt_name(sample_fmt));
        return AVERROR(EINVAL);
    }

    /* AV_NE picks one of the two names -- presumably by host endianness
     * (big-endian name first); confirm against libavutil */
    *fmt = AV_NE(match->fmt_be, match->fmt_le);
    return 0;
}

/**
 * Fill dst buffer with nb_samples, generated starting from t. 交错模式的
 */
static void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
{
    int i, j;
    double tincr = 1.0 / sample_rate, *dstp = dst;
    const double c = 2 * M_PI * 440.0;

    /* generate sin tone with 440Hz frequency and duplicated channels */
    for (i = 0; i < nb_samples; i++) {
        *dstp = sin(c * *t);
        for (j = 1; j < nb_channels; j++)
            dstp[j] = dstp[0];
        dstp += nb_channels;
        *t += tincr;
    }
}

int main(int argc, char **argv)
{
    // 输入参数
    int64_t src_ch_layout = AV_CH_LAYOUT_STEREO;
    int src_rate = 48000;
    enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL;
    int src_nb_channels = 0;
    uint8_t **src_data = NULL;  // 二级指针
    int src_linesize;
    int src_nb_samples = 1024;


    // 输出参数
    int64_t dst_ch_layout = AV_CH_LAYOUT_STEREO;
    int dst_rate = 44100;
    enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16;
    int dst_nb_channels = 0;
    uint8_t **dst_data = NULL;  //二级指针
    int dst_linesize;
    int dst_nb_samples;
    int max_dst_nb_samples;

    // 输出文件
    const char *dst_filename = NULL;    // 保存输出的pcm到本地，然后播放验证
    FILE *dst_file;


    int dst_bufsize;
    const char *fmt;

    // 重采样实例
    struct SwrContext *swr_ctx;

    double t;
    int ret;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s output_file\n"
                        "API example program to show how to resample an audio stream with libswresample.\n"
                        "This program generates a series of audio frames, resamples them to a specified "
                        "output format and rate and saves them to an output file named output_file.\n",
                argv[0]);
        exit(1);
    }
    dst_filename = argv[1];

    dst_file = fopen(dst_filename, "wb");
    if (!dst_file) {
        fprintf(stderr, "Could not open destination file %s\n", dst_filename);
        exit(1);
    }

    // 创建重采样器
    /* create resampler context */
    swr_ctx = swr_alloc();
    if (!swr_ctx) {
        fprintf(stderr, "Could not allocate resampler context\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    // 设置重采样参数
    /* set options */
    // 输入参数
    av_opt_set_int(swr_ctx, "in_channel_layout",    src_ch_layout, 0);
    av_opt_set_int(swr_ctx, "in_sample_rate",       src_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);
    // 输出参数
    av_opt_set_int(swr_ctx, "out_channel_layout",    dst_ch_layout, 0);
    av_opt_set_int(swr_ctx, "out_sample_rate",       dst_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);

    // 初始化重采样
    /* initialize the resampling context */
    if ((ret = swr_init(swr_ctx)) < 0) {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        goto end;
    }

    /* allocate source and destination samples buffers */
    // 计算出输入源的通道数量
    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    // 给输入源分配内存空间
    ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
                                             src_nb_samples, src_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate source samples\n");
        goto end;
    }

    /* compute the number of converted samples: buffering is avoided
     * ensuring that the output buffer will contain at least all the
     * converted input samples */
    // 计算输出采样数量
    max_dst_nb_samples = dst_nb_samples =
            av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);

    /* buffer is going to be directly written to a rawaudio file, no alignment */
    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    // 分配输出缓存内存
    ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
                                             dst_nb_samples, dst_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate destination samples\n");
        goto end;
    }

    t = 0;
    do {
        /* generate synthetic audio */
        // 生成输入源
        fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t);

        /* compute destination number of samples */
        int64_t delay = swr_get_delay(swr_ctx, src_rate);
        dst_nb_samples = av_rescale_rnd(delay + src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
        if (dst_nb_samples > max_dst_nb_samples) {
            av_freep(&dst_data[0]);
            ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
                                   dst_nb_samples, dst_sample_fmt, 1);
            if (ret < 0)
                break;
            max_dst_nb_samples = dst_nb_samples;
        }
        //        int fifo_size = swr_get_out_samples(swr_ctx,src_nb_samples);
        //        printf("fifo_size:%d\n", fifo_size);
        //        if(fifo_size < 1024)
        //            continue;

        /* convert to destination format */
        //        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
        if (ret < 0) {
            fprintf(stderr, "Error while converting\n");
            goto end;
        }
        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
                                                 ret, dst_sample_fmt, 1);
        if (dst_bufsize < 0) {
            fprintf(stderr, "Could not get sample buffer size\n");
            goto end;
        }
        printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);
        fwrite(dst_data[0], 1, dst_bufsize, dst_file);
    } while (t < 10);

    ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, NULL, 0);
    if (ret < 0) {
        fprintf(stderr, "Error while converting\n");
        goto end;
    }
    dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
                                             ret, dst_sample_fmt, 1);
    if (dst_bufsize < 0) {
        fprintf(stderr, "Could not get sample buffer size\n");
        goto end;
    }
    printf("flush in:%d out:%d\n", 0, ret);
    fwrite(dst_data[0], 1, dst_bufsize, dst_file);


    if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0)
        goto end;
    fprintf(stderr, "Resampling succeeded. Play the output file with the command:\n"
                    "ffplay -f %s -channel_layout %lld -channels %d -ar %d %s\n",
            fmt, dst_ch_layout, dst_nb_channels, dst_rate, dst_filename);

end:
    fclose(dst_file);

    if (src_data)
        av_freep(&src_data[0]);
    av_freep(&src_data);

    if (dst_data)
        av_freep(&dst_data[0]);
    av_freep(&dst_data);

    swr_free(&swr_ctx);
    return ret < 0;
}