ffmpeg api-flac-test.c

jinjie412

已于 2024-01-27 14:30:44 修改

阅读量592

点赞数 7

分类专栏： ffmpeg 4.0.4 示例代码讲解文章标签： ffmpeg c语言 c++

于 2024-01-08 16:23:31 首次发布

本文链接：https://blog.csdn.net/jinjie412/article/details/135460228

版权

ffmpeg 4.0.4 示例代码讲解专栏收录该内容

5 篇文章 0 订阅

订阅专栏

generate_raw_frame

这个函数接受一个 frame_data 数组作为参数，用于存储音频数据。i 参数表示当前帧的索引，sample_rate 是采样率，channels 是声道数，frame_size 是帧大小。函数使用一个简单的算法生成音频数据，然后将其存储在 frame_data 数组中。

具体来说，它遍历帧中的每个采样点，根据公式 10000 * ((j / 10 * i) % 2) 生成音频数据的第一个声道，并根据声道数生成其他声道的数据。最后，它将生成的音频数据存储在 frame_data 数组中，并返回 0 表示成功生成音频帧。

假设我们调用 generate_raw_frame 函数来生成一个采样率为 44100Hz，双声道（立体声）的音频帧，帧大小为 1024 个采样点：

uint16_t frame_data[1024 * 2]; // 1024 个采样点，每个采样点包含双声道数据

generate_raw_frame(frame_data, 0, 44100, 2, 1024); // 生成第一帧的音频数据

此时，frame_data 数组中将包含 1024 个采样点的音频数据，每个采样点包含左右两个声道的数据。


首先，我们声明一个 uint16_t 类型的数组 frame_data，用来存储生成的音频帧数据。这个数组的大小为 1024 * 2，因为我们有 1024 个采样点，每个采样点包含左右两个声道的数据。

然后，我们调用 generate_raw_frame 函数，并传入以下参数：

frame_data：指向要填充的音频帧数据的指针，即我们声明的 frame_data 数组的首地址。
0：当前帧的索引。在这个例子中，我们传入 0，表示生成的是第一帧的音频数据。
44100：采样率为 44100Hz。
2：声道数为 2，表示双声道（立体声）。
1024：帧大小为 1024 个采样点。
generate_raw_frame 函数开始执行。它使用一个嵌套的循环来填充音频帧数据：

外层循环 for (j = 0; j < frame_size; j++) 遍历每个采样点。
内层循环 for (k = 1; k < channels; k++) 遍历每个声道（从第二个声道开始）。
在每个采样点内部，首先计算第一个声道的采样值 frame_data[channels * j] = 10000 * ((j / 10 * i) % 2)。这是一个简单的方波模式：(j / 10 * i) % 2 只会得到 0 或 1，因此第一个声道的采样值只有 0 和 10000 两种取值；当 i 为偶数时（例如 i = 0 的第一帧），整帧都是 0。
然后，对于其他声道，它们的采样值是第一个声道采样值的倍数，frame_data[channels * j + k] = frame_data[channels * j] * (k + 1)。
generate_raw_frame 函数执行完毕后，frame_data 数组中将包含 1024 个采样点的音频数据，每个采样点包含左右两个声道的数据，可以用来表示一帧音频数据。

/**
 * Fill one interleaved audio frame with a deterministic test pattern.
 *
 * For each sample position j the first channel receives
 * 10000 * ((j / 10 * i) % 2); every further channel k (counted from 0)
 * receives the stored first-channel value multiplied by (k + 1).
 *
 * @param frame_data  destination buffer holding channels * frame_size
 *                    interleaved samples
 * @param i           index of the frame being generated
 * @param sample_rate sample rate (not used by the generator)
 * @param channels    number of interleaved channels
 * @param frame_size  number of samples per channel
 * @return always 0
 */
static int generate_raw_frame(uint16_t *frame_data, int i, int sample_rate,
                              int channels, int frame_size)
{
    int pos = 0;

    while (pos < frame_size) {
        /* Start of the interleaved group belonging to this sample position. */
        uint16_t *sample = frame_data + channels * pos;
        int ch;

        /* The level flips every 10 samples when i is odd and stays 0 when i is even. */
        sample[0] = 10000 * ((pos / 10 * i) % 2);

        /* Remaining channels are integer multiples of the first channel. */
        for (ch = 1; ch < channels; ch++)
            sample[ch] = sample[0] * (ch + 1);

        pos++;
    }

    return 0;
}

init_encoder

这个函数接受一个编码器 enc、一个指向编码器上下文指针的指针 enc_ctx、声道布局 ch_layout 和采样率 sample_rate 作为参数。
它首先使用 av_get_channel_layout_string 函数获取声道布局的字符串表示形式，并打印声道布局和采样率的信息。
然后，它使用 avcodec_alloc_context3 函数为编码器分配上下文内存，并设置上下文的参数。最后，它使用 avcodec_open2 函数打开编码器。

函数返回 0 表示成功初始化编码器，并将分配的编码器上下文存储在传入的指针 enc_ctx 中。如果内存分配失败或打开编码器失败，
函数将返回相应的错误代码。

void av_get_channel_layout_string(char *buf, int buf_size, int nb_channels, uint64_t channel_layout)
参数说明：

buf：指向存储结果的缓冲区的指针。
buf_size：缓冲区的大小。
nb_channels：声道数量。
channel_layout：声道布局。
这个函数会根据传入的 channel_layout 参数生成对应的声道布局字符串，并将结果存储在 buf 缓冲区中。例如，如果我们要获取立体声（左右声道）的声道布局字符串，可以这样使用：

char layout_str[256]; // 声道布局字符串存储缓冲区
uint64_t layout = AV_CH_LAYOUT_STEREO; // 立体声声道布局

av_get_channel_layout_string(layout_str, sizeof(layout_str), 0, layout);

printf("Channel layout: %s\n", layout_str);

Channel layout: stereo

这里 AV_CH_LAYOUT_STEREO 是一个宏，表示立体声声道布局，对应的字符串为 "stereo"。



/**
 * Allocate and open an encoder context for signed 16-bit audio.
 *
 * Logs the channel layout name and the sample rate, allocates a context for
 * enc, sets sample format, sample rate and channel layout on it, and opens
 * the codec.
 *
 * @param enc         encoder to open
 * @param enc_ctx     receives the opened context on success; not written on failure
 * @param ch_layout   channel layout to encode with
 * @param sample_rate sample rate to encode with
 * @return 0 on success, AVERROR(ENOMEM) if the context cannot be allocated,
 *         otherwise the negative error code returned by avcodec_open2()
 */
static int init_encoder(AVCodec *enc, AVCodecContext **enc_ctx,
                        int64_t ch_layout, int sample_rate)
{
    AVCodecContext *ctx;
    int result;
    char name_buff[NAME_BUFF_SIZE]; /* textual form of the channel layout */

    av_get_channel_layout_string(name_buff, NAME_BUFF_SIZE, 0, ch_layout);
    av_log(NULL, AV_LOG_INFO, "channel layout: %s, sample rate: %i\n", name_buff, sample_rate);

    ctx = avcodec_alloc_context3(enc);
    if (!ctx) {
        av_log(NULL, AV_LOG_ERROR, "Can't allocate encoder context\n");
        return AVERROR(ENOMEM);
    }

    ctx->sample_fmt = AV_SAMPLE_FMT_S16;
    ctx->sample_rate = sample_rate;
    ctx->channel_layout = ch_layout;

    result = avcodec_open2(ctx, enc, NULL);
    if (result < 0) {
        av_log(ctx, AV_LOG_ERROR, "Can't open encoder\n");
        /* The caller never sees ctx on this path, so it must be released here. */
        avcodec_free_context(&ctx);
        return result;
    }

    *enc_ctx = ctx;
    return 0;
}

init_decoder

这个函数接受一个解码器 dec、一个指向解码器上下文指针的指针 dec_ctx 和声道布局 ch_layout 作为参数。
它首先使用 avcodec_alloc_context3 函数为解码器分配上下文内存，并设置上下文的参数。然后，它使用 avcodec_open2 函数打开解码器。
函数返回 0 表示成功初始化解码器，并将分配的解码器上下文存储在传入的指针 dec_ctx 中。如果内存分配失败或打开解码器失败，函数将返回相应的错误代码。
/**
 * Allocate and open a decoder context that requests signed 16-bit output.
 *
 * Allocates a context for dec, sets the requested sample format, the
 * requested channel layout and the channel layout on it, and opens the codec.
 *
 * @param dec       decoder to open
 * @param dec_ctx   receives the opened context on success; not written on failure
 * @param ch_layout channel layout to request from the decoder
 * @return 0 on success, AVERROR(ENOMEM) if the context cannot be allocated,
 *         otherwise the negative error code returned by avcodec_open2()
 */
static int init_decoder(AVCodec *dec, AVCodecContext **dec_ctx,
                        int64_t ch_layout)
{
    AVCodecContext *ctx;
    int result;

    ctx = avcodec_alloc_context3(dec);
    if (!ctx) {
        av_log(NULL, AV_LOG_ERROR , "Can't allocate decoder context\n");
        return AVERROR(ENOMEM);
    }

    ctx->request_sample_fmt = AV_SAMPLE_FMT_S16;
    /* XXX: FLAC ignores it for some reason */
    ctx->request_channel_layout = ch_layout;
    ctx->channel_layout = ch_layout;

    result = avcodec_open2(ctx, dec, NULL);
    if (result < 0) {
        av_log(ctx, AV_LOG_ERROR, "Can't open decoder\n");
        /* The caller never sees ctx on this path, so it must be released here. */
        avcodec_free_context(&ctx);
        return result;
    }

    *dec_ctx = ctx;
    return 0;
}

run_test

用于执行编码器和解码器的测试的主要函数。它包括对音频数据进行编码和解码，然后比较编码前后的数据是否一致。
这个函数的主要功能是生成原始音频数据，然后将其编码为 FLAC 格式，再解码回原始数据，并比较解码后的数据与原始数据是否一致。
函数首先分配输入和输出音频帧的内存，然后循环进行编码和解码的测试。在每一次循环中，它使用 generate_raw_frame 函数生成原始音频数据，并将其拷贝到输入原始音频数据缓冲区。接着，它调用 avcodec_encode_audio2 函数对音频帧进行编码，并使用 avcodec_decode_audio4 函数对编码后的数据进行解码。
循环结束后，它比较原始输入数据与解码输出数据是否一致，并把解码得到的数据写入 output_<声道布局名>.raw 文件。最后，它释放分配的内存，并返回测试结果。

/**
 * Round-trip NUMBER_OF_FRAMES generated frames through the encoder and the
 * decoder, verify that the decoded samples equal the generated ones, and dump
 * the decoded samples to "output_<layout name>.raw".
 *
 * All frames, buffers and the packet allocated here are released on every
 * exit path.
 *
 * @param enc     encoder (not used by this function)
 * @param dec     decoder (not used by this function)
 * @param enc_ctx opened encoder context
 * @param dec_ctx opened decoder context
 * @return 0 on success;
 *         AVERROR(ENOMEM) when a frame or buffer cannot be allocated;
 *         the negative code from avcodec_encode_audio2()/avcodec_decode_audio4();
 *         AVERROR_UNKNOWN when a decoded frame's sample count, channel layout
 *         or sample format differs from the input frame;
 *         1 on a linesize inconsistency, a data mismatch or an output file error
 */
static int run_test(AVCodec *enc, AVCodec *dec, AVCodecContext *enc_ctx,
                    AVCodecContext *dec_ctx)
{
    AVPacket enc_pkt;
    AVFrame *in_frame = NULL, *out_frame = NULL;
    uint8_t *raw_in = NULL, *raw_out = NULL;
    FILE *outfile;
    char layout_name_buff[NAME_BUFF_SIZE];
    char filename[100];
    int in_offset = 0, out_offset = 0;   /* bytes stored in raw_in / raw_out */
    int in_frame_bytes, out_frame_bytes;
    int got_output = 0;
    int result;
    int ret = 0;
    int i;

    /* Start from an empty packet so the cleanup path can always unref it. */
    av_init_packet(&enc_pkt);
    enc_pkt.data = NULL;
    enc_pkt.size = 0;

    in_frame = av_frame_alloc();
    if (!in_frame) {
        av_log(NULL, AV_LOG_ERROR, "Can't allocate input frame\n");
        ret = AVERROR(ENOMEM);
        goto cleanup;
    }

    in_frame->nb_samples = enc_ctx->frame_size;
    in_frame->format = enc_ctx->sample_fmt;
    in_frame->channel_layout = enc_ctx->channel_layout;
    if (av_frame_get_buffer(in_frame, 32) != 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't allocate a buffer for input frame\n");
        ret = AVERROR(ENOMEM);
        goto cleanup;
    }

    out_frame = av_frame_alloc();
    if (!out_frame) {
        av_log(NULL, AV_LOG_ERROR, "Can't allocate output frame\n");
        ret = AVERROR(ENOMEM);
        goto cleanup;
    }

    /* Both capture buffers are sized for NUMBER_OF_FRAMES input lines. */
    raw_in = av_malloc(in_frame->linesize[0] * NUMBER_OF_FRAMES);
    if (!raw_in) {
        av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for raw_in\n");
        ret = AVERROR(ENOMEM);
        goto cleanup;
    }

    raw_out = av_malloc(in_frame->linesize[0] * NUMBER_OF_FRAMES);
    if (!raw_out) {
        av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for raw_out\n");
        ret = AVERROR(ENOMEM);
        goto cleanup;
    }

    for (i = 0; i < NUMBER_OF_FRAMES; i++) {
        av_init_packet(&enc_pkt);
        enc_pkt.data = NULL;
        enc_pkt.size = 0;

        /* Generate the next frame directly into the input frame's buffer. */
        generate_raw_frame((uint16_t*)(in_frame->data[0]), i, enc_ctx->sample_rate,
                           enc_ctx->channels, enc_ctx->frame_size);
        in_frame_bytes = in_frame->nb_samples * in_frame->channels * sizeof(uint16_t);
        if (in_frame_bytes > in_frame->linesize[0]) {
            av_log(NULL, AV_LOG_ERROR, "Incorrect value of input frame linesize\n");
            ret = 1;
            goto cleanup;
        }
        /* Keep a copy of what was fed to the encoder for the final comparison. */
        memcpy(raw_in + in_offset, in_frame->data[0], in_frame_bytes);
        in_offset += in_frame_bytes;

        result = avcodec_encode_audio2(enc_ctx, &enc_pkt, in_frame, &got_output);
        if (result < 0) {
            av_log(NULL, AV_LOG_ERROR, "Error encoding audio frame\n");
            ret = result;
            goto cleanup;
        }

        /* Decode only when the encoder actually produced a packet. */
        if (got_output) {
            result = avcodec_decode_audio4(dec_ctx, out_frame, &got_output, &enc_pkt);
            if (result < 0) {
                av_log(NULL, AV_LOG_ERROR, "Error decoding audio packet\n");
                ret = result;
                goto cleanup;
            }

            if (got_output) {
                /* The decoded frame must have the same shape as the input frame. */
                if (in_frame->nb_samples != out_frame->nb_samples) {
                    av_log(NULL, AV_LOG_ERROR, "Error frames before and after decoding has different number of samples\n");
                    ret = AVERROR_UNKNOWN;
                    goto cleanup;
                }
                if (in_frame->channel_layout != out_frame->channel_layout) {
                    av_log(NULL, AV_LOG_ERROR, "Error frames before and after decoding has different channel layout\n");
                    ret = AVERROR_UNKNOWN;
                    goto cleanup;
                }
                if (in_frame->format != out_frame->format) {
                    av_log(NULL, AV_LOG_ERROR, "Error frames before and after decoding has different sample format\n");
                    ret = AVERROR_UNKNOWN;
                    goto cleanup;
                }
                out_frame_bytes = out_frame->nb_samples * out_frame->channels * sizeof(uint16_t);
                if (out_frame_bytes > out_frame->linesize[0]) {
                    av_log(NULL, AV_LOG_ERROR, "Incorrect value of output frame linesize\n");
                    ret = 1;
                    goto cleanup;
                }
                memcpy(raw_out + out_offset, out_frame->data[0], out_frame_bytes);
                out_offset += out_frame_bytes;
            }
        }
        av_packet_unref(&enc_pkt);
    }

    /*
     * Compare only bytes that were really written: a short or empty decode
     * counts as a mismatch instead of reading unwritten parts of raw_out.
     */
    if (out_offset != in_offset || memcmp(raw_in, raw_out, in_offset) != 0) {
        av_log(NULL, AV_LOG_ERROR, "Output differs\n");
        ret = 1;
        goto cleanup;
    }

    av_log(NULL, AV_LOG_INFO, "OK\n");

    /* Save the decoded samples, one file per channel layout name. */
    av_get_channel_layout_string(layout_name_buff, NAME_BUFF_SIZE, 0, enc_ctx->channel_layout);
    snprintf(filename, sizeof(filename), "output_%s.raw", layout_name_buff);
    outfile = fopen(filename, "wb");
    if (!outfile) {
        av_log(NULL, AV_LOG_ERROR, "Can't open output file\n");
        ret = 1;
        goto cleanup;
    }

    if (fwrite(raw_out, 1, out_offset, outfile) != (size_t)out_offset) {
        av_log(NULL, AV_LOG_ERROR, "Can't write output file\n");
        ret = 1;
    }
    /* fclose flushes buffered data, so a failure here also means lost output. */
    if (fclose(outfile) != 0 && ret == 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't close output file\n");
        ret = 1;
    }

cleanup:
    av_packet_unref(&enc_pkt);
    av_freep(&raw_in);
    av_freep(&raw_out);
    av_frame_free(&in_frame);
    av_frame_free(&out_frame);
    return ret;
}

main

/**
 * Program entry point: runs the FLAC encode/decode round-trip test for every
 * combination of the channel layouts and sample rates listed below.
 *
 * Channel layouts exercised (channel sets as defined in
 * libavutil/channel_layout.h):
 *  - AV_CH_LAYOUT_STEREO:         front left and front right.
 *  - AV_CH_LAYOUT_5POINT1_BACK:   front left, front right, front center,
 *                                 LFE, back left and back right (6 channels).
 *  - AV_CH_LAYOUT_SURROUND:       front left, front right and front center
 *                                 (3 channels, no LFE).
 *  - AV_CH_LAYOUT_STEREO_DOWNMIX: the dedicated stereo-downmix left/right pair.
 *
 * @return 0 when every combination passes, 1 as soon as a codec lookup, a
 *         context initialisation or a test run fails
 */
int main(void)
{
    AVCodec *enc = NULL, *dec = NULL;
    AVCodecContext *enc_ctx = NULL, *dec_ctx = NULL;
    uint64_t channel_layouts[] = {AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_5POINT1_BACK, AV_CH_LAYOUT_SURROUND, AV_CH_LAYOUT_STEREO_DOWNMIX};
    int sample_rates[] = {8000, 44100, 48000, 192000};
    int cl, sr;

    enc = avcodec_find_encoder(AV_CODEC_ID_FLAC);
    if (!enc) {
        av_log(NULL, AV_LOG_ERROR, "Can't find encoder\n");
        return 1;
    }

    dec = avcodec_find_decoder(AV_CODEC_ID_FLAC);
    if (!dec) {
        av_log(NULL, AV_LOG_ERROR, "Can't find decoder\n");
        return 1;
    }

    for (cl = 0; cl < FF_ARRAY_ELEMS(channel_layouts); cl++) {
        for (sr = 0; sr < FF_ARRAY_ELEMS(sample_rates); sr++) {
            if (init_encoder(enc, &enc_ctx, channel_layouts[cl], sample_rates[sr]) != 0)
                return 1;

            if (init_decoder(dec, &dec_ctx, channel_layouts[cl]) != 0) {
                /* The encoder context was already opened; release it before bailing out. */
                close_encoder(&enc_ctx);
                return 1;
            }

            if (run_test(enc, dec, enc_ctx, dec_ctx) != 0) {
                /* Release both contexts on the failure path as well. */
                close_encoder(&enc_ctx);
                close_decoder(&dec_ctx);
                return 1;
            }

            close_encoder(&enc_ctx);
            close_decoder(&dec_ctx);
        }
    }

    return 0;
}