FFmpeg AAC编解码、重采样实战

D^ε^S

已于 2024-07-18 15:55:28 修改

阅读量626

点赞数 10

文章标签： ffmpeg aac

于 2024-03-05 16:14:14 首次发布

本文链接：https://blog.csdn.net/weixin_43147845/article/details/136480382

版权

1、AAC解码

1.1 打开解码器

// Decoder setup: returns an opened AAC decoder context, or terminates the process on failure.
AVCodecContext* audio_decode_init(){
    // Step 1: look up the AAC decoder implementation.
    const AVCodec *aac_decoder = avcodec_find_decoder(AV_CODEC_ID_AAC);
    if (aac_decoder == NULL) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    // Step 2: allocate a context for that decoder.
    AVCodecContext *decoder_ctx = avcodec_alloc_context3(aac_decoder);
    if (decoder_ctx == NULL) {
        fprintf(stderr, "Could not allocate audio codec context\n");
        exit(1);
    }

    // Step 3: bind the decoder to its context by opening it.
    if (avcodec_open2(decoder_ctx, aac_decoder, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    return decoder_ctx;
}

1.2 读取aac数据

while (av_read_frame(ifmt_ctx, &pkt) >= 0) {
    if (pkt.stream_index == audio_index) {
        //音频

    }
    else if(pkt.stream_index == video_index){
        //视频
    }
    av_packet_unref(&pkt);
}

1.3 送入解码器并获取PCM

ret = avcodec_send_packet(dec_ctx, pkt);
if (ret < 0) {
    fprintf(stderr, "Error submitting the packet to the decoder\n");
    exit(1);
}

while (ret >= 0) {
    ret = avcodec_receive_frame(dec_ctx, frame);
    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        return;
    else if (ret < 0) {
        fprintf(stderr, "Error during decoding\n");
        exit(1);
    }
    // frame中保存了PCM数据
}

2、重采样

不同的音频设备或系统可能有不同的采样率要求。如果一个音频文件的采样率与目标设备不匹配，就需要进行重采样以使其适应目标设备。PCM三要素是采样率、量化位数、通道数，重采样就是改变三要素。

2.1 创建一个重采样上下文

// swr_alloc_set_opts is the FFmpeg function used to create a SwrContext (audio resampling context).
// struct SwrContext *s: pointer to an existing resampling context. If NULL is passed, the function creates a new context.

// int64_t out_ch_layout: channel layout of the output audio

// enum AVSampleFormat out_sample_fmt: sample format of the output audio

// int out_sample_rate: sample rate of the output audio

// int64_t in_ch_layout: channel layout of the input audio

// enum AVSampleFormat in_sample_fmt: sample format of the input audio

// int in_sample_rate: sample rate of the input audio

// int log_offset: logging level offset; this parameter is usually set to 0.

// void *log_ctx: parent logging context; if NULL the default logging context is used.

// The function creates a SwrContext and sets the parameters above. The context must then be
// initialized with swr_init before swr_convert can be used to perform the actual resampling.
struct SwrContext *swr_alloc_set_opts(struct SwrContext *s,
                                      int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate,
                                      int64_t  in_ch_layout, enum AVSampleFormat  in_sample_fmt, int  in_sample_rate,
                                      int log_offset, void *log_ctx);

2.2 初始化重采样上下文

// Initializes a resampling context once its parameters have been set; it must be called before
// swr_convert. The complete program in this article treats a non-zero return value as failure.
int swr_init(struct SwrContext *s);

2.3 重采样

// swr_convert performs the resampling itself: it converts input audio data into output audio data.

// struct SwrContext *s: the audio resampling context

// uint8_t **out: pointers to the output audio data buffers

// int out_count: capacity of the output buffer, counted in samples, not in bytes

// const uint8_t **in: pointers to the input audio data buffers

// int in_count: amount of input available, counted in samples, not in bytes
int swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
                                const uint8_t **in , int in_count);

3、AAC编码

编码和解码过程类似，只是编码API不一样

3.1 编码器初始化

编码需要指定一些编码参数，解码不需要，因为解码参数都写在aac数据里面，解码器拿到之后根据参数解码即可，但是编码需要指定编码成什么样的

// 编码初始化
AVCodecContext *audio_encode_init()
{
    AVCodecContext *c_ctx = NULL;
    AVCodec *codec = NULL;
    // codec = avcodec_find_encoder(AV_CODEC_ID_AAC);//libfdk_aac和aac的参数不一样
    codec = avcodec_find_encoder_by_name("libfdk_aac");
    if (!codec) {
        printf("EnCodec not found\n");
        exit(1);
    }
    c_ctx = avcodec_alloc_context3(codec);
    c_ctx->sample_fmt = enc_famt; // fdk_aac需要16位的音频输入
    if (!check_sample_fmt(codec, c_ctx->sample_fmt)) {
        printf("Encoder does not support sample format %s\n", av_get_sample_fmt_name(c_ctx->sample_fmt));
        exit(1);
    }
    c_ctx->channel_layout = AV_CH_LAYOUT_STEREO; // 输入音频的CHANNEL LAYOUT
    c_ctx->channels = 2;                         // 输入音频的声道数
    int sample_rate = select_sample_rate(codec);
    printf("enc sample_rate:%d\n", sample_rate);                                          // 44100
    printf("编码器采样格式 sample_fmt: %s\n", av_get_sample_fmt_name(c_ctx->sample_fmt)); // s16
    c_ctx->sample_rate = sample_rate;                                                     // 输入音频的采样率
    c_ctx->bit_rate = 0;                                                                  // AAC : 128K   AAV_HE: 64K  AAC_HE_V2: 32K. bit_rate为0时才会查找profile属性值
    c_ctx->profile = FF_PROFILE_AAC_LOW;                                                  // FF_PROFILE_AAC_LOW FF_PROFILE_AAC_HE_V2
    if (avcodec_open2(c_ctx, codec, NULL) < 0) {
        printf("audio_encode_init error\n");
        exit(1);
    }
    return c_ctx;
    printf("audio_encode_init ok\n");
}

3.2 pcm送入编码器，并从编码器获取aac

ret = avcodec_send_frame(ctx, frame);
if (ret < 0) {
    fprintf(stderr, "Error sending the frame to the encoder\n");
    exit(1);
}
while (ret >= 0) {
    ret = avcodec_receive_packet(ctx, pkt);
    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        return;
    else if (ret < 0) {
        fprintf(stderr, "Error encoding audio frame\n");
        exit(1);
    }
    // pkt中保存了编码后的aac
        
    av_packet_unref(pkt);
}

4、编解码缓冲区

编解码过程中编解码器内部会缓冲一些数据，即把数据送入到编解码器之后并不会立刻输出，一般都是会缓冲几帧数据的(可以设置去掉缓冲)，所以要在最后把编解码器缓冲的数据取出来，直接给编解码器传入NULL就可以了

解码：

AVPacket pkt;
pkt.data = NULL;
pkt.size = 0;
avcodec_send_packet(dec_ctx, &pkt);

编码：

// Passing NULL instead of a frame asks the encoder to hand back the data it is still buffering.
avcodec_send_frame(ctx, NULL);

完整代码：

1、从mp4文件中读取aac进行解码

2、解码后的pcm进行第一次重采样。这一步的目的是让播放器可以正常播放：FFmpeg自带的AAC解码器通常输出32位浮点(fltp)采样，如果把直接解码出来的pcm当作s16le去播放，采样格式对不上，就会播放不正常、有杂音

3、对pcm进行第二次重采样，目的是把pcm转换成编码器需要的格式

4、编码成AAC并写入文件

运行之后会生成四个文件

test_out.aac //从mp4文件中分离出来的aac

test_out.pcm // 解码后的pcm，ffplay -ar 44100 -ac 2 -f s16le -i test_out.pcm 播放有杂音

swr.pcm // 第一次重采样的pcm，ffplay -ar 44100 -ac 2 -f s32le -i swr.pcm 播放正常

swr_encode.aac // 编码之后的aac


/**
 * aac解码->重采样(重采样成播放器需要的格式)->重采样(重采样成编码器需要的格式)->编码
 */
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/channel_layout.h>
#include <libavutil/log.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Size in bytes of an ADTS header without CRC. No trailing semicolon, so the macro can be used inside expressions.
#define ADTS_HEADER_LEN 7
// Dump of the AAC stream taken from the input file, and of the PCM that comes out of the decoder.
char *aac_filename = "test_out.aac";
FILE *aac_fd;
char *pcm_filename = "test_out.pcm";
FILE *pcm_fd;

// ---- First resampling pass ----
char *swr_filename = "swr.pcm";
FILE *swr_fd;
uint8_t **dst_data;  // output sample buffers
int dst_linesize;    // line size of the output sample buffers
enum AVSampleFormat src_sample_fmt, dst_sample_fmt;
int src_nb_channels, dst_nb_channels;
int src_ratio, dst_ratio; // used as the input / output sample rates by main()
int src_nb_samples, dst_nb_samples;

// ---- Encoding ----
enum AVSampleFormat enc_famt = AV_SAMPLE_FMT_S16;
char *encode_filename = "swr_encode.aac";
FILE *encode_fd;
uint8_t **encode_dst_data;  // output sample buffers
int encode_dst_linesize;    // line size of the output sample buffers

// ---- Second resampling pass: its parameters match the encoder's ----
enum AVSampleFormat encode_src_sample_fmt, encode_dst_sample_fmt;
int encode_src_nb_channels, encode_dst_nb_channels;
int encode_src_ratio, encode_dst_ratio;
int encode_src_nb_samples, encode_dst_nb_samples;

// Sample rates addressable by the 4-bit ADTS sampling frequency index.
// The position of a rate in this table is the index value written into the header.
const int sampling_frequencies[] = {
    96000, // 0x0
    88200, // 0x1
    64000, // 0x2
    48000, // 0x3
    44100, // 0x4
    32000, // 0x5
    24000, // 0x6
    22050, // 0x7
    16000, // 0x8
    12000, // 0x9
    11025, // 0xa
    8000   // 0xb
           // 0xc, 0xd, 0xe and 0xf are reserved
};

/**
 * Build the 7-byte ADTS header (protection absent, i.e. no CRC) for one AAC frame.
 *
 * @param p_adts_header  output buffer, at least 7 bytes
 * @param data_length    size of the raw AAC payload in bytes, header excluded
 * @param profile        value for the 2-bit profile field; only the low 2 bits are stored
 * @param samplerate     sample rate in Hz; must be one of sampling_frequencies[]
 * @param channels       value for the 3-bit channel configuration field; only the low 3 bits are stored
 * @return 0 on success; -1 if the buffer is NULL, the sample rate is not in the table,
 *         or header + payload does not fit the 13-bit frame length field
 */
int adts_header(char *const p_adts_header, const int data_length,
                const int profile, const int samplerate,
                const int channels)
{
    enum {
        ADTS_HDR_BYTES = 7,
        ADTS_MAX_FRAME_LEN = 0x1fff // the frame length field is 13 bits wide
    };

    if (p_adts_header == NULL) {
        return -1;
    }

    // The frame length field counts header + payload; anything larger would be silently truncated.
    if (data_length < 0 || data_length > ADTS_MAX_FRAME_LEN - ADTS_HDR_BYTES) {
        printf("unsupported frame length:%d\n", data_length);
        return -1;
    }

    const int frequencies_size = (int)(sizeof(sampling_frequencies) / sizeof(sampling_frequencies[0]));
    int sampling_frequency_index = -1;
    for (int i = 0; i < frequencies_size; i++) {
        if (sampling_frequencies[i] == samplerate) {
            sampling_frequency_index = i;
            break;
        }
    }
    if (sampling_frequency_index < 0) {
        printf("unsupport samplerate:%d\n", samplerate);
        return -1;
    }

    const unsigned int frame_len = (unsigned int)(data_length + ADTS_HDR_BYTES);
    const unsigned int profile_bits = (unsigned int)profile & 0x03u;
    const unsigned int freq_bits = (unsigned int)sampling_frequency_index & 0x0fu;
    const unsigned int channel_bits = (unsigned int)channels & 0x07u;

    // Assemble through unsigned bytes so that values such as 0xff are stored without relying on
    // the implementation-defined conversion to a possibly signed char.
    uint8_t *const hdr = (uint8_t *)p_adts_header;

    hdr[0] = 0xff;                                // syncword 0xfff, high 8 bits
    hdr[1] = 0xf0                                 // syncword 0xfff, low 4 bits
             | (0 << 3)                           // MPEG version: 0 for MPEG-4, 1 for MPEG-2   1 bit
             | (0 << 1)                           // layer: 0                                   2 bits
             | 1;                                 // protection absent: 1                       1 bit

    hdr[2] = (uint8_t)((profile_bits << 6)        // profile                                    2 bits
                       | (freq_bits << 2)         // sampling frequency index                   4 bits
                       | (0 << 1)                 // private bit: 0                             1 bit
                       | (channel_bits >> 2));    // channel configuration, high 1 bit

    hdr[3] = (uint8_t)(((channel_bits & 0x03u) << 6)      // channel configuration, low 2 bits
                       | (0 << 5)                         // original: 0                        1 bit
                       | (0 << 4)                         // home: 0                            1 bit
                       | (0 << 3)                         // copyright id bit: 0                1 bit
                       | (0 << 2)                         // copyright id start: 0              1 bit
                       | ((frame_len & 0x1800u) >> 11));  // frame length, high 2 bits

    hdr[4] = (uint8_t)((frame_len & 0x7f8u) >> 3);          // frame length, middle 8 bits
    hdr[5] = (uint8_t)(((frame_len & 0x7u) << 5) | 0x1fu);  // frame length, low 3 bits + buffer fullness 0x7ff, high 5 bits
    hdr[6] = 0xfc;                                          // buffer fullness 0x7ff, low 6 bits
    // The low 2 bits of byte 6 are number_of_raw_data_blocks_in_frame: the ADTS frame carries
    // number_of_raw_data_blocks_in_frame + 1 raw AAC frames, so 0 means exactly one.

    return 0;
}
/**
 * Decoder setup: locate the AAC decoder, allocate a context for it and open it.
 * Every failure is reported on stderr and terminates the process with exit(1).
 *
 * @return the opened decoder context
 */
AVCodecContext *audio_decode_init()
{
    const AVCodec *aac_decoder = avcodec_find_decoder(AV_CODEC_ID_AAC);
    if (aac_decoder == NULL) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    AVCodecContext *decoder_ctx = avcodec_alloc_context3(aac_decoder);
    if (decoder_ctx == NULL) {
        fprintf(stderr, "Could not allocate audio codec context\n");
        exit(1);
    }

    // Opening the context is what ties the decoder implementation to it.
    if (avcodec_open2(decoder_ctx, aac_decoder, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    return decoder_ctx;
}
// 编码初始化
AVCodecContext *audio_encode_init()
{
    AVCodecContext *c_ctx = NULL;
    AVCodec *codec = NULL;
    // codec = avcodec_find_encoder(AV_CODEC_ID_AAC);//libfdk_aac和aac的参数不一样
    codec = avcodec_find_encoder_by_name("libfdk_aac");
    if (!codec) {
        printf("EnCodec not found\n");
        exit(1);
    }
    c_ctx = avcodec_alloc_context3(codec);
    c_ctx->sample_fmt = enc_famt; // fdk_aac需要16位的音频输入
    if (!check_sample_fmt(codec, c_ctx->sample_fmt)) {
        printf("Encoder does not support sample format %s\n", av_get_sample_fmt_name(c_ctx->sample_fmt));
        exit(1);
    }
    c_ctx->channel_layout = AV_CH_LAYOUT_STEREO; // 输入音频的CHANNEL LAYOUT
    c_ctx->channels = 2;                         // 输入音频的声道数
    int sample_rate = select_sample_rate(codec);
    printf("enc sample_rate:%d\n", sample_rate);                                          // 44100
    printf("编码器采样格式 sample_fmt: %s\n", av_get_sample_fmt_name(c_ctx->sample_fmt)); // s16
    c_ctx->sample_rate = sample_rate;                                                     // 输入音频的采样率
    c_ctx->bit_rate = 0;                                                                  // AAC : 128K   AAV_HE: 64K  AAC_HE_V2: 32K. bit_rate为0时才会查找profile属性值
    c_ctx->profile = FF_PROFILE_AAC_LOW;                                                  // FF_PROFILE_AAC_LOW FF_PROFILE_AAC_HE_V2
    if (avcodec_open2(c_ctx, codec, NULL) < 0) {
        printf("audio_encode_init error\n");
        exit(1);
    }
    return c_ctx;
    printf("audio_encode_init ok\n");
}
/**
 * Send one frame to the encoder and append every packet it returns to `output`.
 *
 * @param ctx     opened encoder context
 * @param frame   frame to encode; forwarded unchanged to avcodec_send_frame
 * @param pkt     scratch packet, unreferenced after each write
 * @param output  destination stream for the encoded bytes
 *
 * Encoder errors are reported on stderr and terminate the process with exit(1).
 */
static void encode(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt,
                   FILE *output)
{
    if (avcodec_send_frame(ctx, frame) < 0) {
        fprintf(stderr, "Error sending the frame to the encoder\n");
        exit(1);
    }

    for (;;) {
        const int status = avcodec_receive_packet(ctx, pkt);
        if (status == AVERROR_EOF || status == AVERROR(EAGAIN)) {
            // The encoder has nothing more to give for now.
            return;
        }
        if (status < 0) {
            fprintf(stderr, "Error encoding audio frame\n");
            exit(1);
        }

        // Original author's note: the encoded data already carries its ADTS header,
        // so the packet is written out as-is.
        fwrite(pkt->data, 1, pkt->size, output);
        av_packet_unref(pkt);
    }
}
/*
Packed formats: frame.data[0] (or frame.extended_data[0]) holds all of the audio data.
Planar formats: frame.data[i] (or frame.extended_data[i]) holds the data of channel i (channel 0 being the first).
AVFrame.data has a fixed size of 8, so with more than 8 channels the data must be read from frame.extended_data.
The same applies to video:
packed video formats include AV_PIX_FMT_YUYV422, AV_PIX_FMT_UYYVYY411, ...
planar video formats include AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, ...
packed audio formats include AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_FLT, ...
planar audio formats include AV_SAMPLE_FMT_S32P, AV_SAMPLE_FMT_FLTP, ...
In short: a trailing P means planar, otherwise the format is packed.
*/
/**
 * Decode one packet and push every resulting frame through the whole pipeline:
 * dump decoded PCM -> first resampling (dumped to swr_fd) -> second resampling
 * into the encoder's format -> encode into encode_fd.
 *
 * @param dec_ctx         opened decoder context
 * @param pkt             packet to decode; a packet with data == NULL and size == 0 drains the decoder
 * @param frame           scratch frame receiving decoded audio
 * @param swr_ctx         resampler for the first pass (output goes to the global dst_data)
 * @param encode_swr_ctx  resampler for the second pass (output goes to the global encode_dst_data)
 * @param pkt_enc         scratch packet for the encoder
 * @param frame_enc       pre-allocated frame that is handed to the encoder
 * @param enc_ctx         opened encoder context
 *
 * When the decoder reports end of stream the encoder is flushed as well, so the packets it was
 * still buffering reach the output file. Any codec or resampler error terminates the process.
 */
void decode(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame, struct SwrContext *swr_ctx,
            struct SwrContext *encode_swr_ctx, AVPacket *pkt_enc, AVFrame *frame_enc, AVCodecContext *enc_ctx)
{
    int ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0) {
        fprintf(stderr, "Error submitting the packet to the decoder\n");
        exit(1);
    }

    while (ret >= 0) {
        ret = avcodec_receive_frame(dec_ctx, frame);
        if (ret == AVERROR(EAGAIN)) {
            return; // the decoder needs more input
        }
        if (ret == AVERROR_EOF) {
            // The decoder is fully drained: flush the encoder too. Previously the encoder was
            // never flushed, so its last buffered packets were lost.
            encode(enc_ctx, NULL, pkt_enc, encode_fd);
            return;
        }
        if (ret < 0) {
            fprintf(stderr, "Error during decoding\n");
            exit(1);
        }

        // Bytes per single sample of one channel.
        const int data_size = av_get_bytes_per_sample(dec_ctx->sample_fmt);
        if (data_size < 0) {
            fprintf(stderr, "Failed to calculate data size\n");
            exit(1);
        }

        // Dump the decoded PCM, interleaving the channels when the decoder output is planar.
        if (av_sample_fmt_is_planar(dec_ctx->sample_fmt)) {
            // nb_samples is the per-channel sample count of the frame.
            for (int i = 0; i < frame->nb_samples; i++) {
                for (int ch = 0; ch < dec_ctx->channels; ch++) {
                    fwrite(frame->data[ch] + data_size * i, 1, data_size, pcm_fd);
                }
            }
        } else {
            fwrite(frame->data[0], 1, frame->nb_samples * frame->channels * data_size, pcm_fd);
        }

        // First resampling pass.
        const int converted = swr_convert(swr_ctx, dst_data, dst_nb_samples,
                                          (const uint8_t **)frame->data, frame->nb_samples);
        if (converted < 0) {
            // A negative count must never reach the size computations below.
            fprintf(stderr, "Error while resampling decoded audio\n");
            exit(1);
        }
        const int out_spb = av_get_bytes_per_sample(dst_sample_fmt);
        if (av_sample_fmt_is_planar(dst_sample_fmt)) {
            for (int i = 0; i < converted; i++) {
                for (int c = 0; c < dst_nb_channels; c++) {
                    fwrite(dst_data[c] + i * out_spb, 1, out_spb, swr_fd);
                }
            }
        } else {
            fwrite(dst_data[0], 1, converted * out_spb * dst_nb_channels, swr_fd);
        }

        // Second resampling pass, into the encoder's input format. Its input is exactly what
        // the first pass produced, not the capacity of the intermediate buffer.
        const int enc_samples = swr_convert(encode_swr_ctx, encode_dst_data, encode_dst_nb_samples,
                                            (const uint8_t **)dst_data, converted);
        if (enc_samples < 0) {
            fprintf(stderr, "Error while resampling audio for the encoder\n");
            exit(1);
        }
        if (enc_samples == 0) {
            continue; // nothing new to encode; do not resend the previous frame's samples
        }
        const int enc_out_spb = av_get_bytes_per_sample(encode_dst_sample_fmt);

        // The encoder may still hold a reference to frame_enc's buffers from the previous call.
        if (av_frame_make_writable(frame_enc) < 0) {
            fprintf(stderr, "Could not make the encoder frame writable\n");
            exit(1);
        }

        // NOTE(review): frame_enc->nb_samples is left as the caller set it. If enc_samples is
        // ever smaller, the tail of the frame keeps older samples; buffering up to the encoder's
        // frame size would be needed to handle that case properly.
        if (av_sample_fmt_is_planar(encode_dst_sample_fmt)) {
            for (int c = 0; c < encode_dst_nb_channels; c++) {
                // Index with the channel loop variable (previously an unrelated variable was used).
                memcpy(frame_enc->data[c], encode_dst_data[c], enc_samples * enc_out_spb);
            }
        } else {
            memcpy(frame_enc->data[0], encode_dst_data[0], enc_samples * enc_out_spb * encode_dst_nb_channels);
        }

        encode(enc_ctx, frame_enc, pkt_enc, encode_fd);
    }
}
/**
 * Check whether an encoder lists a sample format as supported.
 *
 * @param codec       encoder to query
 * @param sample_fmt  sample format to look for
 * @return 1 if the format is in codec->sample_fmts, or if the codec publishes no list at all
 *         (the decision is then left to avcodec_open2); 0 if a list exists and lacks the format
 */
int check_sample_fmt(const AVCodec *codec, enum AVSampleFormat sample_fmt)
{
    const enum AVSampleFormat *fmt = codec->sample_fmts;

    // A codec may leave the list unset; walking a NULL list would crash. This mirrors
    // select_sample_rate(), which also falls back to a default when its list is missing.
    if (fmt == NULL) {
        return 1;
    }

    // The list is terminated by AV_SAMPLE_FMT_NONE.
    for (; *fmt != AV_SAMPLE_FMT_NONE; fmt++) {
        if (*fmt == sample_fmt) {
            return 1;
        }
    }
    return 0;
}
/**
 * Pick the encoder sample rate closest to 44100 Hz.
 *
 * @param codec  encoder whose supported_samplerates list (terminated by 0) is scanned
 * @return the listed rate nearest to 44100; 44100 itself when the codec has no list;
 *         0 when the list is empty
 */
int select_sample_rate(const AVCodec *codec)
{
    if (codec->supported_samplerates == NULL) {
        return 44100;
    }

    int chosen = 0;
    for (const int *rate = codec->supported_samplerates; *rate != 0; ++rate) {
        // The first entry is always taken; later ones only when they are strictly closer.
        if (chosen == 0 || abs(44100 - *rate) < abs(44100 - chosen)) {
            chosen = *rate;
        }
    }
    return chosen;
}
/**
 * Entry point: demux test.mp4 and run every audio packet through
 * ADTS wrapping -> decode -> resample (S32) -> resample (encoder format) -> encode.
 *
 * Output files: test_out.aac (demuxed AAC with ADTS headers), test_out.pcm (decoded PCM),
 * swr.pcm (PCM after the first resampling pass), swr_encode.aac (re-encoded AAC).
 *
 * @return 0 on success, -1 if the input cannot be opened or probed; other failures call exit(1).
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    aac_fd = fopen(aac_filename, "wb");       // AAC separated from the mp4 file
    pcm_fd = fopen(pcm_filename, "wb");       // PCM after decoding
    swr_fd = fopen(swr_filename, "wb");       // PCM after the first resampling pass
    encode_fd = fopen(encode_filename, "wb"); // AAC after encoding
    if (!aac_fd || !pcm_fd || !swr_fd || !encode_fd) {
        fprintf(stderr, "Could not open the output files\n");
        exit(1);
    }
    av_register_all();
    int ret = -1;
    char errors[1024];
    char *in_filename = "test.mp4";

    int audio_index = -1;
    int video_index = -1;
    int len = 0;
    // Log verbosity
    av_log_set_level(AV_LOG_DEBUG);

    // Demuxing
    AVFormatContext *ifmt_ctx = NULL;
    AVPacket pkt;
    av_init_packet(&pkt);
    /* Decoding */
    AVPacket pkt_dec;
    av_init_packet(&pkt_dec);
    AVCodecContext *audio_codec_ctx = audio_decode_init();
    AVFrame *Frame = av_frame_alloc();
    if (!Frame) {
        fprintf(stderr, "Could not allocate audio frame\n");
        exit(1);
    }

    if ((ret = avformat_open_input(&ifmt_ctx, in_filename, NULL, NULL)) < 0) {
        av_strerror(ret, errors, sizeof errors);
        av_log(NULL, AV_LOG_DEBUG, "Could not open source file: %s, %d(%s)\n",
               in_filename,
               ret,
               errors);
        return -1;
    }

    if ((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0) {
        av_strerror(ret, errors, sizeof errors);
        av_log(NULL, AV_LOG_DEBUG, "failed to find stream information: %s, %d(%s)\n",
               in_filename,
               ret,
               errors);
        return -1;
    }

    // av_dump_format(ifmt_ctx, 0, in_filename, 0);

    audio_index = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (audio_index < 0) {
        // Everything below indexes streams[audio_index]; stream 0 is valid, a negative value is an error.
        fprintf(stderr, "Could not find an audio stream in %s\n", in_filename);
        exit(1);
    }
    AVStream *audio_stream = ifmt_ctx->streams[audio_index];
    printf("audio 编解码其id codec_id: %d\n", audio_stream->codec->codec_id);
    printf("audio 编解码器名字 codec_name: %s\n", avcodec_get_name(audio_stream->codec->codec_id));
    printf("audio 采样率 sample_rate: %d Hz\n", audio_stream->codec->sample_rate);
    printf("audio 通道数 channels: %d\n", audio_stream->codec->channels);
    // av_get_bytes_per_sample(): bytes taken by one sample
    // av_sample_fmt_is_planar(): whether the sample format is planar
    printf("audio 采样格式 sample_fmt: %s\n", av_get_sample_fmt_name(audio_stream->codec->sample_fmt));
    printf("audio 一次采样的字节数 : %d\n", av_get_bytes_per_sample(audio_stream->codec->sample_fmt));

    // Original author's note: frame_size == nb_samples for AAC_LOW, frame_size == 2 * nb_samples for AAC_HE_V2
    printf("audio 一帧数据包含的采样点 frame_size: %d\n", audio_stream->codec->frame_size);
    // int64_t values are cast so that they match the %lld conversions on every platform.
    printf("audio 音频流包含的帧数 nb_frames: %lld\n", (long long)audio_stream->nb_frames);
    printf("audio 比特率 bitrate %lld kb/s\n", (long long)(audio_stream->codec->bit_rate / 1000));
    printf("audio profile:%d, FF_PROFILE_AAC_LOW:%d\n", audio_stream->codecpar->profile, FF_PROFILE_AAC_LOW);

    video_index = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);

    /* First resampling pass, applied to the decoder output */
    src_sample_fmt = audio_stream->codec->sample_fmt;
    dst_sample_fmt = AV_SAMPLE_FMT_S32; // AV_SAMPLE_FMT_S32 or AV_SAMPLE_FMT_S16

    src_nb_channels = audio_stream->codec->channels;
    dst_nb_channels = 2;

    src_ratio = audio_stream->codec->sample_rate;
    dst_ratio = 44100;
    SwrContext *swr_ctx = swr_alloc_set_opts(NULL, av_get_default_channel_layout(dst_nb_channels), dst_sample_fmt, dst_ratio,
                                             av_get_default_channel_layout(src_nb_channels), src_sample_fmt, src_ratio, 0, NULL);
    if (!swr_ctx || swr_init(swr_ctx) != 0) {
        swr_free(&swr_ctx);
        printf("swr_ctx alloc & set error\n");
        exit(1);
    }
    // Original author's note: frame_size == nb_samples for AAC_LOW, frame_size == 2 * nb_samples for AAC_HE_V2
    if (audio_stream->codecpar->profile == FF_PROFILE_AAC_LOW) {
        src_nb_samples = audio_stream->codec->frame_size;
    } else if (audio_stream->codecpar->profile == FF_PROFILE_AAC_HE_V2) {
        src_nb_samples = audio_stream->codec->frame_size / 2;
    } else {
        printf("profile error\n");
        exit(1);
    }

    // Number of output samples needed for src_nb_samples input samples:
    // av_rescale_rnd(a, b, c, rnd) computes a * b / c with the requested rounding.
    dst_nb_samples = (int)av_rescale_rnd(src_nb_samples, dst_ratio, src_ratio, AV_ROUND_UP);
    // Allocate the output buffers of the first pass
    ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels, dst_nb_samples, dst_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate resampling buffers\n");
        exit(1);
    }

    /* Encoding */
    AVCodecContext *audio_encodec_ctx = audio_encode_init();
    AVPacket pkt_enc;
    av_init_packet(&pkt_enc);
    // Second resampling pass: convert the audio into the format the encoder needs
    // Input side
    encode_src_sample_fmt = dst_sample_fmt;
    encode_src_nb_channels = dst_nb_channels;
    encode_src_ratio = dst_ratio;
    // Output side
    encode_dst_sample_fmt = enc_famt;
    encode_dst_nb_channels = 2;
    encode_dst_ratio = 44100;

    SwrContext *encode_swr_ctx = swr_alloc_set_opts(NULL, av_get_default_channel_layout(encode_dst_nb_channels), encode_dst_sample_fmt, encode_dst_ratio,
                                                    av_get_default_channel_layout(encode_src_nb_channels), encode_src_sample_fmt, encode_src_ratio, 0, NULL);
    if (!encode_swr_ctx || swr_init(encode_swr_ctx) != 0) {
        swr_free(&encode_swr_ctx);
        printf("encode_swr_ctx alloc & set error\n");
        exit(1);
    }
    encode_src_nb_samples = dst_nb_samples;
    encode_dst_nb_samples = (int)av_rescale_rnd(encode_src_nb_samples, encode_dst_ratio, encode_src_ratio, AV_ROUND_UP);
    ret = av_samples_alloc_array_and_samples(&encode_dst_data, &encode_dst_linesize, encode_dst_nb_channels, encode_dst_nb_samples, encode_dst_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate resampling buffers\n");
        exit(1);
    }
    AVFrame *Frame_enc = av_frame_alloc();
    if (!Frame_enc) {
        fprintf(stderr, "Could not allocate audio frame\n");
        exit(1);
    }
    // Original author's note: audio_encodec_ctx->frame_size and encode_dst_nb_samples are not always
    // equal (frame_size == nb_samples for AAC_LOW, frame_size == 2 * nb_samples for AAC_HE_V2),
    // so take care when setting nb_samples.
    Frame_enc->nb_samples = encode_dst_nb_samples;
    Frame_enc->format = audio_encodec_ctx->sample_fmt;
    Frame_enc->channel_layout = audio_encodec_ctx->channel_layout;
    ret = av_frame_get_buffer(Frame_enc, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate audio data buffers\n");
        exit(1);
    }
    printf("encode_dst_nb_samples:%d audio_encodec_ctx->frame_size:%d Frame_enc->nb_samples:%d Frame_enc->format:%s\n",
           encode_dst_nb_samples, audio_encodec_ctx->frame_size, Frame_enc->nb_samples, av_get_sample_fmt_name(Frame_enc->format));

    while (av_read_frame(ifmt_ctx, &pkt) >= 0) {
        if (pkt.stream_index == audio_index) {
            printf("audio 在stream下的原始pts:%lld\n", (long long)pkt.pts);
            printf("audio_stream->time_base.num:%d audio_stream->time_base.den:%d\n", audio_stream->time_base.num, audio_stream->time_base.den);
            printf("audio 真实时间戳：%fs\n", pkt.pts * av_q2d(audio_stream->time_base));
            printf("audio 在AV_TIME_BASE下的时间:%lld微秒 另一种方法：%lld微秒\n",
                   (long long)(pkt.pts * av_q2d(audio_stream->time_base) * AV_TIME_BASE),
                   (long long)av_rescale_q(pkt.pts, audio_stream->time_base, AV_TIME_BASE_Q));
            printf("\n");

            // Save the raw AAC data, each packet prefixed with an ADTS header
            char adts_header_buf[7] = {0};
            if (adts_header(adts_header_buf, pkt.size,
                            audio_stream->codecpar->profile,     // AAC profile
                            audio_stream->codecpar->sample_rate, // sample rate in Hz
                            audio_stream->codecpar->channels) < 0) {
                // Without a valid header the packet can be neither stored nor decoded.
                fprintf(stderr, "Could not build the ADTS header, skipping packet\n");
                av_packet_unref(&pkt);
                continue;
            }
            fwrite(adts_header_buf, 1, 7, aac_fd);
            len = (int)fwrite(pkt.data, 1, pkt.size, aac_fd);
            if (len != pkt.size) {
                av_log(NULL, AV_LOG_DEBUG, "warning, length of writed data isn't equal pkt.size(%d, %d)\n",
                       len,
                       pkt.size);
            }

            // The decoder context was opened without stream parameters, so it takes the key
            // information (AAC profile, sample rate, channel count) from the ADTS header.
            // libavcodec requires AV_INPUT_BUFFER_PADDING_SIZE zeroed bytes after the packet data.
            const size_t dec_size = 7 + (size_t)pkt.size;
            uint8_t *ptr = malloc(dec_size + AV_INPUT_BUFFER_PADDING_SIZE);
            if (!ptr) {
                fprintf(stderr, "Could not allocate the decoder input buffer\n");
                exit(1);
            }
            memcpy(ptr, adts_header_buf, 7);
            memcpy(ptr + 7, pkt.data, pkt.size);
            memset(ptr + dec_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
            pkt_dec.data = ptr;
            pkt_dec.size = (int)dec_size;
            decode(audio_codec_ctx, &pkt_dec, Frame, swr_ctx,
                   encode_swr_ctx, &pkt_enc, Frame_enc, audio_encodec_ctx);
            av_packet_unref(&pkt_dec);
            av_packet_unref(&pkt_enc);
            free(ptr);

        } else if (pkt.stream_index == video_index) {
            AVStream *video_stream = ifmt_ctx->streams[video_index];
            printf("video 在stream下的原始pts:%lld\n", (long long)pkt.pts);
            printf("video_stream->time_base.num:%d video_stream->time_base.den:%d\n", video_stream->time_base.num, video_stream->time_base.den);
            printf("video 真实时间戳：%fs\n", pkt.pts * av_q2d(video_stream->time_base));
            printf("video 在AV_TIME_BASE下的时间:%lld微秒 另一种方法：%lld微秒\n",
                   (long long)(pkt.pts * av_q2d(video_stream->time_base) * AV_TIME_BASE),
                   (long long)av_rescale_q(pkt.pts, video_stream->time_base, AV_TIME_BASE_Q));
            printf("\n");
        }
        av_packet_unref(&pkt);
    }

    // Drain the decoder: an empty packet asks it for the frames it still buffers.
    pkt_dec.data = NULL;
    pkt_dec.size = 0;
    decode(audio_codec_ctx, &pkt_dec, Frame, swr_ctx,
           encode_swr_ctx, &pkt_enc, Frame_enc, audio_encodec_ctx);

    avcodec_free_context(&audio_codec_ctx);
    avcodec_free_context(&audio_encodec_ctx); // was leaked before
    av_frame_free(&Frame);
    av_frame_free(&Frame_enc);
    swr_free(&swr_ctx);
    swr_free(&encode_swr_ctx);
    // Buffers from av_samples_alloc_array_and_samples: free the sample data, then the pointer array.
    if (dst_data) {
        av_freep(&dst_data[0]);
    }
    av_freep(&dst_data);
    if (encode_dst_data) {
        av_freep(&encode_dst_data[0]);
    }
    av_freep(&encode_dst_data);
    fclose(swr_fd);
    fclose(encode_fd);
    fclose(aac_fd);
    fclose(pcm_fd);
    avformat_close_input(&ifmt_ctx);
    return 0;
}

我的开源：

         1、Nvidia视频硬解码、渲染、软/硬编码并写入MP4文件。项目地址：https://github.com/BreakingY/Nvidia-Video-Codec
        2、Jetson Jetpack5.x视频编解码。项目地址：https://github.com/BreakingY/jetpack-dec-enc
        3、ffmpeg音视频(H264/H265/AAC)封装、解封装、编解码pipeline，支持NVIDIA硬编解码。项目地址：https://github.com/BreakingY/FFmpeg-Media-Codec-Pipeline
        4、simple rtsp server，小而高效的rtsp服务器，支持H264、H265、AAC、PCMA；支持TCP、UDP；支持鉴权。项目地址：https://github.com/BreakingY/simple-rtsp-server

5、simple rtp client，rtsp客户端，支持TCP、UDP、H264、H265、AAC、PCMA，支持鉴权。项目地址：https://github.com/BreakingY/simple-rtsp-client