【FFmpeg实战】音频解码与编码流程

最新推荐文章于 2024-06-18 16:54:12 发布

攻城狮百里

最新推荐文章于 2024-06-18 16:54:12 发布

阅读量834

点赞数 2

分类专栏：音视频文章标签： ffmpeg 音视频

本文链接：https://blog.csdn.net/weixin_52622200/article/details/131398925

版权

音视频专栏收录该内容

122 篇文章 22 订阅

订阅专栏

解码流程

音频编解码流程与视频编解码流程一致，我们可以对 mp4 文件的音频流进行解码，并将解码后的音频数据保存到 PCM 文件中，后续我们可以通过读取 PCM 文件中的数据实现音频流的编码操作

FFmpeg音频解码流程

extern"C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavutil/imgutils.h"
}
#include <iostream>
using namespace std;

/**
 * Decode one packet and append the decoded samples to a raw PCM stream.
 *
 * The output is always interleaved (LRLRLR...), which is what raw PCM players
 * expect. Planar frames store each channel in its own plane
 * ([LLLL...][RRRR...] per frame), so they are interleaved sample by sample;
 * packed frames are already interleaved and are written in one piece.
 *
 * @param codecCtx decoder context; its channel count drives the interleaving
 * @param pkt      packet to decode, or NULL to drain frames still buffered in the decoder
 * @param frame    caller-owned frame that receives each decoded frame
 * @param file     stream the PCM bytes are written to
 */
void savePCMDecode(AVCodecContext* codecCtx, AVPacket* pkt, AVFrame* frame, FILE* file) {
    // Hand the packet to the decoder; nothing to read back if it was rejected.
    if (avcodec_send_packet(codecCtx, pkt) < 0) {
        return;
    }

    // One packet may yield zero, one or several frames.
    while (avcodec_receive_frame(codecCtx, frame) >= 0) {
        // Use the format of the frame that was actually produced.
        const enum AVSampleFormat fmt = (enum AVSampleFormat)frame->format;
        const int numBytes = av_get_bytes_per_sample(fmt);
        const int channels = codecCtx->channels;

        if (av_sample_fmt_is_planar(fmt)) {
            // extended_data[] covers every channel plane; data[] only holds
            // the first AV_NUM_DATA_POINTERS of them.
            for (int i = 0; i < frame->nb_samples; i++) {
                for (int ch = 0; ch < channels; ch++) {
                    fwrite(frame->extended_data[ch] + numBytes * i, 1, numBytes, file);
                }
            }
        } else {
            // linesize[0] is the buffer size and may include alignment padding,
            // so compute the number of valid bytes from the sample count.
            const size_t validBytes = (size_t)frame->nb_samples * channels * numBytes;
            fwrite(frame->data[0], 1, validBytes, file);
        }
    }
}

void savePCM() {
    //声明所需的变量名
    AVFormatContext* fmtCtx = NULL;
    AVCodec* codec = NULL;
    AVCodecContext* codecCtx = NULL;
    AVPacket* pkt = av_packet_alloc();
    AVFrame* frame = NULL;

    //输入与输出文件
    const char* inFile = "target.mp4";
    const char* outFile = "result.pcm";
    FILE* file = NULL;
    int ret;

    do {
        //打开输入文件
        fopen_s(&file, outFile, "w+b");
        if (file == NULL) {
            cout << "not open file" << endl;
            break;
        }

        //----------------- 创建AVFormatContext结构体 -------------------
        ret = avformat_open_input(&fmtCtx, inFile, NULL, NULL);
        if (ret < 0) {
            cout << "not open input" << endl;
            break;
        }
        
        //----------------- 获取多媒体文件信息 -------------------
        ret = avformat_find_stream_info(fmtCtx, NULL);
        if (ret < 0) {
            cout << "not open stream info" << endl;
            break;
        }

        av_dump_format(fmtCtx, 0, inFile, 0);

        //----------------- 查找解码器 -------------------
        //通过查找多媒体文件中包含的流信息，找到音频类型的流，并返回该索引值
        //还会根据流类型查找对应的解码器
        int audioIndex = av_find_best_stream(fmtCtx, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);

        if (audioIndex < 0) {
            cout << "not find audio stream" << endl;
            break;
        }else if (codec == NULL) {
            cout << "not find codec" << endl;
            break;
        }
        
        AVCodecParameters* param = fmtCtx->streams[audioIndex]->codecpar;
        
        //创建解码器上下文
        codecCtx = avcodec_alloc_context3(codec);
        if (codecCtx == NULL) {
            cout << "not alloc codec context" << endl;
            break;
        }

        //传递相关参数
        ret = avcodec_parameters_to_context(codecCtx, param);
        if (ret < 0) {
            cout << "parameters to context fail" << endl;
            break;
        }

        codecCtx->time_base = fmtCtx->streams[audioIndex]->time_base;
        
        //----------------- 打开解码器 -------------------
        ret = avcodec_open2(codecCtx, codec, NULL);
        if (ret < 0) {
            cout << "not open codec" << endl;
            break;
        }
        
        //打印音频流的相关信息，在后续的编码中会使用
        cout << "sample_rate:" << codecCtx->sample_rate << endl;        //音频采样频率
        cout << "nb_sample:" << codecCtx->frame_size<< endl;            //音频采样数
        cout << "channel:" << codecCtx->channels<< endl;                //音频声道数
        cout << "channel_layout:" << codecCtx->channel_layout<< endl;   //音频声道格式
        cout << "format:" << codecCtx->sample_fmt << endl;              //音频采样格式

        frame = av_frame_alloc();
        while (av_read_frame(fmtCtx, pkt) >= 0) {
            //是否对应音频流的帧
            if (pkt->stream_index == audioIndex) {
                //执行解码操作
                savePCMDecode(codecCtx, pkt, frame, file);
            }
            //解引用
            av_packet_unref(pkt);
        }
        //刷新解码器中的缓存
        savePCMDecode(codecCtx, NULL, frame, file);
    } while (0);
    
    //----------------- 释放所有指针 ------------------- 
    av_frame_free(&frame);
    av_packet_free(&pkt);
    avcodec_close(codecCtx);
    avcodec_free_context(&codecCtx);
    avformat_free_context(fmtCtx);

    fclose(file);
}
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'target.mp4':
  Metadata:
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    encoder         : Lavf58.48.100
  Duration: 00:03:10.36, start: 0.000000, bitrate: 773 kb/s
    Stream #0:0(und): Video: h264 (High) (avc1 / 0x31637661), yuv420p(tv, bt709), 1280x720, 442 kb/s, 25 fps, 25 tbr, 90k tbn, 50 tbc (default)
    Metadata:
      handler_name    : VideoHandler
    Stream #0:1(und): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, stereo, fltp, 325 kb/s (default)
    Metadata:
      handler_name    : SoundHandler
sample_rate:48000
nb_sample:1024
channel:2
channel_layout:3
format:8

可以看到音频流的解码方式与流程与视频流的解码方式与流程一致，只是在视频流中我们将视频帧读取为图片进行显示，而音频流中我们将音频帧读取到 PCM 文件中，可以通过 SDL或 ffplay 对 PCM 文件进行播放

请记住打印的参数，因为PCM文件中只存放了音频数据，没有存放音频数据相关的参数，这些参数需要我们在编码时给AVCodecContext进行设置

根据打印信息可以知道target.mp4音频流格式为：

采样率：48kHz 采样数：1024 双声道立体声(3->AV_CH_LAYOUT_STEREO) 采样格式：32位-float planar格式(8->AV_SAMPLE_FMT_FLTP)

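由此可以估算出PCM文件大小：48000 * 2 * 32 / 8 * 190.36 = 73,098,240 字节，实际PCM文件为 73,097,216 字节，相差 1024 字节。双声道 32 位采样下 1024 字节只相当于 128 个采样点，不足一个音频帧（1024 个采样点为 8192 字节）；而 Duration 只精确到 0.01 秒（对应 3840 字节），所以这点差异在时长取整的误差范围之内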

注意：我们保存PCM文件时已经将 Planar 格式转换为 Packed（交错）格式，所以PCM中音频格式为 AV_SAMPLE_FMT_FLT

代码分析

av_find_best_stream

av_find_best_stream根据输入的参数查找对应流的索引并查找对应解码器，其实和之前使用循环查找视频流/音频流，然后根据ID查找解码器实现一样，不过使用该函数可以减少代码量

int av_find_best_stream(AVFormatContext *ic,
                        enum AVMediaType type,
                        int wanted_stream_nb,
                        int related_stream,
                        AVCodec **decoder_ret,
                        int flags);

参数：

AVFormatContext *ic：AVFormatContext结构体，提供streams、nb_streams参数
enum AVMediaType type：要查找的流类型，音频流、视频流等
int wanted_stream_nb：用户指定流的索引，判断该索引的流是否符合其余参数指定的条件，该参数为 -1 时函数自动选择
int related_stream：相关的流的索引，查看源码应该是查找同属一个AVProgram中的流，如果不需要该参数则填-1
AVCodec **decoder_ret：会返回对应流的解码器，如果不需要则填NULL
int flags：保留字段，目前无用

return:

成功则返回流对应的索引值，失败则会返回一个负数：

AVERROR_STREAM_NOT_FOUND：没有找到请求类型的流
AVERROR_DECODER_NOT_FOUND：找到对应流，但是没有找到对应的解码器

编码流程

音频的编码流程与视频的编码流程一致，只是音频帧格式转换和在AVCodecContext中需要填写的参数有所变化

/**
 * Send one frame to the encoder and write every packet it produces to the muxer.
 *
 * @param fmtCtx   output format context; packets are written to it and rescaled
 *                 to the time base of its stream 0
 * @param cCtx     opened encoder context
 * @param packet   caller-owned packet reused for every encoded packet
 * @param srcFrame frame to encode, or NULL to flush the data still buffered in
 *                 the encoder
 *
 * Note: the last written pts is kept in a function-local static, so this
 * function keeps state across calls and across outputs.
 */
void doEncode(AVFormatContext* fmtCtx, AVCodecContext* cCtx, AVPacket* packet, AVFrame* srcFrame){
    if (fmtCtx == NULL || cCtx == NULL || packet == NULL) {
        return;
    }

    // For audio the pts may be left unset (this only triggers a warning).
    int ret = avcodec_send_frame(cCtx, srcFrame);

    /*
     * The muxer may report
     *   "Application provided invalid, non monotonically increasing dts to muxer in stream 0:XXX"
     * when a packet's dts does not increase. As a workaround the pts of the
     * previous packet is remembered, and a packet that does not advance past
     * it gets pts = dts = previous pts + 1.
     * Timestamps are int64_t, so the remembered value must be 64-bit as well;
     * a plain int would truncate large timestamps.
     */
    static int64_t prePts = 0;
    while (ret >= 0) {
        ret = avcodec_receive_packet(cCtx, packet);
        if (ret < 0) {
            // EAGAIN: the encoder needs more input first; EOF: fully flushed.
            // Any other negative value is a real encoding error. In every
            // case there is no packet to write.
            return;
        }

        packet->pos = -1;
        av_packet_rescale_ts(packet, cCtx->time_base, fmtCtx->streams[0]->time_base);
        if (packet->pts <= prePts) {
            packet->pts = prePts + 1;
            packet->dts = prePts + 1;
        }
        prePts = packet->pts;

        cout << "packet size" << packet->size << " dts:" << packet->dts << " pts:" << packet->pts << " duration:" << packet->duration << endl;

        // av_interleaved_write_frame takes ownership of the packet reference,
        // so no av_packet_unref is needed here (unlike av_write_frame).
        ret = av_interleaved_write_frame(fmtCtx, packet);
        if (ret < 0) {
            cout << "write frame fail " << ret << endl;
            return;
        }
    }
}

/**
 * Pick a sample format the encoder accepts.
 *
 * @param codec      encoder whose supported formats are inspected
 * @param sample_fmt preferred sample format
 * @return sample_fmt if the encoder lists it (or lists nothing at all);
 *         otherwise the first format in the encoder's list, or
 *         AV_SAMPLE_FMT_NONE when the list is empty
 */
static enum AVSampleFormat select_sample_fmt(const AVCodec* codec, enum AVSampleFormat sample_fmt){
    const enum AVSampleFormat* p = codec->sample_fmts;
    // No list means the codec does not advertise any restriction; keep the
    // caller's preference instead of dereferencing a NULL pointer.
    if (p == NULL) {
        return sample_fmt;
    }

    // The list is terminated by AV_SAMPLE_FMT_NONE, so *p is the fallback
    // (first supported format) as well as the empty-list result.
    const enum AVSampleFormat fallback = *p;
    for (; *p != AV_SAMPLE_FMT_NONE; p++) {
        if (*p == sample_fmt) {
            return sample_fmt;
        }
    }

    return fallback;
}

/**
 * Pick a sample rate the encoder accepts.
 *
 * @param codec               encoder whose supported sample rates are inspected
 * @param default_sample_rate preferred sample rate
 * @return default_sample_rate if the encoder lists it; otherwise the listed
 *         rate closest to 44100; 44100 when the encoder provides no list
 */
static int select_sample_rate(const AVCodec* codec, int default_sample_rate){
    if (!codec->supported_samplerates) {
        return 44100;
    }

    int best_samplerate = 0;
    // The list is zero-terminated.
    for (const int* p = codec->supported_samplerates; *p; p++) {
        if (*p == default_sample_rate) {
            return *p;
        }

        if (!best_samplerate || abs(44100 - *p) < abs(44100 - best_samplerate)) {
            best_samplerate = *p;
        }
    }

    return best_samplerate;
}

/**
 * Choose a bit rate (bit/s) for the given encoder.
 *
 * The best bit rate differs per codec: MP3 and AC3 get 192 kbps, which gives
 * good quality for MP3; every other codec gets 64 kbps.
 */
static int select_bit_rate(AVCodec* codec){
    switch (codec->id) {
    case AV_CODEC_ID_MP3:
    case AV_CODEC_ID_AC3:
        return 192000;
    default:
        return 64000;
    }
}

void pcm2mp3() {
    //声明所需的变量名
    AVFormatContext* fmtCtx = NULL;
    AVCodec* codec = NULL;
    AVCodecContext* codecCtx = NULL;
    AVPacket* pkt = av_packet_alloc();
    AVFrame* srcFrame = NULL,* dstFrame=NULL;
    int ret;

    pkt->data = NULL;
    pkt->size = 0;

    const char* inFile = "result.pcm";
    const char* outFile = "result.mp3";

    //PCM与输出的MP3的音频格式相关参数
    //音频采样频率
    int  src_sample_rate = 48000, dst_sample_rate = 44100;
    //音频采样数
    int  src_nb_samples = 1024;
    //音频声道数
    int  src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    //音频声道格式
    uint64_t src_ch_layout = AV_CH_LAYOUT_STEREO, dst_ch_layout = AV_CH_LAYOUT_STEREO;
    //音频采样格式
    enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_FLT;
    enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_FLT;

    do {
        //----------------- 打开输出文件 -------------------   
        ret = avformat_alloc_output_context2(&fmtCtx, NULL, NULL, outFile);
        if (ret) {
            cout << "Cannot alloc output file context" << endl;
            break;
        }

        ret = avio_open(&fmtCtx->pb, outFile, AVIO_FLAG_READ_WRITE);
        if (ret) {
            cout << "Cannot open output file" << endl;
            break;
        }

        //----------------- 查找编码器 -------------------   
        codec = avcodec_find_encoder(fmtCtx->oformat->audio_codec);
        if (codec == NULL) {
            cout << "Cannot find any endcoder" << endl;
            break;
        }
        //----------------- 申请编码器上下文结构体 -------------------  
        codecCtx = avcodec_alloc_context3(codec);
        if (codecCtx == NULL) {
            cout << "Cannot alloc context" << endl;
            break;
        }
        
        //设置相关参数
        // 对于不同的编码器最优码率不一样，单位bit/s
        codecCtx->bit_rate = select_bit_rate(codec);
        // 采样率
        codecCtx->sample_rate = select_sample_rate(codec,dst_sample_rate);
        // 采样格式
        codecCtx->sample_fmt = select_sample_fmt(codec, dst_sample_fmt);
        // 声道格式
        codecCtx->channel_layout = dst_ch_layout;
        // 声道数
        codecCtx->channels = av_get_channel_layout_nb_channels(codecCtx->channel_layout);
        
        //----------------- 创建音频流 -------------------  
        AVStream* vStream = avformat_new_stream(fmtCtx, NULL);
            
        AVCodecParameters* param = vStream->codecpar;
        
        //----------------- 打开解码器 -------------------  
        ret = avcodec_open2(codecCtx, codec, NULL);
        if (ret < 0) {
            cout << "avcodec_open2 fail" << endl;
            break;
        }

        //打开PCM文件
        FILE* file = NULL;
        fopen_s(&file,inFile, "rb");
        if (file == NULL) {
            cout<<("fopen fail")<<endl;
            break;
        }

        //是否进行音频数据格式转换
        bool needConvert = false;
        //申请Packet、Frame
        pkt = av_packet_alloc();
        srcFrame = av_frame_alloc();
        dstFrame = av_frame_alloc();

        //设置音频帧参数
        srcFrame->nb_samples = src_nb_samples;
        dstFrame->nb_samples = codecCtx->frame_size;

        srcFrame->format = src_sample_fmt;
        dstFrame->format = codecCtx->sample_fmt;

        srcFrame->channel_layout = src_ch_layout;
        dstFrame->channel_layout = codecCtx->channel_layout;

        srcFrame->sample_rate = src_sample_rate;
        dstFrame->sample_rate = codecCtx->sample_rate;

        // 分配srcFrame对应的内存块
        ret = av_frame_get_buffer(srcFrame, 0);
        if (ret < 0) {
            cout << ("av_frame_get_buffer fail %d", ret) << endl;;
            break;
        }
        // 使得srcFrame可写
        av_frame_make_writable(srcFrame);

        // 判断是否需要格式转换
        if (codecCtx->sample_fmt != srcFrame->format) {
            needConvert = true;
        }else if (codecCtx->channel_layout != srcFrame->channel_layout) {
            needConvert = true;
        }else if (codecCtx->sample_rate != srcFrame->sample_rate) {
            needConvert = true;
        }

        //与SwsContext类似的结构体，用于记录变换所需的参数
        SwrContext* swrCtx = NULL;
        if (needConvert) {
            // 申请进行对应转换所需的SwrContext
            swrCtx = swr_alloc_set_opts(NULL,
                codecCtx->channel_layout, codecCtx->sample_fmt, codecCtx->sample_rate,
                srcFrame->channel_layout, (enum AVSampleFormat)srcFrame->format, srcFrame->sample_rate, 
                0, NULL);
            
            /*
            * 如果使用swr_convert_frame进行格式转换，则swr_init可以不用写
            * 如果使用swr_convert进行格式转换，则需要使用swr_init函数进行初始化
            */
            swr_init(swrCtx);

            if (swrCtx == NULL) {
                cout << ("swr_alloc_set_opts() fail")<<endl;
                return;
            }

            // 分配dstFrame对应的内存块
            ret = av_frame_get_buffer(dstFrame, 0);
            if (ret < 0) {
                cout << ("av_frame_get_buffer fail %d", ret) << endl;
                return;
            }
            // 使得dstFrame可写
            ret = av_frame_make_writable(dstFrame);
            if (ret < 0) {
                cout << ("av_frame_make_writable %d", ret) << endl;
                return;
            }
        }

        // 将codecCtx设置的参数传给param，用于写入头文件信息
        avcodec_parameters_from_context(param, codecCtx);

        ret = avformat_write_header(fmtCtx, NULL);
        if (ret < 0) {
            cout << ("avformat_write_header %d", ret) << endl;;
            break;
        }

        av_dump_format(fmtCtx, 0, NULL, 1);

        int ptsIndex = 0;
        // 获取一帧所需的缓冲区大小
        int require_size = av_samples_get_buffer_size(NULL, src_nb_channels, src_nb_samples, src_sample_fmt, 0);

        // 读取PCM文件中的音频数据
        while (fread(srcFrame->data[0], 1, require_size, file) > 0) {
            pkt->stream_index = vStream->index;
            if (needConvert) {
                // 进行格式转换
                ret = swr_convert_frame(swrCtx, dstFrame, srcFrame);
                if (ret < 0) {
                    cout << "swr_convert_frame fail "<< ret << endl;
                    continue;
                }
                dstFrame->pts = ptsIndex;
                //编码
                doEncode(fmtCtx, codecCtx, pkt, dstFrame);
            }else {
                srcFrame->pts = ptsIndex;
                doEncode(fmtCtx, codecCtx, pkt, srcFrame);
            }
            ptsIndex++;
        }

        if (needConvert) {
            //刷新缓存,格式不同，帧对应的数据大小也不同
            while (swr_convert_frame(swrCtx, dstFrame, NULL) == 0) {
                if (dstFrame->nb_samples > 0) {
                    cout << "清除剩余的 "<< dstFrame->nb_samples << endl;;
                    dstFrame->pts = ptsIndex++;
                    doEncode1(fmtCtx, codecCtx, pkt, dstFrame);
                }else {
                    break;
                }
            }
        }
        //刷新缓存
        doEncode(fmtCtx, codecCtx, pkt, NULL);

        if (swrCtx) swr_free(&swrCtx);
        // 写入收尾信息，必须要与，否则文件无法播放
        av_write_trailer(fmtCtx);
        fclose(file);

    } while (0);

    if (fmtCtx) {
        avformat_free_context(fmtCtx);
        avio_closep(&fmtCtx->pb);
    }
    if (codecCtx) avcodec_free_context(&codecCtx);
    if (srcFrame) av_frame_free(&srcFrame);
    if (dstFrame) av_frame_free(&dstFrame);
    av_packet_free(&pkt);
}

代码分析

SwrContext结构体与SwsContext结构体类似，都是用于记录变换所需的参数

// Excerpt: only some of the more important fields are shown
struct SwrContext {
    enum AVSampleFormat  in_sample_fmt;             // input sample format
    enum AVSampleFormat out_sample_fmt;             // output sample format
    int64_t  in_ch_layout;                          // input channel layout
    int64_t out_ch_layout;                          // output channel layout
    int      in_sample_rate;                        // input sample rate
    int     out_sample_rate;                        // output sample rate
    int flags;                                      // miscellaneous flags, e.g. SWR_FLAG_RESAMPLE
    const int *channel_map;                         // channel map
    int used_ch_count;                              // number of channels in use

    int user_in_ch_count;                           // input channel count set by the user
    int user_out_ch_count;                          // output channel count set by the user
    int64_t user_in_ch_layout;                      // input channel layout set by the user
    int64_t user_out_ch_layout;                     // output channel layout set by the user
    ...
};

av_frame_get_buffer为音频或视频数据分配新的缓冲区，在调用该函数之前，需为AVFrame设置以下字段：

视频Frame
format（视频为AVPixelFormat）
width 、height
音频Frame
format（音频为AVSampleFormat）
nb_samples 、 channel_layout

int av_frame_get_buffer(AVFrame *frame, int align);

参数：

AVFrame *frame：用于存储新缓冲区的帧
int align：缓冲区中数据大小对齐方式，如果填0，则会根据cpu的架构自动选择对齐位数，建议填0 (有特殊需求当我没说)

return:

返回0表示成功，失败会返回一个负数

可以用该函数替换视频编码中计算缓存大小、申请缓存、av_image_fill_arrays一系列的代码，其内部的实现原理差不多

swr_alloc_set_opts

swr_alloc_set_opts函数会根据需要创建SwrContext，设置参数。该函数不需要使用swr_alloc()来分配，当struct SwrContext *s为 NULL 时函数体内部会调用swr_alloc()方法

该函数实际上是一系列av_opt_set_int的封装

/*
 * Allocate a SwrContext if needed and set the common conversion parameters.
 *
 * s may be NULL, in which case a new context is allocated with swr_alloc().
 * Each parameter is stored through av_opt_set_int under its option name.
 * Returns the context, or NULL if allocation or any option assignment fails;
 * on an option failure the context is freed with swr_free().
 */
struct SwrContext *swr_alloc_set_opts(struct SwrContext *s,
                                      int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate,
                                      int64_t  in_ch_layout, enum AVSampleFormat  in_sample_fmt, int  in_sample_rate,
                                      int log_offset, void *log_ctx){
    // Allocate a context when the caller did not pass one
    if(!s) s= swr_alloc();
    if(!s) return NULL;

    s->log_level_offset= log_offset;
    s->log_ctx= log_ctx;

    // Output/input channel layout, sample format and sample rate
    if (av_opt_set_int(s, "ocl", out_ch_layout,   0) < 0) goto fail;
    if (av_opt_set_int(s, "osf", out_sample_fmt,  0) < 0) goto fail;
    if (av_opt_set_int(s, "osr", out_sample_rate, 0) < 0) goto fail;
    if (av_opt_set_int(s, "icl", in_ch_layout,    0) < 0) goto fail;
    if (av_opt_set_int(s, "isf", in_sample_fmt,   0) < 0) goto fail;
    if (av_opt_set_int(s, "isr", in_sample_rate,  0) < 0) goto fail;
    // Channel counts are derived from the layouts stored just above
    if (av_opt_set_int(s, "ich", av_get_channel_layout_nb_channels(s-> user_in_ch_layout), 0) < 0) goto fail;
    if (av_opt_set_int(s, "och", av_get_channel_layout_nb_channels(s->user_out_ch_layout), 0) < 0) goto fail;

    av_opt_set_int(s, "uch", 0, 0);
    return s;
fail:
    // Log the failure and release the context
    av_log(s, AV_LOG_ERROR, "Failed to set option\n");
    swr_free(&s);
    return NULL;
}

参数：

struct SwrContext *s：已经创建完成的SwrContext指针。为 NULL 时，函数内部会创建一个SwrContext结构体
int64_t out_ch_layout：输出的声道格式
enum AVSampleFormat out_sample_fmt：输出的采样格式
int out_sample_rate：输出的采样频率
int64_t in_ch_layout：输入的声道格式
enum AVSampleFormat in_sample_fmt：输入的采样格式
int in_sample_rate：输入的采样频率
int log_offset：日志等级
void *log_ctx：日志上下文，可以为NULL

return:

成功返回对应的SwrContext指针，失败则返回NULL

其中的 "ocl"、"osf"、"osr" 等字符串参数可以在 libswresample/options.c 文件中找到定义

swr_convert_frame

swr_convert_frame函数会转换AVFrame input的数据并写入AVFrame output中，但输入与输出Frame都必须设置channel_layout、sample_rate和format参数

如果输出Frame没有分配数据指针，函数内部会使用av_frame_get_buffer函数为其申请数据内存

如果输出Frame为 NULL，或者转换得到的数据比输出Frame所能容纳的数据量多，多余的数据会被添加到内部FIFO缓冲区中，在下一次调用此函数或swr_convert函数时返回

转换采样率可能会导致有数据留在内部缓存区中。可以通过swr_get_delay获取保留在缓存区中的数据量；要将剩余数据作为输出数据取出，可以重新调用此函数或swr_convert函数，并将AVFrame *input设置为NULL

int swr_convert_frame(SwrContext *swr,
                      AVFrame *output, const AVFrame *input);

参数：

SwrContext *swr：SwrContext结构体指针，用于提供重采样所需的参数
AVFrame *output：输出的Frame
const AVFrame *input：输入的Frame

return:

返回 0 表示成功，失败时返回一个负数 (AVERROR)

看函数描述中可以知道还有一个函数swr_convert可以实现重采样的功能，不过该函数的使用比swr_convert_frame麻烦一些

// Usage example (fragment; `break` leaves the caller's enclosing loop):
if (!swr_is_initialized(swrCtx)) {
    // Do not initialise a SwrContext twice; doing so may change the values stored in it
    ret = swr_init(swrCtx);
    if (ret < 0) {
        cout<< "swr_init fail" <<endl;
        break;
    }
}
//const uint8_t** srcdata = (const uint8_t**)srcFrame->data;      
const uint8_t** srcdata = (const uint8_t**)srcFrame->extended_data;       // interchangeable with the commented-out line above
uint8_t** outdata = dstFrame->extended_data;
// Convert srcFrame's samples into dstFrame's buffers; a non-negative ret is
// stored below as the number of samples in dstFrame
ret = swr_convert(swrCtx, outdata, dstFrame->nb_samples, srcdata, srcFrame->nb_samples);
if (ret < 0) {
    cout << "convert fail" << endl;
    break;
}
dstFrame->nb_samples = ret;

swr_convert

int swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
                                const uint8_t **in , int in_count);

参数：

struct SwrContext *s：SwrContext结构体指针，用于提供重采样所需的参数
uint8_t **out：存放输出数据的二维数组
int out_count：输出音频采样数
const uint8_t **in：存放要转换数据的二维数组
int in_count：输入音频采样数

return:

成功则返回转换后Frame应采用的音频采样数，失败则返回一个负数

查看源码会发现swr_convert_frame内部也是调用swr_convert进行格式转换的，其内部代码逻辑与上述代码相似，还增加了一些参数合法性判断。比较推荐使用swr_convert_frame，可以简化代码和增加代码的健壮性

作者：oddly
链接：https://www.jianshu.com/p/0f6a7581f172

 >>> 音视频开发 视频教程： https://ke.qq.com/course/3202131?flowToken=1031864 
 >>> 音视频开发学习资料、教学视频，免费分享有需要的可以自行添加学习交流群： 739729163  领取

攻城狮百里

关注

2
点赞
踩
9

收藏

觉得还不错? 一键收藏
0
评论
【FFmpeg实战】音频解码与编码流程

可以通过swr_get_delay返回保留在缓存区中的数据大小，要获取剩余数据作为输出数据，可以重新调用此函数或swr_convert函数，并将AVFrame *input设置为NULL。音频编解码流程与视频编解码流程一致，我们可以对 mp4 文件的音频流进行解码，并将解码后的音频数据保存到 PCM 文件中，后续我们可以通过读取 PCM 文件中的数据实现音频流的编码操作。可以用该函数替换视频编码中计算缓存大小、申请缓存、av_image_fill_arrays一系列的代码，其内部的实现原理差不多。
复制链接

扫一扫

专栏目录