新版FFmpeg音频编解码处理

最新推荐文章于 2021-10-31 11:17:28 发布

windragon0419

最新推荐文章于 2021-10-31 11:17:28 发布

阅读量3.3k

点赞数 1

分类专栏：多媒体开发　文章标签：ffmpeg 音频编码

本文链接：https://blog.csdn.net/windragon0419/article/details/18732377

版权

多媒体开发专栏收录该内容

1 篇文章 0 订阅

订阅专栏

ffmpeg版本更新近一年跟打了鸡血一样，刷刷刷的往上升（貌似现在很多软件版本升级都相当快，而且都是大版本升级），应用软件也就算了，升就升吧，最多新版可能运行速度稍慢些，可API的版本升级太快，对开发来说，就不是一件让人愉快的事。
用ffmpeg也有快两年的时间，前段时间将项目中的ffmpeg库从1.0升级到了2.1（老实说，这样升级API库风险太大），发现不少API发生了变化，以前项目中写的音频编码部分完全不能工作了，认真研究了两天，才发现，新版的ffmpeg对音频编码处理已经有了很大的变化，记录在此，做个备忘。
早期ffmpeg编码音频，输入数据一般都是S16格式，解码输出一般也是S16，也就是说PCM数据是存储在连续的buffer中，对一个双声道（左右）音频来说，存储格式可能就为
LRLRLR.........（左声道在前还是右声道在前没有认真研究过）。所以以往编码部分的代码基本形如：

    // Legacy (pre-planar) encode path: PCM is interleaved and buffered in one FIFO.
    // sample_bytes: size of one sample of one channel.
    // frame_bytes : size of one full encoder frame covering every channel.
    int sample_bytes = av_get_bytes_per_sample(pCodecCtx->sample_fmt);

    int frame_bytes = pCodecCtx->frame_size * sample_bytes * pCodecCtx->channels;

    // AVFifoBuffer* fifo;    holds the buffered PCM data
    while (av_fifo_size(fifo) >= frame_bytes) {
        // Pull exactly one encoder frame worth of PCM out of the FIFO.
        av_fifo_generic_read(fifo, inputBuf, frame_bytes, NULL);

        // The encoder writes into the caller-provided buffer encodeBuf.
        AVPacket pkt = {0};
        av_init_packet(&pkt);
        pkt.data = encodeBuf;
        pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
        int got_packet = 0;

        audioframe->nb_samples = pCodecCtx->frame_size;
        // align = 1 (no padding) so the size describes exactly the frame_bytes
        // that were read into inputBuf; align = 0 would round the sample count
        // up to a multiple of 32.
        int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,
                                                  audioframe->nb_samples,
                                                  pCodecCtx->sample_fmt, 1);
        int ret = avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,
                inputBuf, samples_size, 1);

        // The running pts counter advances by the number of samples per frame.
        audioframe->pts = audio_sync_opts;
        audio_sync_opts = audioframe->pts + audioframe->nb_samples;

        if (ret >= 0) {
            ret = avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);
        }
        if (ret < 0) {
            // got_packet is undefined after a failed call, so it must not be read.
            av_log(NULL, AV_LOG_WARNING, "avcodec_encode_audio2() failed (%d)\n", ret);
        } else if (got_packet) {
            // Handle pkt: mux/store it, stream it out, or hand it to the upper layer
        }

    }
项目中需要对音视频流进行转码输出，音频处理部分一般是先解码（得到PCM S16数据），再交由编码（MP3、AAC）

ffmpeg升级到2.1后（具体哪个版本开始的没去查，可能早几个版本就已经这样做了），音频格式增加了plane概念（呃，不是灰机，是平面）
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,

    /* Non-planar formats */
    AV_SAMPLE_FMT_U8   = 0,    ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16  = 1,    ///< signed 16 bits
    AV_SAMPLE_FMT_S32  = 2,    ///< signed 32 bits
    AV_SAMPLE_FMT_FLT  = 3,    ///< float
    AV_SAMPLE_FMT_DBL  = 4,    ///< double

    /* Everything below is a planar format */
    AV_SAMPLE_FMT_U8P  = 5,    ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P = 6,    ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P = 7,    ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP = 8,    ///< float, planar
    AV_SAMPLE_FMT_DBLP = 9,    ///< double, planar

    AV_SAMPLE_FMT_NB   = 10    ///< Number of sample formats. DO NOT USE if linking dynamically
};
这就有点像视频部分的YUV数据，有的带P，有的是不带P的，同样对双声道音频PCM数据，以S16P为例，存储就可能是
plane 0: LLLLLLLLLLLLLLLLLLLLLLLLLL...
plane 1: RRRRRRRRRRRRRRRRRRRRRRRRRR...
而不再是以前的连续buffer。
如mp3编码就明确规定了只使用平面格式的数据
/* Excerpt of the libmp3lame encoder descriptor; the "....." lines stand for
 * fields left out of this quote. */
AVCodec ff_libmp3lame_encoder = {
       .....
    .capabilities          = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
    /* Accepted input sample formats: every entry listed here is a planar
     * ("...P") format; the list is terminated by AV_SAMPLE_FMT_NONE. */
    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S32P,
                                                             AV_SAMPLE_FMT_FLTP,
                                                             AV_SAMPLE_FMT_S16P,
                                                             AV_SAMPLE_FMT_NONE },
       ....
};
而AAC编码（如libfaac、libfdk_aac等外部编码器）依旧使用 AV_SAMPLE_FMT_S16格式（注意：FFmpeg内置的aac编码器要求的是 AV_SAMPLE_FMT_FLTP，具体以所用codec的sample_fmts列表为准）
也就是说，音频编码不能再像以前那样简单的处理，统一输入S16数据，而要根据具体的codec转化为其支持的格式，否则无论是编码还是解码输出的声音会莫名其妙，幸好，转换工作不用自己做，ffmpeg提供了相应的API，swr_convert（类似以前的audio_resample，只是audio_resample目前已不再推荐使用，因为swr_convert更强大）
基于此，对音频编码部分做了相应修改，主要用的数据结构为 struct SwrContext* m_SwrCtx;
step 1：判断是否需要进行convert，初始化阶段
       // A conversion is required whenever the input (pInputCtx) and the encoder
       // (pCodecCtx) disagree on channel count, sample rate or sample format.
       if (pCodecCtx->channels != pInputCtx->channels
           || pCodecCtx->sample_rate != pInputCtx->sample_rate
           || pCodecCtx->sample_fmt != pInputCtx->sample_fmt)
       {
           u::Log::write(get_log_file(), "Audio need resample!");
           if ( NULL == m_SwrCtx ) {
               m_SwrCtx = swr_alloc();
           }
#if LIBSWRESAMPLE_VERSION_MINOR >= 17               // pick the API that matches the library version
           // Input side = pInputCtx, output side = pCodecCtx.
           av_opt_set_int(m_SwrCtx, "ich", pInputCtx->channels, 0);
           av_opt_set_int(m_SwrCtx, "och", pCodecCtx->channels, 0);
           av_opt_set_int(m_SwrCtx, "in_sample_rate", pInputCtx->sample_rate, 0);
           av_opt_set_int(m_SwrCtx, "out_sample_rate", pCodecCtx->sample_rate, 0);
           av_opt_set_sample_fmt(m_SwrCtx, "in_sample_fmt", pInputCtx->sample_fmt, 0);
           av_opt_set_sample_fmt(m_SwrCtx, "out_sample_fmt", pCodecCtx->sample_fmt, 0);

#else
           // swr_alloc_set_opts() takes the OUTPUT parameters first, then the
           // INPUT parameters. The output side must describe the encoder
           // (pCodecCtx), otherwise the conversion detected above never happens.
           // NOTE(review): assumes pCodecCtx->channel_layout is populated, just as
           // the original assumed for pInputCtx->channel_layout - confirm.
           m_SwrCtx = swr_alloc_set_opts(m_SwrCtx,
                   pCodecCtx->channel_layout, pCodecCtx->sample_fmt, pCodecCtx->sample_rate,
                   pInputCtx->channel_layout, pInputCtx->sample_fmt, pInputCtx->sample_rate,
                   0, NULL);
#endif
           // Report allocation / initialisation failures instead of ignoring them.
           if (NULL == m_SwrCtx || swr_init(m_SwrCtx) < 0) {
               u::Log::write(get_log_file(), "Audio resampler init failed!");
           }
           if (av_sample_fmt_is_planar(pCodecCtx->sample_fmt)) {
           // Planar output: one FIFO per channel, each holding a single plane
               for (int i = 0; i < pCodecCtx->channels; i++){
                   m_fifo[i] = av_fifo_alloc(BUF_SIZE_20K);
               }
           } else {
           // Packed output: a single FIFO holds all the data (using a FIFO at all
           // is a matter of taste, it is simply convenient here)
               fifo = av_fifo_alloc(BUF_SIZE_20K);
           }

       }

step 2：进行转换
//以下代码部分抄自ffmpeg自带的例子
   // Convert one chunk of input PCM (input_buf, src_nb_samples samples) into the
   // encoder's format if a resampler was set up, then queue the result in the
   // FIFO(s) for encoding. Returns -1 from the enclosing function on failure.
   if (m_SwrCtx != NULL) {
       // Allocate the destination sample buffers on first use.
       if ( !m_audioOut ) {
           ret = av_samples_alloc_array_and_samples(&m_audioOut,
                   &dst_samples_linesize, pCodecCtx->channels, max_dst_nb_samples, pCodecCtx->sample_fmt, 0);
           if (ret < 0){
               av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate destination samples\n", __FILE__, __LINE__, __FUNCTION__);
               return -1;
           }
       }

       // Upper bound for the number of output samples: samples still buffered
       // inside the resampler (measured at the INPUT rate) plus the new input,
       // rescaled from the input rate to the output rate.
       // NOTE(review): pInputCtx is the input context used when the resampler
       // was configured - confirm it is in scope here.
       dst_nb_samples = av_rescale_rnd(swr_get_delay(m_SwrCtx, pInputCtx->sample_rate) + src_nb_samples,
               pCodecCtx->sample_rate, pInputCtx->sample_rate, AV_ROUND_UP);
       if (dst_nb_samples > max_dst_nb_samples) {
           // av_freep() also clears the pointer, so a failed re-allocation below
           // cannot lead to a double free on the next call.
           av_freep(&m_audioOut[0]);
           ret = av_samples_alloc(m_audioOut, &dst_samples_linesize, pCodecCtx->channels, dst_nb_samples, pCodecCtx->sample_fmt, 0);
           if (ret < 0){
               av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate samples Buffer\n", __FILE__, __LINE__, __FUNCTION__);
               // No buffer is available any more: force a re-allocation next time.
               max_dst_nb_samples = 0;
               return -1;
           }
           max_dst_nb_samples = dst_nb_samples;
       }

       // The input may itself be planar, so build the per-plane pointer array
       uint8_t* m_ain[SWR_CH_MAX];
       setup_array(m_ain, (uint8_t*)input_buf, data->ctx.sample_fmt, src_nb_samples);

       // len = number of samples per channel actually produced
       len = swr_convert(m_SwrCtx, m_audioOut, dst_nb_samples, (const uint8_t**)m_ain, src_nb_samples);

       if (len < 0) {
           char errmsg[BUF_SIZE_1K];
           av_strerror(len, errmsg, sizeof(errmsg));
           av_log(NULL, AV_LOG_WARNING, "[%s:%d] swr_convert!(%d)(%s)", __FILE__, __LINE__, len, errmsg);
           return -1;
       }

       paudiobuf = m_audioOut[0];
       decode_size = len * pCodecCtx->channels * av_get_bytes_per_sample(pCodecCtx->sample_fmt);

   } else {
       // No conversion needed: queue the input bytes unchanged.
       paudiobuf = (uint8_t*)input_buf;
       decode_size = input_size;
   }

   // Queue the PCM. Even after a conversion the output may be packed, so the
   // planar check is still required here.
   if (m_SwrCtx && av_sample_fmt_is_planar(pCodecCtx->sample_fmt) ) {
       // Bytes produced for each single plane (channel).
       const int plane_bytes = len * av_get_bytes_per_sample(pCodecCtx->sample_fmt);
       for (int i = 0; i < pCodecCtx->channels; i++){
           if (av_fifo_realloc2(m_fifo[i], av_fifo_size(m_fifo[i]) + plane_bytes) < 0){
               av_log(NULL, AV_LOG_FATAL, "av_fifo_realloc2() failed\n");
               return -1;
           }
           // m_audioOut[i] is the start of plane i
           av_fifo_generic_write(m_fifo[i], m_audioOut[i], plane_bytes, NULL);
       }
   } else {
       if (av_fifo_realloc2(fifo, av_fifo_size(fifo) + decode_size) < 0) {
           av_log(NULL, AV_LOG_FATAL, "av_fifo_realloc2() failed\n");
           return -1;
       }
       av_fifo_generic_write(fifo, paudiobuf, decode_size, NULL);
   }

setup_array函数摘自ffmpeg例程
/*
 * Fill `out` with the plane start pointers of the sample buffer `in`.
 *
 *   out     - pointer array with SWR_CH_MAX entries
 *   in      - start of the sample data
 *   format  - sample format (an AVSampleFormat value passed as int)
 *   samples - number of samples per channel held in `in`
 *
 * Packed formats: only out[0] is written and points at `in`.
 * Planar formats: all SWR_CH_MAX entries are written, entry i pointing
 * i * (bytes per sample * samples) bytes past `in`.
 */
static void setup_array(uint8_t* out[SWR_CH_MAX], uint8_t* in, int format, int samples){
    if (!av_sample_fmt_is_planar((AVSampleFormat)format)) {
        out[0] = in;
        return;
    }

    // Only the low byte of `format` is used to look up the sample size.
    const AVSampleFormat masked_fmt = (AVSampleFormat)(format & 0xFF);
    const int bytes_per_plane = av_get_bytes_per_sample(masked_fmt) * samples;

    for (int ch = 0; ch < SWR_CH_MAX; ++ch) {
        out[ch] = in + ch * bytes_per_plane;
    }
}

step 3：进行编码
   // The encoder requires planar data: samples were queued in one FIFO per channel
   if (m_SwrCtx && ( av_sample_fmt_is_planar(pCodecCtx->sample_fmt) )) {
       // Bytes that a single channel contributes to one encoder frame.
       const int plane_bytes = pCodecCtx->frame_size * sample_bytes;

       // Simplified example: only the first channel's FIFO is checked (all
       // channels are expected to hold the same amount of data); real code
       // should consider every channel.
       while(av_fifo_size(m_fifo[0]) >= plane_bytes){
           for (int i = 0; i < pCodecCtx->channels; i++) {
               // inputBuf is one contiguous block: plane i starts at i * plane_bytes
               av_fifo_generic_read(m_fifo[i], inputBuf + i*plane_bytes, plane_bytes, NULL);
           }
           AVPacket pkt = {0};
           av_init_packet(&pkt);
           pkt.data = encodeBuf;
           pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
           int got_packet = 0;

           audioframe->nb_samples = pCodecCtx->frame_size;
           // align = 1 (no padding): the plane stride FFmpeg derives is then
           // exactly plane_bytes, matching how inputBuf was filled above. With
           // align = 0 the sample count is rounded up to a multiple of 32 and
           // the second and later planes would be read from the wrong offset
           // whenever frame_size is not a multiple of 32.
           int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,
                                                      audioframe->nb_samples,
                                                      pCodecCtx->sample_fmt, 1);
           int ret = avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,
                   inputBuf, samples_size, 1);

           if (ret >= 0) {
               ret = avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);
           }

           if (ret < 0) {
               // got_packet is undefined after a failed call, so it must not be read.
               av_log(NULL, AV_LOG_WARNING, "avcodec_encode_audio2() failed (%d)\n", ret);
           } else if (got_packet ) {
               // handle pkt
           }

       }

   } else {
   // Packed (non-planar) data: everything sits in the single FIFO
       while(av_fifo_size(fifo) >= frame_bytes) {
           av_fifo_generic_read(fifo, inputBuf, frame_bytes, NULL);

           AVPacket pkt = {0};
           av_init_packet(&pkt);
           pkt.data = encodeBuf;
           pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
           int got_packet = 0;

           audioframe->nb_samples = pCodecCtx->frame_size;
           // align = 1 so the size describes exactly the frame_bytes read above.
           int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,
                                                      audioframe->nb_samples,
                                                      pCodecCtx->sample_fmt, 1);

           int ret = avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,
                   inputBuf, samples_size, 1);

           if (ret >= 0) {
               ret = avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);
           }

           if (ret < 0) {
               // got_packet is undefined after a failed call, so it must not be read.
               av_log(NULL, AV_LOG_WARNING, "avcodec_encode_audio2() failed (%d)\n", ret);
           } else if (got_packet ) {
               // handle pkt
           }

       }
   }


另：
对于解码也可能需要做swr_convert，比如做播放器，很多时候我们是将S16格式数据丢给声卡，而新版ffmpeg解码音频输出的格式可能不满足S16，如AAC解码后得到的是FLT（浮点型），AC3解码是FLTP（带平面）等，需要根据具体的情况决定是否需要convert，转换过程与上类似

windragon0419

关注

1
点赞
踩
6

收藏

觉得还不错? 一键收藏
9
评论
新版FFmpeg音频编解码处理

ffmpeg版本更新近一年跟打了鸡血一样，刷刷刷的往上升（貌似现在很多软件版本升级都相当快，而且都是大版本升级），应用软件也就算了，升就升吧，最多新版可能运行速度稍慢些，可API的版本升级太快，对开发来说，就不是一件让人愉快的事。用ffmpeg也有快两年的时间，前段时间将项目中的ffmpeg库从1.0升级到了2.1（老实说，这样升级API库风险太大），发现不少API发生了变化，以前项目中写的音
复制链接

扫一扫