FFmpeg从入门到牛掰（三）：音频解码(decode)讲解

最新推荐文章于 2024-08-01 07:49:38 发布

im-pingo

最新推荐文章于 2024-08-01 07:49:38 发布

阅读量2.4k

点赞数 2

分类专栏： ffmpeg 文章标签： ffmpeg

本文链接：https://blog.csdn.net/impingo/article/details/103774916

版权

ffmpeg 专栏收录该内容

7 篇文章 19 订阅

订阅专栏

转载请注明出处：https://blog.csdn.net/impingo
项目地址：https://github.com/im-pingo/pingos

PCM音频介绍

声音被拾音器采集到之后模拟信号转换成数字信号（PCM 全称是 Pulse-code-modulation），在转换过程中是以固定频率打点转换的，而每个点被称为一个采样。

采样率（Sample Rate）
采样频率单位：Hz。采样频率越高，音频质量越好，占用空间也越大，上段中已经提到这个概念。例如48000hz采样率，代表的含义就是一秒钟将会产生48000个采样数据，那么播放的时候也要在一秒钟以内将48000个采样数据匀速传递给声卡播放。
Sign :
音频数据是否是有符号的。通常情况下都是有符号的。若是将有符号的数据当做无符号的数据来处理将会使声音听来很刺
Sample Size :
表示每一个采样数据的大小。通常该值为16-bit。
ffmpeg提供了以下几种选项：

enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double

    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar
    AV_SAMPLE_FMT_S64,         ///< signed 64 bits
    AV_SAMPLE_FMT_S64P,        ///< signed 64 bits, planar

    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};

Byte Ordering :
字节序指的是little-endian还是big-endian。表示音频数据的存储字节序。通常均为little-endian。
Number of Channels :
标识音频是单声道(mono，1 channel)还是立体声(stereo，2 channels)。

通过以上五个数据我们就可以描述一个PCM数据，播放一个PCM数据需要的就是以上五个数据。

音频编码介绍

通过上面了解到声音信号数字化之后是pcm数据，那么一秒钟将会有多少音频数据产生呢，以48000采样的音频编码为例：

数据长度 = 单个采样大小 * 声道数 * 48000

由此可见pcm的数据量还是非常大的，为了降低数据空间，就需要音频编码算法的协助（对pcm数据压缩），常见的音频编码算法有aac、mp3和opus。
此篇文章始终以aac编码为例进行讲解，aac编码会将1024个采样数据输入给编码器压缩，生成一个aac包，在ffmpeg中可通过结构体AVCodecContext 中的frame_size来指定。
所谓音频解码，就是将aac这类压缩过的数据还原成pcm采样数据的过程。

ffmpeg 函数接口

AVCodec *avcodec_find_decoder(enum AVCodecID id);

AVCodecContext *avcodec_alloc_context3(const AVCodec *codec);

从之前的文章中已经知道，对媒体文件解封装需要创建一个AVFormatContext上下文指针。在解码操作中也是同样的思路，只不过解码需要AVCodecContext 解码上下文指针。
其中AVCodec编码器指针可通过avcodec_find_decoder函数查询获取，每种音频编码都对应一个AVCodec，例如aac编码的AVCodecID就是AV_CODEC_ID_AAC。除了这个函数之外你也可以使用AVCodec *avcodec_find_encoder_by_name(const char *name);函数获取解码器，如
avcodec_find_decoder_by_name(“libopus”);就可以获取到 libopus 解码器。

int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options);

初始化编码器，一定要调用这个函数

int avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt);
int avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame);

这两个函数要搭配着使用，将AVPacket数据传递给avcodec_send_packet函数，然后从avcodec_receive_frame函数中获取解码后的数据（保存在frame中）

int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt);

这个函数用来返回每个采样有几个字节长度

示例代码

/**
 * @file
 * audio decoding with libavcodec API example
 *
 * @example decode_audio.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <libavutil/frame.h>
#include <libavutil/mem.h>

#include <libavcodec/avcodec.h>

#define AUDIO_INBUF_SIZE 20480
#define AUDIO_REFILL_THRESH 4096

static int get_format_from_sample_fmt(const char **fmt,
                                      enum AVSampleFormat sample_fmt)
{
    int i;
    struct sample_fmt_entry {
        enum AVSampleFormat sample_fmt; const char *fmt_be, *fmt_le;
    } sample_fmt_entries[] = {
        { AV_SAMPLE_FMT_U8,  "u8",    "u8"    },
        { AV_SAMPLE_FMT_S16, "s16be", "s16le" },
        { AV_SAMPLE_FMT_S32, "s32be", "s32le" },
        { AV_SAMPLE_FMT_FLT, "f32be", "f32le" },
        { AV_SAMPLE_FMT_DBL, "f64be", "f64le" },
    };
    *fmt = NULL;

    for (i = 0; i < FF_ARRAY_ELEMS(sample_fmt_entries); i++) {
        struct sample_fmt_entry *entry = &sample_fmt_entries[i];
        if (sample_fmt == entry->sample_fmt) {
            *fmt = AV_NE(entry->fmt_be, entry->fmt_le);
            return 0;
        }
    }

    fprintf(stderr,
            "sample format %s is not supported as output format\n",
            av_get_sample_fmt_name(sample_fmt));
    return -1;
}

static void decode(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame,
                   FILE *outfile)
{
    int i, ch;
    int ret, data_size;

    /* send the packet with the compressed data to the decoder */
    ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0) {
        fprintf(stderr, "Error submitting the packet to the decoder\n");
        exit(1);
    }

    /* read all the output frames (in general there may be any number of them */
    while (ret >= 0) {
        ret = avcodec_receive_frame(dec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            return;
        else if (ret < 0) {
            fprintf(stderr, "Error during decoding\n");
            exit(1);
        }
        data_size = av_get_bytes_per_sample(dec_ctx->sample_fmt);
        if (data_size < 0) {
            /* This should not occur, checking just for paranoia */
            fprintf(stderr, "Failed to calculate data size\n");
            exit(1);
        }
        for (i = 0; i < frame->nb_samples; i++)
            for (ch = 0; ch < dec_ctx->channels; ch++)
                fwrite(frame->data[ch] + data_size*i, 1, data_size, outfile);
    }
}

int main(int argc, char **argv)
{
    const char *outfilename, *filename;
    const AVCodec *codec;
    AVCodecContext *c= NULL;
    AVCodecParserContext *parser = NULL;
    int len, ret;
    FILE *f, *outfile;
    uint8_t inbuf[AUDIO_INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
    uint8_t *data;
    size_t   data_size;
    AVPacket *pkt;
    AVFrame *decoded_frame = NULL;
    enum AVSampleFormat sfmt;
    int n_channels = 0;
    const char *fmt;

    if (argc <= 2) {
        fprintf(stderr, "Usage: %s <input file> <output file>\n", argv[0]);
        exit(0);
    }
    filename    = argv[1];
    outfilename = argv[2];

    pkt = av_packet_alloc();

    /* find the MPEG audio decoder */
    codec = avcodec_find_decoder(AV_CODEC_ID_MP2);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    parser = av_parser_init(codec->id);
    if (!parser) {
        fprintf(stderr, "Parser not found\n");
        exit(1);
    }

    c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate audio codec context\n");
        exit(1);
    }

    /* open it */
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    f = fopen(filename, "rb");
    if (!f) {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }
    outfile = fopen(outfilename, "wb");
    if (!outfile) {
        av_free(c);
        exit(1);
    }

    /* decode until eof */
    data      = inbuf;
    data_size = fread(inbuf, 1, AUDIO_INBUF_SIZE, f);

    while (data_size > 0) {
        if (!decoded_frame) {
            if (!(decoded_frame = av_frame_alloc())) {
                fprintf(stderr, "Could not allocate audio frame\n");
                exit(1);
            }
        }

        ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size,
                               data, data_size,
                               AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
        if (ret < 0) {
            fprintf(stderr, "Error while parsing\n");
            exit(1);
        }
        data      += ret;
        data_size -= ret;

        if (pkt->size)
            decode(c, pkt, decoded_frame, outfile);

        if (data_size < AUDIO_REFILL_THRESH) {
            memmove(inbuf, data, data_size);
            data = inbuf;
            len = fread(data + data_size, 1,
                        AUDIO_INBUF_SIZE - data_size, f);
            if (len > 0)
                data_size += len;
        }
    }

    /* flush the decoder */
    pkt->data = NULL;
    pkt->size = 0;
    decode(c, pkt, decoded_frame, outfile);

    /* print output pcm infomations, because there have no metadata of pcm */
    sfmt = c->sample_fmt;

    if (av_sample_fmt_is_planar(sfmt)) {
        const char *packed = av_get_sample_fmt_name(sfmt);
        printf("Warning: the sample format the decoder produced is planar "
               "(%s). This example will output the first channel only.\n",
               packed ? packed : "?");
        sfmt = av_get_packed_sample_fmt(sfmt);
    }

    n_channels = c->channels;
    if ((ret = get_format_from_sample_fmt(&fmt, sfmt)) < 0)
        goto end;

    printf("Play the output audio file with the command:\n"
           "ffplay -f %s -ac %d -ar %d %s\n",
           fmt, n_channels, c->sample_rate,
           outfilename);
end:
    fclose(outfile);
    fclose(f);

    avcodec_free_context(&c);
    av_parser_close(parser);
    av_frame_free(&decoded_frame);
    av_packet_free(&pkt);

    return 0;
}