FFmpeg从入门到牛掰（四）：音频编码(encode )讲解

最新推荐文章于 2024-08-20 12:01:35 发布

im-pingo

最新推荐文章于 2024-08-20 12:01:35 发布

阅读量2.6k

点赞数 3

分类专栏： ffmpeg 文章标签： ffmpeg

本文链接：https://blog.csdn.net/impingo/article/details/103794389

版权

ffmpeg 专栏收录该内容

7 篇文章 19 订阅

订阅专栏

转载请注明出处：https://blog.csdn.net/impingo
项目地址：https://github.com/im-pingo/pingos

流程描述

上篇文章【FFmpeg从入门到牛掰（三）：音频解码(decode)讲解】介绍了音频解码的过程，所谓解码就是将AAC、mp3这类音频压缩算法处理过的数据还原成pcm数据的过程，那么音频编码就是用AAC、mp3这类音频压缩算法处理pcm数据的过程。
在ffmpeg中，AVPacket用来保存编码（压缩）后的数据，AVFrame用来保存未压缩的原始数据，例如音频的pcm数据和视频的yuv数据。

函数接口和流程

获得你所需的编解码器，可以使用以下两个函数，前者是通过AVCodecID（如AV_CODEC_ID_OPUS）获取ffmpeg支持的编解码器，后者是通过编解码器的名称（如：“libopus”）返回ffmpeg支持的编解码器。

/* Look the encoder up by codec ID (use either this lookup or the one below,
 * not both) ... */
const AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_OPUS);
/* ... or look it up by the encoder's name. */
const AVCodec *codec = avcodec_find_encoder_by_name("libopus");

根据获得的AVCodec指针创建编码器上下文（AVCodecContext）指针，后续的编码操作都围绕这个指针进行。

AVCodecContext *c = avcodec_alloc_context3(codec);

设置编码器参数

/* Target bitrate of the encoded stream. */
c->bit_rate = 48000;

/* Request signed 16-bit input and stop if the encoder does not list it. */
c->sample_fmt = AV_SAMPLE_FMT_S16;
if (check_sample_fmt(codec, c->sample_fmt) == 0) {
    const char *fmt_name = av_get_sample_fmt_name(c->sample_fmt);

    fprintf(stderr, "Encoder does not support sample format %s", fmt_name);
    exit(1);
}

/* The remaining parameters are picked from what the encoder advertises. */
c->channel_layout = select_channel_layout(codec);
c->channels       = av_get_channel_layout_nb_channels(c->channel_layout);
c->sample_rate    = select_sample_rate(codec);

打开编码器

/*
 * int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options)
 *
 * The third parameter is a pointer to the dictionary pointer, so the
 * dictionary has to be passed by address (&opts), not by value.
 */
AVDictionary *opts = NULL;
av_dict_set(&opts, "frame_duration", "10.0", 0);
if (avcodec_open2(c, codec, &opts) < 0) {
    fprintf(stderr, "Could not open codec\n");
    av_dict_free(&opts);
    exit(1);
}
/* The dictionary is no longer needed once the encoder is open. */
av_dict_free(&opts);

pcm编码opus的过程

/*
 * int avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame)
 * int avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
 */

/**
 * Submit one frame to the encoder and write every packet it produces.
 *
 * @param ctx    encoder context the frame is sent to
 * @param frame  frame to encode; the example passes NULL once at the end to
 *               flush the encoder
 * @param pkt    packet reused for each received output; it is unreferenced
 *               after its payload has been written
 * @param output stream that receives the raw packet payloads
 *
 * Any encoder error or short write terminates the process with exit(1).
 */
static void encode(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt,
                   FILE *output)
{
    int ret;

    /* send the frame for encoding */
    ret = avcodec_send_frame(ctx, frame);
    if (ret < 0) {
        fprintf(stderr, "Error sending the frame to the encoder\n");
        exit(1);
    }

    /* read all the available output packets (in general there may be any
     * number of them) */
    while (ret >= 0) {
        ret = avcodec_receive_packet(ctx, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            return;
        else if (ret < 0) {
            fprintf(stderr, "Error encoding audio frame\n");
            exit(1);
        }

        /* a short write would silently truncate the output file */
        if (fwrite(pkt->data, 1, pkt->size, output) != (size_t)pkt->size) {
            fprintf(stderr, "Error writing encoded packet\n");
            exit(1);
        }
        av_packet_unref(pkt);
    }
}

还记得上篇文章里介绍音频解码的函数里有两个类似的函数吗？

/* Decoder-side counterparts of the two encoder calls: hand an AVPacket
 * (encoded data) to the decoder ... */
int avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt);
/* ... and get an AVFrame (raw pcm data) back from it. */
int avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame);

是的，解码时通过avcodec_send_packet和avcodec_receive_frame这两个函数将AVPacket（保存编码后的数据）转换成AVFrame（保存的是未经编码的原始pcm数据）。
那么编码的过程就是通过调用avcodec_send_frame和avcodec_receive_packet函数将AVFrame转换成AVPacket。
前面的文章里已经讲解过如何通过AVFormatContext将AVPacket数据封装到容器（mp4、flv这类封装格式）里，后续我会写一篇完整的串讲，介绍解复用->解码->重采样->编码->转封装的完整流程。

音频编码代码示例

/**
 * @file
 * audio encoding with libavcodec API example.
 *
 * @example encode_audio.c
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <libavcodec/avcodec.h>

#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/frame.h>
#include <libavutil/samplefmt.h>

/**
 * Check that a given sample format is supported by the encoder.
 *
 * @param codec       encoder whose sample_fmts list is scanned
 * @param sample_fmt  sample format to look for
 * @return 1 if the format appears in the list, or if the encoder publishes
 *         no list at all; 0 otherwise
 */
static int check_sample_fmt(const AVCodec *codec, enum AVSampleFormat sample_fmt)
{
    const enum AVSampleFormat *p = codec->sample_fmts;

    /* No list to check against: do not dereference NULL. Treated as "no
     * restriction declared", matching how the sibling helpers fall back to a
     * default when their list is missing. */
    if (!p)
        return 1;

    /* the list is terminated by AV_SAMPLE_FMT_NONE */
    while (*p != AV_SAMPLE_FMT_NONE) {
        if (*p == sample_fmt)
            return 1;
        p++;
    }
    return 0;
}

/**
 * Pick the supported sample rate that lies closest to 44100 Hz.
 *
 * @param codec  encoder whose supported_samplerates list is scanned
 * @return 44100 when the encoder publishes no list; otherwise the listed rate
 *         with the smallest distance to 44100 (the earliest entry wins a
 *         tie), or 0 if the list is empty
 */
static int select_sample_rate(const AVCodec *codec)
{
    const int *rate = codec->supported_samplerates;
    int chosen = 0;

    if (rate == NULL)
        return 44100;

    /* the list is zero-terminated */
    for (; *rate != 0; rate++) {
        if (chosen == 0 || abs(44100 - *rate) < abs(44100 - chosen))
            chosen = *rate;
    }

    return chosen;
}

/**
 * Select the supported channel layout with the highest channel count.
 *
 * The layout is a 64-bit mask, so it is returned as uint64_t; returning it
 * through int would truncate or sign-corrupt any layout that uses bit 31 or
 * above.
 *
 * @param codec  encoder whose channel_layouts list is scanned
 * @return AV_CH_LAYOUT_STEREO when the encoder publishes no list; otherwise
 *         the first listed layout with the largest number of channels, or 0
 *         if the list is empty
 */
static uint64_t select_channel_layout(const AVCodec *codec)
{
    const uint64_t *p;
    uint64_t best_ch_layout = 0;
    int best_nb_channels   = 0;

    if (!codec->channel_layouts)
        return AV_CH_LAYOUT_STEREO;

    /* the list is zero-terminated */
    p = codec->channel_layouts;
    while (*p) {
        int nb_channels = av_get_channel_layout_nb_channels(*p);

        if (nb_channels > best_nb_channels) {
            best_ch_layout   = *p;
            best_nb_channels = nb_channels;
        }
        p++;
    }
    return best_ch_layout;
}

/**
 * Submit one frame to the encoder and write every packet it produces.
 *
 * @param ctx    encoder context the frame is sent to
 * @param frame  frame to encode; NULL flushes the encoder
 * @param pkt    packet reused for each received output; it is unreferenced
 *               after its payload has been written
 * @param output stream that receives the raw packet payloads
 *
 * Any encoder error or short write terminates the process with exit(1).
 */
static void encode(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt,
                   FILE *output)
{
    int ret;

    /* send the frame for encoding */
    ret = avcodec_send_frame(ctx, frame);
    if (ret < 0) {
        fprintf(stderr, "Error sending the frame to the encoder\n");
        exit(1);
    }

    /* read all the available output packets (in general there may be any
     * number of them) */
    while (ret >= 0) {
        ret = avcodec_receive_packet(ctx, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            return;
        else if (ret < 0) {
            fprintf(stderr, "Error encoding audio frame\n");
            exit(1);
        }

        /* a short write would silently truncate the output file */
        if (fwrite(pkt->data, 1, pkt->size, output) != (size_t)pkt->size) {
            fprintf(stderr, "Error writing encoded packet\n");
            exit(1);
        }
        av_packet_unref(pkt);
    }
}

/**
 * Encode a synthetic 440 Hz sine tone with the AAC encoder and write the
 * encoded packets to the file named on the command line.
 *
 * @param argc  argument count; one argument (the output path) is required
 * @param argv  argv[1] is the output file name
 * @return 0 after a complete run, and also after printing the usage text;
 *         every failure path terminates through exit(1)
 */
int main(int argc, char **argv)
{
    const char *filename;
    const AVCodec *codec;
    AVCodecContext *c= NULL;
    AVFrame *frame;
    AVPacket *pkt;
    int i, j, k, ret;
    FILE *f;
    int16_t *samples;   /* AV_SAMPLE_FMT_S16 samples are signed */
    float t, tincr;

    if (argc <= 1) {
        fprintf(stderr, "Usage: %s <output file>\n", argv[0]);
        return 0;
    }
    filename = argv[1];

    /* find the AAC encoder */
    codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate audio codec context\n");
        exit(1);
    }

    /* put sample parameters */
    c->bit_rate = 64000;

    /* check that the encoder supports s16 pcm input.
     * NOTE(review): FFmpeg's built-in AAC encoder is believed to accept only
     * planar float input, in which case this check fails and the program
     * exits here -- confirm against the encoder actually returned above. */
    c->sample_fmt = AV_SAMPLE_FMT_S16;
    if (!check_sample_fmt(codec, c->sample_fmt)) {
        fprintf(stderr, "Encoder does not support sample format %s",
                av_get_sample_fmt_name(c->sample_fmt));
        exit(1);
    }

    /* select other audio parameters supported by the encoder */
    c->sample_rate    = select_sample_rate(codec);
    c->channel_layout = select_channel_layout(codec);
    c->channels       = av_get_channel_layout_nb_channels(c->channel_layout);

    /* open it */
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }

    /* packet for holding encoded output */
    pkt = av_packet_alloc();
    if (!pkt) {
        fprintf(stderr, "could not allocate the packet\n");
        exit(1);
    }

    /* frame containing input raw audio */
    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate audio frame\n");
        exit(1);
    }

    frame->nb_samples     = c->frame_size;
    frame->format         = c->sample_fmt;
    frame->channel_layout = c->channel_layout;

    /* allocate the data buffers */
    ret = av_frame_get_buffer(frame, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate audio data buffers\n");
        exit(1);
    }

    /* generate 200 test frames, each holding frame_size samples per channel */
    t = 0;
    tincr = 2 * M_PI * 440.0 / c->sample_rate;
    for (i = 0; i < 200; i++) {
        /* make sure the frame is writable -- makes a copy if the encoder
         * kept a reference internally */
        ret = av_frame_make_writable(frame);
        if (ret < 0)
            exit(1);
        samples = (int16_t *)frame->data[0];

        for (j = 0; j < c->frame_size; j++) {
            /* S16 is interleaved: the samples of one time step sit next to
             * each other, so the stride is the channel count chosen above,
             * not a fixed 2 */
            samples[c->channels * j] = (int)(sin(t) * 10000);

            /* copy the first channel's sample into every other channel */
            for (k = 1; k < c->channels; k++)
                samples[c->channels * j + k] = samples[c->channels * j];
            t += tincr;
        }
        encode(c, frame, pkt, f);
    }

    /* flush the encoder */
    encode(c, NULL, pkt, f);

    fclose(f);

    av_frame_free(&frame);
    av_packet_free(&pkt);
    avcodec_free_context(&c);

    return 0;
}