ffmpeg api实现aac语音文件转pcm

feifagehao

已于 2023-08-23 10:32:51 修改

阅读量1.5k

点赞数

分类专栏：音视频文章标签：语音识别

于 2022-07-07 17:56:58 首次发布

本文链接：https://blog.csdn.net/feifagehao/article/details/125663004

版权

音视频专栏收录该内容

3 篇文章 0 订阅

订阅专栏

主要函数介绍

1，avformat_open_input

int avformat_open_input    (AVFormatContext ** ps,
                         const char *         url,
                         AVInputFormat *     fmt,
                         AVDictionary **     options 
)        
打开输入媒体流并读取头部信息。
使用完毕后，需要调用 avformat_close_input() 关闭输入流。

参数
ps         指向 AVFormatContext 指针的指针（二级指针）；若 *ps 为 NULL，函数会自行分配上下文，并将媒体相关信息写入其中。
url        输入媒体路径
fmt        输入媒体格式；若为 NULL，则自动探测输入格式。
options    默认可设置为NULL

2，avformat_find_stream_info

int avformat_find_stream_info	(AVFormatContext * 	ic,
                                 AVDictionary ** 	options)		
从媒体文件中读取媒体流，获取媒体信息.

参数
ic	    媒体文件上下文句柄
options	默认填充NULL

3，av_find_best_stream

int av_find_best_stream	(	AVFormatContext * 	ic,
							enum AVMediaType 	type,
							int 	wanted_stream_nb,
							int 	related_stream,
							AVCodec ** 	decoder_ret,
							int 	flags 
							)		
从媒体文件中找到“最佳”流。

参数
ic               媒体文件句柄
type             流类型：视频、音频、字幕等
wanted_stream_nb 用户请求的流号，或 -1 用于自动选择
related_stream   尝试查找与此相关的流（例如，在同一程序中），如果没有，则为 -1
decoder_ret      如果非 NULL，则返回所选流的解码器
flags            目前没有定义任何标志，传 0 即可

4，avcodec_find_decoder

AVCodec* avcodec_find_decoder	(enum AVCodecID 	id)	
	
根据解码器id查找解码器

参数
id   解码器id

5，avcodec_alloc_context3

AVCodecContext* avcodec_alloc_context3	(const AVCodec * 	codec)	
为AVCodecContext结构体分配内存并设置默认值，
结果返回的结构体需要使用avcodec_free_context()进行释放内存

参数
codec	如果非 NULL，则分配私有数据并初始化给定编解码器的默认值；此后再使用不同的编解码器调用 avcodec_open2() 是非法的。
        如果为 NULL，则不会初始化编解码器特定的默认值，这可能会导致不理想的默认设置（这主要对编码器很重要，例如 libx264）。

解码流程：

avformat_open_input()：打开音频文件，获取里面的内容(解封装)
avformat_find_stream_info()：获取语音信息
av_find_best_stream()：查找语音流索引
avcodec_find_decoder()：寻找解码器
avcodec_alloc_context3()：根据解码器申请解码器相关上下文
avcodec_open2()：打开解码器
av_read_frame()：从语音文件中读取一个压缩数据包(AVPacket)
avcodec_send_packet()：将一个压缩音频包(AVPacket)发送给解码器。
avcodec_receive_frame()：接收解码器解码后的一帧音频(AVFrame)
swr_convert()：语音重采样

代码：

#include <iostream>
#include <string>

extern "C"{
#include "libavutil/samplefmt.h"
#include "libavutil/timestamp.h"
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libswresample/swresample.h"

}

/* Kept because main() below uses unqualified cout/endl. */
using namespace std;

/* Path of the AAC file to decode. */
std::string filename         = "test.aac";
/*
 * Path of the output file. main() writes the resampled samples with fwrite()
 * and no container header, so the file holds raw PCM rather than a valid WAV;
 * it is therefore named ".pcm".
 */
std::string dst_filename     = "test.pcm";
/* Output stream for the converted samples; opened and closed by main(). */
FILE   *audio_dst_file  = NULL;
/* Demuxer context for the input file. */
AVFormatContext *fmt_ctx   = NULL;
/* Decoder context for the selected audio stream. */
AVCodecContext  *codec_ctx = NULL;
/* Index of the audio stream inside fmt_ctx, or -1 while none is selected. */
int audio_stream_idx           = -1;
/* Reusable frame that receives decoded audio. */
AVFrame *decoded_frame         = NULL;
/* Not referenced by the main() shown in this file; kept in case other code uses it. */
int audio_frame_count = 0;
/* Resampler context that converts decoded audio to the output format. */
SwrContext *actx  = NULL;
/* Reusable packet that receives compressed data read from the input. */
AVPacket *pPacket = NULL;
/* Not referenced by the main() shown in this file; kept in case other code uses it. */
int got_frame;
    
int main()
{
    cout<<"ffmpeg"<<endl;
    AVCodec *dec                   = NULL;
    AVStream *audio_stream         = NULL;
    
	//打开输入文件
    if (avformat_open_input(&fmt_ctx, filename.c_str(), NULL, NULL) < 0) {
        fprintf(stderr, "Could not open source file %s\n", filename.c_str());
        exit(1);
    }
	//获取语音信息
    if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
        fprintf(stderr, "Could not find stream information\n");
        exit(1);
    }
    
    //打印输入语音文件信息
    av_dump_format(fmt_ctx, 0, filename.c_str(), 0);
    
	//获取语音流索引
    int ret = av_find_best_stream(fmt_ctx,  AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (ret < 0) 
    {
        fprintf(stderr, "Could not find %s stream in input file '%s'\n",
                av_get_media_type_string( AVMEDIA_TYPE_AUDIO), filename.c_str());
        return ret;
    }
    
    audio_stream_idx = ret;
    audio_stream = fmt_ctx->streams[audio_stream_idx];
    //查找解码器
    dec = avcodec_find_decoder(audio_stream->codecpar->codec_id);
    if (!dec) 
    {
        fprintf(stderr, "Failed to find %s codec\n",
                av_get_media_type_string(AVMEDIA_TYPE_AUDIO));
        return AVERROR(EINVAL);
    }
    /* Allocate a codec context for the decoder */
    codec_ctx = avcodec_alloc_context3(dec);
    if (!codec_ctx) 
    {
        fprintf(stderr, "Failed to allocate the %s codec context\n",
                av_get_media_type_string(AVMEDIA_TYPE_AUDIO));
        return AVERROR(ENOMEM);
    }
    /* Copy codec parameters from input stream to output codec context */
    if ((ret = avcodec_parameters_to_context(codec_ctx, audio_stream->codecpar)) < 0) 
    {
        fprintf(stderr, "Failed to copy %s codec parameters to decoder context\n",
                av_get_media_type_string(AVMEDIA_TYPE_AUDIO));
        return ret;
    }
    if ((ret = avcodec_open2(codec_ctx, dec, NULL)) < 0) {
        fprintf(stderr, "Failed to open %s codec\n",
                av_get_media_type_string(AVMEDIA_TYPE_AUDIO));
        return ret;
    }
    audio_dst_file = fopen(dst_filename.c_str(), "wb");
    if (!audio_dst_file) {
        fprintf(stderr, "Could not open destination file %s\n", dst_filename.c_str());
        ret = 1;
        return ret;
    }
        
    
    if (!audio_stream) {
        fprintf(stderr, "Could not find audio stream in the input, aborting\n");
        ret = 1;
        return ret;
    }
    
    decoded_frame = av_frame_alloc();
    if (!decoded_frame) {
        fprintf(stderr, "Could not allocate frame\n");
        ret = AVERROR(ENOMEM);
        return -1;
    }
    
    printf("channel layout=%d,frame_size=%d\n",audio_stream->codecpar->channel_layout,audio_stream->codecpar->frame_size);
    actx = swr_alloc();
    actx = swr_alloc_set_opts(
            actx,
            av_get_default_channel_layout(1),
            AV_SAMPLE_FMT_S16,
            8000,
            av_get_default_channel_layout(codec_ctx->channels),
            codec_ctx->sample_fmt,
            codec_ctx->sample_rate,
            0, 0);
    ret = swr_init(actx);
    if (ret != 0) {
        printf("swr_init failed\n");
    } else {
        printf("swr_init success\n");
    }

    pPacket  = av_packet_alloc();
    uint8_t *output=(uint8_t *) av_malloc(16000);
    while(av_read_frame(fmt_ctx, pPacket) >= 0)
    {
        ret = avcodec_send_packet(codec_ctx, pPacket);
        if (ret < 0 || ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            std::cout << "avcodec_send_packet: " << ret << std::endl;
            break;
        }
        while (ret  >= 0) {
            ret = avcodec_receive_frame(codec_ctx, decoded_frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                //std::cout << "avcodec_receive_frame: " << ret << std::endl;
                break;
            }
            std::cout << "frame: " << codec_ctx->frame_number << std::endl;
            int out_samples = swr_convert(actx, &output, 8000,
                                     (const uint8_t **)&decoded_frame->data, decoded_frame->nb_samples);
            printf("decoded_frame->nb_samples=%d\n",decoded_frame->nb_samples);
            int dst_bufsize = av_samples_get_buffer_size(NULL, 1,out_samples, AV_SAMPLE_FMT_S16, 1);
            printf("dst_bufsize=%d\n",dst_bufsize);

            fwrite(output, 1, dst_bufsize, audio_dst_file);
        }
        av_packet_unref(pPacket);
    }
     av_packet_unref(pPacket);
    printf("Demuxing succeeded.\n");
    av_free(output);
    avcodec_free_context(&codec_ctx);
    avformat_close_input(&fmt_ctx);
    if (audio_dst_file)
        fclose(audio_dst_file);
    av_frame_free(&decoded_frame);
    av_packet_free(&pPacket);
    swr_free(&actx);
    return 0;
}