学习就是要学习基础,学习原理。
所以大家在学习下面的内容之前,最好先看一下我写的下面几篇文章,然后再看本文的内容就会容易很多。
https://blog.csdn.net/qq_15255121/article/details/115168456
https://blog.csdn.net/qq_15255121/article/details/115301516
https://blog.csdn.net/qq_15255121/article/details/115348454
抽取音频数据
音频最重要的三个元素 采样率 采样大小(位深) 通道数
AVPacket
AVPacket使用完要进行释放
涉及到四个函数
av_init_packet(AVPacket *pkt)
av_packet_unref(AVPacket *pkt)
av_packet_alloc() 先分配空间,再进行初始化
av_packet_free(AVPacket **pkt) 内部实现先av_packet_unref,再释放空间
av_init_packet和av_packet_unref是一对
av_packet_alloc和av_packet_free是一对。
要成对出现,不然会内存泄漏
查找流
av_find_best_stream
av_read_frame 读取数据到AVPacket
接下来我们实现一个功能从mp4当中提取出aac数据
我们简单说明下怎样提取出aac数据。
- avformat_open_input获取到AVFormatContext
- 我们从要提取音频数据的多媒体文件中,找到音频流的索引,通过av_find_best_stream
- 音频数据最重要的就是采样率,位深,通道,编码格式。我们通过音频流的编码结构体AVCodecParameters进行获取
- 通过av_read_frame获取数据存到AVPacket当中
- 如果当前AVPacket的流索引是音频流的索引,那么我们继续下面的步骤;如果不是,回到第 4 步
- 我们知道aac数据分为header和data两部分,header分为ADTS和ADIF.具体解析查看我前面的讲解https://blog.csdn.net/qq_15255121/article/details/115348454。接下来我们根据第 3 步获取到的信息构建header,我们这里构建ADTS头,构建成功后,写入aac文件
- 将AVPacket当中的data数据写入aac文件
- 写完后要将AVPacket释放
- 现在回到第 4 步继续读取数据,如果读不到数据,执行第 10 步(释放资源)
- 释放资源
大体流程就是这样,接下来我们讲解如何获取音频的重要参数采样率,位深,通道,编码格式,并构建ADTS头
获取参数:
当找到音频流索引后,我们通过下面方式进行获取
int aac_type = fmt_ctx->streams[audio_stream_index]->codecpar->profile;
int channels = fmt_ctx->streams[audio_stream_index]->codecpar->channels;
int sample_rate = fmt_ctx->streams[audio_stream_index]->codecpar->sample_rate;
我们知道AAC的头的格式
Structure
AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP (QQQQQQQQ QQQQQQQQ)
Header consists of 7 or 9 bytes (without or with CRC).
Letter Length (bits) Description
A 12 syncword 0xFFF, all bits must be 1
B 1 MPEG Version: 0 for MPEG-4, 1 for MPEG-2
C 2 Layer: always 0
D 1 protection absent, Warning, set to 1 if there is no CRC and 0 if there is CRC
E 2 profile, the MPEG-4 Audio Object Type minus 1
F 4 MPEG-4 Sampling Frequency Index (15 is forbidden)
G 1 private stream, set to 0 when encoding, ignore when decoding
H 3 MPEG-4 Channel Configuration (in the case of 0, the channel configuration is sent via an inband PCE)
I 1 originality, set to 0 when encoding, ignore when decoding
J 1 home, set to 0 when encoding, ignore when decoding
K 1 copyrighted stream, set to 0 when encoding, ignore when decoding
L 1 copyright start, set to 0 when encoding, ignore when decoding
M 13 frame length, this value must include 7 or 9 bytes of header length: FrameLength = (ProtectionAbsent == 1 ? 7 : 9) + size(AACFrame)
O 11 Buffer fullness
P 2 Number of AAC frames (RDBs) in ADTS frame minus 1, for maximum compatibility always use 1 AAC frame per ADTS frame
Q 16 CRC if protection absent is 0
接下来我们按照上面的说明进行构建
我们要知道ffmpeg当中对aacprofile的定义
官方文档对audio_obj_type的定义
所以也就有了下面profile到audio_obj_type的转化
/*
 * Map an FFmpeg AAC profile value (AVCodecParameters.profile) to the
 * MPEG-4 Audio Object Type carried in the ADTS "profile" field.
 *
 * FF_PROFILE_AAC_* values are the object type minus 1, so MAIN (0),
 * SSR (2) and LTP (3) map to aactype + 1.  HE (4) and HEv2 (28) are
 * transported as plain AAC LC (object type 2), because
 * AAC HEv2 = AAC LC + SBR + PS and AAC HE = AAC LC + SBR.
 * Unknown profiles also fall back to AAC LC.
 */
static int get_audio_obj_type(int aactype)
{
    if (aactype == 0 || aactype == 2 || aactype == 3)
        return aactype + 1; /* MAIN, SSR, LTP */

    /* LC (1), HE (4), HEv2 (28) and anything unrecognized -> AAC LC */
    return 2;
}
/*
 * Map a sampling rate in Hz to its MPEG-4 Sampling Frequency Index
 * (field F of the ADTS header).  For AAC HE (4) and HEv2 (28) the index
 * must describe the core AAC LC rate, which is half the output rate
 * because of SBR.  Returns 4 (44100 Hz) when the rate is not in the
 * standard table.
 */
static int get_sample_rate_index(int freq, int aactype)
{
    /* Frequencies in the order defined by the MPEG-4 index table. */
    static const int rates[13] = {
        96000, 88200, 64000, 48000, 44100, 32000,
        24000, 22050, 16000, 12000, 11025, 8000, 7350};
    int idx;

    /* SBR streams: the header describes the halved core rate. */
    if (aactype == 4 || aactype == 28)
        freq /= 2;

    for (idx = 0; idx < 13; idx++)
    {
        if (rates[idx] == freq)
            return idx;
    }

    return 4; /* default: 44100 Hz */
}
/*
 * Return the MPEG-4 Channel Configuration value for the ADTS header.
 * For AAC HEv2 (28), Parametric Stereo doubles the channel count
 * relative to the core stream, so the header carries half the output
 * channel count; all other profiles use the channel count as-is.
 */
static int get_channel_config(int channels, int aactype)
{
    return (aactype == 28) ? channels / 2 : channels;
}
/*
 * Fill szAdtsHeader (7 bytes, caller-allocated) with an ADTS header
 * describing one raw AAC frame of dataLen bytes.
 *
 *   aactype   - FFmpeg profile value (AVCodecParameters.profile)
 *   frequency - output sample rate in Hz
 *   channels  - output channel count
 *
 * protection_absent is hard-wired to 1, so no CRC follows and the
 * 13-bit frame_length field is dataLen + 7 (header included).
 */
static void adts_header(char *szAdtsHeader, int dataLen, int aactype, int frequency, int channels)
{
int audio_object_type = get_audio_obj_type(aactype);
int sampling_frequency_index = get_sample_rate_index(frequency, aactype);
int channel_config = get_channel_config(channels, aactype);
printf("aot=%d, freq_index=%d, channel=%d\n", audio_object_type, sampling_frequency_index, channel_config);
int adtsLen = dataLen + 7; //frame_length includes the 7-byte header
szAdtsHeader[0] = 0xff; //syncword 0xfff, high 8 bits
szAdtsHeader[1] = 0xf0; //syncword 0xfff, low 4 bits
szAdtsHeader[1] |= (0 << 3); //MPEG version: 0 = MPEG-4, 1 = MPEG-2 (1 bit)
szAdtsHeader[1] |= (0 << 1); //layer: always 0 (2 bits)
szAdtsHeader[1] |= 1; //protection absent: 1 = no CRC (1 bit)
szAdtsHeader[2] = (audio_object_type - 1) << 6; //profile = audio_object_type - 1 (2 bits)
szAdtsHeader[2] |= (sampling_frequency_index & 0x0f) << 2; //sampling frequency index (4 bits)
szAdtsHeader[2] |= (0 << 1); //private bit: 0 (1 bit)
szAdtsHeader[2] |= (channel_config & 0x04) >> 2; //channel configuration, high 1 bit
szAdtsHeader[3] = (channel_config & 0x03) << 6; //channel configuration, low 2 bits
szAdtsHeader[3] |= (0 << 5); //original/copy: 0 (1 bit)
szAdtsHeader[3] |= (0 << 4); //home: 0 (1 bit)
szAdtsHeader[3] |= (0 << 3); //copyright id bit: 0 (1 bit)
szAdtsHeader[3] |= (0 << 2); //copyright id start: 0 (1 bit)
szAdtsHeader[3] |= ((adtsLen & 0x1800) >> 11); //frame length (13 bits total), high 2 bits
szAdtsHeader[4] = (uint8_t)((adtsLen & 0x7f8) >> 3); //frame length, middle 8 bits
szAdtsHeader[5] = (uint8_t)((adtsLen & 0x7) << 5); //frame length, low 3 bits
szAdtsHeader[5] |= 0x1f; //buffer fullness 0x7ff (signals VBR), high 5 bits
szAdtsHeader[6] = 0xfc; //buffer fullness low 6 bits + (number of AAC frames - 1) = 0
}
下面是完整代码,来自李超老师提供的demo。
/**
* extract audio from media file
*
* copyright lichao 2020.4.10
*/
#include <stdio.h>
#include <libavutil/log.h>
#include <libavformat/avio.h>
#include <libavformat/avformat.h>

/* ADTS header size in bytes when protection_absent == 1 (no CRC).
 * No trailing ';' — a semicolon inside a #define is pasted into the
 * code at every use site and breaks expressions such as
 * `char buf[ADTS_HEADER_LEN];`. */
#define ADTS_HEADER_LEN 7
/*
 * Map an FFmpeg AAC profile value (AVCodecParameters.profile) to the
 * MPEG-4 Audio Object Type carried in the ADTS "profile" field.
 *
 * FF_PROFILE_AAC_* values are the object type minus 1, so MAIN/SSR/LTP
 * map to aactype + 1.  HE (4) and HEv2 (28) are transported as AAC LC
 * (object type 2): AAC HEv2 = AAC LC + SBR + PS, AAC HE = AAC LC + SBR.
 */
static int get_audio_obj_type(int aactype)
{
    switch (aactype)
    {
    case 0: /* MAIN */
    case 2: /* SSR  */
    case 3: /* LTP  */
        return aactype + 1;
    default:
        /* LC (1), HE (4), HEv2 (28) and unknown profiles -> AAC LC */
        return 2;
    }
}
/*
 * Map a sampling rate in Hz to its MPEG-4 Sampling Frequency Index
 * (field F of the ADTS header).
 *
 * For AAC HE (4) and HEv2 (28) the index must describe the core AAC LC
 * rate, which is half the output rate because of SBR, so the frequency
 * is halved first.
 *
 * Returns the table index, or 4 (44100 Hz) when the rate is not in the
 * standard table.
 *
 * Improvements: the frequency table is now `static const` (no per-call
 * stack initialization) and the loop bound is derived with sizeof
 * instead of the magic constant 13.
 */
static int get_sample_rate_index(int freq, int aactype)
{
    static const int freq_arr[] = {
        96000, 88200, 64000, 48000, 44100, 32000,
        24000, 22050, 16000, 12000, 11025, 8000, 7350};
    size_t i;

    /* SBR streams: the header describes the halved core rate. */
    if (aactype == 28 || aactype == 4)
    {
        freq /= 2;
    }
    for (i = 0; i < sizeof(freq_arr) / sizeof(freq_arr[0]); i++)
    {
        if (freq == freq_arr[i])
        {
            return (int)i;
        }
    }
    return 4; /* default: 44100 Hz */
}
/*
 * Return the MPEG-4 Channel Configuration value for the ADTS header.
 * For AAC HEv2 (28), Parametric Stereo doubles the channel count
 * relative to the core stream, so the header carries half the output
 * channel count; every other profile uses the count unchanged.
 */
static int get_channel_config(int channels, int aactype)
{
    int config = channels;
    if (aactype == 28)
    {
        config = channels / 2; /* HEv2: header describes the core stream */
    }
    return config;
}
/*
 * Fill szAdtsHeader (7 bytes, caller-allocated) with an ADTS header
 * describing one raw AAC frame of dataLen bytes.
 *
 *   aactype   - FFmpeg profile value (AVCodecParameters.profile)
 *   frequency - output sample rate in Hz
 *   channels  - output channel count
 *
 * protection_absent is hard-wired to 1, so no CRC follows and the
 * 13-bit frame_length field is dataLen + 7 (header included).
 */
static void adts_header(char *szAdtsHeader, int dataLen, int aactype, int frequency, int channels)
{
    int audio_object_type = get_audio_obj_type(aactype);
    int sampling_frequency_index = get_sample_rate_index(frequency, aactype);
    int channel_config = get_channel_config(channels, aactype);
    printf("aot=%d, freq_index=%d, channel=%d\n", audio_object_type, sampling_frequency_index, channel_config);

    int adtsLen = dataLen + 7; /* frame_length counts the header itself */

    /* byte 0: syncword high 8 bits */
    szAdtsHeader[0] = 0xff;
    /* byte 1: syncword low 4 bits | ID=0 (MPEG-4) | layer=00 | protection_absent=1 */
    szAdtsHeader[1] = 0xf0 | 0x01;
    /* byte 2: profile (2 bits) | sampling index (4 bits) | private=0 | channel config high bit */
    szAdtsHeader[2] = ((audio_object_type - 1) << 6) |
                      ((sampling_frequency_index & 0x0f) << 2) |
                      ((channel_config & 0x04) >> 2);
    /* byte 3: channel config low 2 bits | orig/home/copyright bits = 0 | frame length bits 12-11 */
    szAdtsHeader[3] = ((channel_config & 0x03) << 6) | ((adtsLen & 0x1800) >> 11);
    /* byte 4: frame length bits 10-3 */
    szAdtsHeader[4] = (uint8_t)((adtsLen & 0x7f8) >> 3);
    /* byte 5: frame length bits 2-0 | buffer fullness (0x7ff = VBR) high 5 bits */
    szAdtsHeader[5] = (uint8_t)(((adtsLen & 0x7) << 5) | 0x1f);
    /* byte 6: buffer fullness low 6 bits | (number of AAC frames - 1) = 0 */
    szAdtsHeader[6] = 0xfc;
}
/**
 * Extract the AAC elementary stream from a media file and write it to an
 * .aac file: for every audio packet, a 7-byte ADTS header is emitted
 * followed by the raw frame payload.
 *
 * Fixes relative to the original:
 *  - the codec parameters and the codec-id check now use the index
 *    returned by av_find_best_stream() instead of hard-coded streams[1];
 *  - the AVFrame that was allocated but never used (and never freed) is
 *    removed, along with the unused output-context variables;
 *  - all error paths release the resources acquired so far and report a
 *    non-zero exit code;
 *  - optional command-line arguments <src> <dst> are honored, falling
 *    back to the built-in demo paths when absent.
 */
int main(int argc, char *argv[])
{
    int err_code;
    char errors[1024];
    const char *src_filename = "/Users/yuanxuzhen/study/mac/ffmpeg_demo/output/sync_714251_3900875.mp4";
    const char *dst_filename = "/Users/yuanxuzhen/study/mac/ffmpeg_demo/output/test.aac";
    FILE *dst_fd = NULL;
    int audio_stream_index = -1;
    int len;
    int ret = 0;
    AVFormatContext *fmt_ctx = NULL;
    AVPacket pkt;

    av_log_set_level(AV_LOG_DEBUG);

    /* Allow "prog <src> <dst>"; otherwise keep the demo defaults above. */
    if (argc >= 3)
    {
        src_filename = argv[1];
        dst_filename = argv[2];
    }

    dst_fd = fopen(dst_filename, "wb");
    if (!dst_fd)
    {
        av_log(NULL, AV_LOG_DEBUG, "Could not open destination file %s\n", dst_filename);
        return -1;
    }

    /* Open input media file and allocate the format context. */
    if ((err_code = avformat_open_input(&fmt_ctx, src_filename, NULL, NULL)) < 0)
    {
        av_strerror(err_code, errors, 1024);
        av_log(NULL, AV_LOG_DEBUG, "Could not open source file: %s, %d(%s)\n",
               src_filename,
               err_code,
               errors);
        ret = -1;
        goto cleanup;
    }

    /* Retrieve stream information. */
    if ((err_code = avformat_find_stream_info(fmt_ctx, NULL)) < 0)
    {
        av_strerror(err_code, errors, 1024);
        av_log(NULL, AV_LOG_DEBUG, "failed to find stream information: %s, %d(%s)\n",
               src_filename,
               err_code,
               errors);
        ret = -1;
        goto cleanup;
    }

    /* Dump input information to stderr. */
    av_dump_format(fmt_ctx, 0, src_filename, 0);

    /* Initialize the packet; data/size are filled by av_read_frame. */
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    /* Find the best audio stream. */
    audio_stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (audio_stream_index < 0)
    {
        av_log(NULL, AV_LOG_DEBUG, "Could not find %s stream in input file %s\n",
               av_get_media_type_string(AVMEDIA_TYPE_AUDIO),
               src_filename);
        ret = AVERROR(EINVAL);
        goto cleanup;
    }

    /*
    FF_PROFILE_AAC_MAIN 0, FF_PROFILE_AAC_LOW 1, FF_PROFILE_AAC_SSR 2,
    FF_PROFILE_AAC_LTP 3, FF_PROFILE_AAC_HE 4, FF_PROFILE_AAC_HE_V2 28,
    FF_PROFILE_AAC_LD 22, FF_PROFILE_AAC_ELD 38,
    FF_PROFILE_MPEG2_AAC_LOW 128, FF_PROFILE_MPEG2_AAC_HE 131
    */
    /* BUG FIX: the original read streams[1] here; nothing guarantees the
     * audio stream is stream 1 — use the index found above. */
    int aac_type = fmt_ctx->streams[audio_stream_index]->codecpar->profile;
    int channels = fmt_ctx->streams[audio_stream_index]->codecpar->channels;
    int sample_rate = fmt_ctx->streams[audio_stream_index]->codecpar->sample_rate;

    if (fmt_ctx->streams[audio_stream_index]->codecpar->codec_id != AV_CODEC_ID_AAC)
    {
        av_log(NULL, AV_LOG_ERROR, "the audio type is not AAC!\n");
        ret = -1;
        goto cleanup;
    }
    av_log(NULL, AV_LOG_INFO, "the audio type is AAC!\n");

    /* Read packets; write ADTS header + payload for each audio packet. */
    while (av_read_frame(fmt_ctx, &pkt) >= 0)
    {
        if (pkt.stream_index == audio_stream_index)
        {
            char adts_header_buf[7];
            adts_header(adts_header_buf, pkt.size, aac_type, sample_rate, channels);
            fwrite(adts_header_buf, 1, 7, dst_fd);
            len = fwrite(pkt.data, 1, pkt.size, dst_fd);
            if (len != pkt.size)
            {
                av_log(NULL, AV_LOG_DEBUG, "warning, length of writed data isn't equal pkt.size(%d, %d)\n",
                       len,
                       pkt.size);
            }
        }
        /* Every av_read_frame must be paired with av_packet_unref. */
        av_packet_unref(&pkt);
    }

cleanup:
    /* avformat_close_input tolerates a NULL context. */
    avformat_close_input(&fmt_ctx);
    if (dst_fd)
    {
        fclose(dst_fd);
    }
    return ret;
}
提取出数据后,我们使用ffplay xxxx.aac进行播放。
提取完aac数据后,接下来几篇。
我们谈下如何从aac到pcm?如何将pcm转成aac?如何采集音频数据再转成pcm再转成aac?
不停地复习,不停地举一反三,不停地复盘总结,才能真正学会一门新技能。
用慢对抗快;
用体系对抗碎片;
用原理对抗招式。
一起共勉!!!!!加油!!!!!!!