转载请注明出处:https://blog.csdn.net/impingo
项目地址:https://github.com/im-pingo/pingos
PCM音频介绍
声音被拾音器采集到之后,模拟信号会被转换成数字信号,即 PCM(全称 Pulse-Code Modulation,脉冲编码调制)数据。转换过程中以固定频率对信号打点取值,每个点被称为一个采样。
- 采样率(Sample Rate)
采样频率单位:Hz。采样频率越高,音频质量越好,占用空间也越大,上段中已经提到这个概念。例如 48000 Hz 采样率,代表的含义就是一秒钟将会产生 48000 个采样数据,那么播放的时候也要在一秒钟以内将 48000 个采样数据匀速传递给声卡播放。
- Sign :
音频数据是否是有符号的。通常情况下都是有符号的。若是将有符号的数据当做无符号的数据来处理,将会使声音听起来很刺耳。
- Sample Size :
表示每一个采样数据的大小。通常该值为16-bit。
ffmpeg提供了以下几种选项:
/**
 * Audio sample format options offered by FFmpeg, as quoted by this article.
 * The entries whose comment says "planar" are the planar counterparts of the
 * same-sized formats listed before them.
 */
enum AVSampleFormat {
AV_SAMPLE_FMT_NONE = -1,
AV_SAMPLE_FMT_U8, ///< unsigned 8 bits
AV_SAMPLE_FMT_S16, ///< signed 16 bits
AV_SAMPLE_FMT_S32, ///< signed 32 bits
AV_SAMPLE_FMT_FLT, ///< float
AV_SAMPLE_FMT_DBL, ///< double
AV_SAMPLE_FMT_U8P, ///< unsigned 8 bits, planar
AV_SAMPLE_FMT_S16P, ///< signed 16 bits, planar
AV_SAMPLE_FMT_S32P, ///< signed 32 bits, planar
AV_SAMPLE_FMT_FLTP, ///< float, planar
AV_SAMPLE_FMT_DBLP, ///< double, planar
AV_SAMPLE_FMT_S64, ///< signed 64 bits
AV_SAMPLE_FMT_S64P, ///< signed 64 bits, planar
AV_SAMPLE_FMT_NB ///< Number of sample formats. DO NOT USE if linking dynamically
};
- Byte Ordering :
字节序指的是little-endian还是big-endian。表示音频数据的存储字节序。通常均为little-endian。
- Number of Channels :
标识音频是单声道(mono,1 channel)还是立体声(stereo,2 channels)。
通过以上五个数据我们就可以描述一个PCM数据,播放一个PCM数据需要的就是以上五个数据。
音频编码介绍
通过上面了解到声音信号数字化之后是pcm数据,那么一秒钟将会有多少音频数据产生呢,以 48000 Hz 采样率的音频为例:
数据长度 = 单个采样大小 * 声道数 * 48000
例如 16-bit(2 字节)双声道音频,每秒的数据量为 2 * 2 * 48000 = 192000 字节。
由此可见pcm的数据量还是非常大的,为了降低数据空间,就需要音频编码算法的协助(对pcm数据压缩),常见的音频编码算法有aac、mp3和opus。
此篇文章始终以aac编码为例进行讲解,aac编码会将1024个采样数据输入给编码器压缩,生成一个aac包,在ffmpeg中每帧的采样数可通过结构体AVCodecContext 中的frame_size字段获取(该值由编解码器给出,而不是由调用者随意指定)。注意:下方示例代码中查找的是 AV_CODEC_ID_MP2 解码器,若要解码aac数据,需要将其替换为 AV_CODEC_ID_AAC。
所谓音频解码,就是将aac这类压缩过的数据还原成pcm采样数据的过程。
ffmpeg 函数接口
AVCodec *avcodec_find_decoder(enum AVCodecID id);
AVCodecContext *avcodec_alloc_context3(const AVCodec *codec);
从之前的文章中已经知道,对媒体文件解封装需要创建一个AVFormatContext上下文指针。在解码操作中也是同样的思路,只不过解码需要AVCodecContext 解码上下文指针。
其中AVCodec解码器指针可通过avcodec_find_decoder函数查询获取,每种音频编码都对应一个AVCodec,例如aac编码的AVCodecID就是AV_CODEC_ID_AAC。除了这个函数之外你也可以使用AVCodec *avcodec_find_decoder_by_name(const char *name);函数按名称获取解码器,如
avcodec_find_decoder_by_name("libopus");就可以获取到 libopus 解码器。
int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options);
初始化解码器(用指定的AVCodec打开AVCodecContext),在发送数据解码之前一定要调用这个函数
int avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt);
int avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame);
这两个函数要搭配着使用,将AVPacket数据传递给avcodec_send_packet函数,然后从avcodec_receive_frame函数中获取解码后的数据(保存在frame中)
int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt);
这个函数用来返回指定采样格式下每个采样占用的字节数(采样格式未知时返回0)
示例代码
/**
* @file
* audio decoding with libavcodec API example
*
* @example decode_audio.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libavutil/frame.h>
#include <libavutil/mem.h>
#include <libavcodec/avcodec.h>
#define AUDIO_INBUF_SIZE 20480
#define AUDIO_REFILL_THRESH 4096
static int get_format_from_sample_fmt(const char **fmt,
enum AVSampleFormat sample_fmt)
{
int i;
struct sample_fmt_entry {
enum AVSampleFormat sample_fmt; const char *fmt_be, *fmt_le;
} sample_fmt_entries[] = {
{ AV_SAMPLE_FMT_U8, "u8", "u8" },
{ AV_SAMPLE_FMT_S16, "s16be", "s16le" },
{ AV_SAMPLE_FMT_S32, "s32be", "s32le" },
{ AV_SAMPLE_FMT_FLT, "f32be", "f32le" },
{ AV_SAMPLE_FMT_DBL, "f64be", "f64le" },
};
*fmt = NULL;
for (i = 0; i < FF_ARRAY_ELEMS(sample_fmt_entries); i++) {
struct sample_fmt_entry *entry = &sample_fmt_entries[i];
if (sample_fmt == entry->sample_fmt) {
*fmt = AV_NE(entry->fmt_be, entry->fmt_le);
return 0;
}
}
fprintf(stderr,
"sample format %s is not supported as output format\n",
av_get_sample_fmt_name(sample_fmt));
return -1;
}
/**
 * Submit one packet to the decoder and append every frame it produces to
 * outfile as interleaved (packed) raw PCM.
 *
 * Any decoder, sample-size or write error is fatal: a message is printed to
 * stderr and the process exits with status 1.
 *
 * @param dec_ctx opened decoder context
 * @param pkt     packet to decode; main() passes a packet with data == NULL
 *                and size == 0 to flush the decoder
 * @param frame   caller-allocated frame reused for every decoded frame
 * @param outfile destination stream for the PCM samples
 */
static void decode(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame,
                   FILE *outfile)
{
    int i, ch;
    int ret, data_size;

    /* send the packet with the compressed data to the decoder */
    ret = avcodec_send_packet(dec_ctx, pkt);
    if (ret < 0) {
        fprintf(stderr, "Error submitting the packet to the decoder\n");
        exit(1);
    }

    /* read all the output frames (in general there may be any number of them) */
    while (ret >= 0) {
        ret = avcodec_receive_frame(dec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            return;
        else if (ret < 0) {
            fprintf(stderr, "Error during decoding\n");
            exit(1);
        }

        /* av_get_bytes_per_sample() reports an unknown format as 0, so the
         * paranoia check has to reject zero as well as negative values. */
        data_size = av_get_bytes_per_sample(dec_ctx->sample_fmt);
        if (data_size <= 0) {
            /* This should not occur, checking just for paranoia */
            fprintf(stderr, "Failed to calculate data size\n");
            exit(1);
        }

        if (av_sample_fmt_is_planar(dec_ctx->sample_fmt)) {
            /* Planar: one plane per channel, so interleave sample by sample.
             * extended_data is used instead of data[] because data[] cannot
             * address every plane when there are many channels. */
            for (i = 0; i < frame->nb_samples; i++) {
                for (ch = 0; ch < dec_ctx->channels; ch++) {
                    if (fwrite(frame->extended_data[ch] + data_size * i, 1,
                               data_size, outfile) != (size_t)data_size) {
                        fprintf(stderr, "Error writing decoded samples\n");
                        exit(1);
                    }
                }
            }
        } else {
            /* Packed: all channels are already interleaved in plane 0; the
             * other plane pointers are not valid and must not be indexed. */
            size_t total = (size_t)frame->nb_samples *
                           (size_t)dec_ctx->channels * (size_t)data_size;

            if (fwrite(frame->extended_data[0], 1, total, outfile) != total) {
                fprintf(stderr, "Error writing decoded samples\n");
                exit(1);
            }
        }
    }
}
/**
 * Decode an audio elementary stream file to raw PCM.
 *
 * Usage: prog <input file> <output file>
 *
 * The input is read in chunks, split into packets by the codec parser, and
 * each packet is handed to decode(), which writes the PCM samples to the
 * output file. After the input is exhausted the decoder is flushed and an
 * ffplay command line suitable for playing the result is printed.
 *
 * @return 0 on normal completion; fatal errors terminate the process via
 *         exit(1)
 */
int main(int argc, char **argv)
{
    const char *outfilename, *filename;
    const AVCodec *codec;
    AVCodecContext *c = NULL;
    AVCodecParserContext *parser = NULL;
    int ret;
    size_t len;
    FILE *f, *outfile;
    uint8_t inbuf[AUDIO_INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
    uint8_t *data;
    size_t data_size;
    AVPacket *pkt;
    AVFrame *decoded_frame = NULL;
    enum AVSampleFormat sfmt;
    int n_channels = 0;
    const char *fmt;

    if (argc <= 2) {
        fprintf(stderr, "Usage: %s <input file> <output file>\n", argv[0]);
        exit(0);
    }
    filename    = argv[1];
    outfilename = argv[2];

    pkt = av_packet_alloc();
    if (!pkt) {
        fprintf(stderr, "Could not allocate packet\n");
        exit(1);
    }

    /* find the MPEG audio decoder */
    codec = avcodec_find_decoder(AV_CODEC_ID_MP2);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    parser = av_parser_init(codec->id);
    if (!parser) {
        fprintf(stderr, "Parser not found\n");
        exit(1);
    }

    c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate audio codec context\n");
        exit(1);
    }

    /* open it */
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    /* Allocate the frame up front: the flush call after the read loop needs
     * a valid frame even when the input file is empty and the loop body
     * never runs. */
    decoded_frame = av_frame_alloc();
    if (!decoded_frame) {
        fprintf(stderr, "Could not allocate audio frame\n");
        exit(1);
    }

    f = fopen(filename, "rb");
    if (!f) {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }
    outfile = fopen(outfilename, "wb");
    if (!outfile) {
        fprintf(stderr, "Could not open %s\n", outfilename);
        fclose(f);
        /* a codec context must be released with avcodec_free_context() */
        avcodec_free_context(&c);
        exit(1);
    }

    /* decode until eof */
    data      = inbuf;
    data_size = fread(inbuf, 1, AUDIO_INBUF_SIZE, f);

    while (data_size > 0) {
        ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size,
                               data, data_size,
                               AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
        if (ret < 0) {
            fprintf(stderr, "Error while parsing\n");
            exit(1);
        }
        /* ret is the number of input bytes the parser consumed */
        data      += ret;
        data_size -= ret;

        if (pkt->size)
            decode(c, pkt, decoded_frame, outfile);

        if (data_size < AUDIO_REFILL_THRESH) {
            /* move the unconsumed tail to the front and top the buffer up */
            memmove(inbuf, data, data_size);
            data = inbuf;
            len = fread(data + data_size, 1,
                        AUDIO_INBUF_SIZE - data_size, f);
            if (len > 0)
                data_size += len;
        }
    }

    /* flush the decoder */
    pkt->data = NULL;
    pkt->size = 0;
    decode(c, pkt, decoded_frame, outfile);

    /* print output pcm information, because raw pcm carries no metadata */
    sfmt = c->sample_fmt;

    if (av_sample_fmt_is_planar(sfmt)) {
        const char *packed = av_get_sample_fmt_name(sfmt);
        /* decode() interleaves every channel into the output, so report the
         * file as packed rather than claiming only one channel was written. */
        printf("Warning: the sample format the decoder produced is planar "
               "(%s). This example writes all channels interleaved in the "
               "corresponding packed format.\n",
               packed ? packed : "?");
        sfmt = av_get_packed_sample_fmt(sfmt);
    }

    n_channels = c->channels;
    if ((ret = get_format_from_sample_fmt(&fmt, sfmt)) < 0)
        goto end;

    printf("Play the output audio file with the command:\n"
           "ffplay -f %s -ac %d -ar %d %s\n",
           fmt, n_channels, c->sample_rate,
           outfilename);
end:
    fclose(outfile);
    fclose(f);

    avcodec_free_context(&c);
    av_parser_close(parser);
    av_frame_free(&decoded_frame);
    av_packet_free(&pkt);

    return 0;
}