ffmpeg-4.2.2：音频编码流程（pcm编码成MP3）

最新推荐文章于 2024-09-29 10:09:58 发布

鱼儿-1226

最新推荐文章于 2024-09-29 10:09:58 发布

阅读量1.6k

点赞数

分类专栏： c++ 文章标签：音视频

原文链接：https://blog.csdn.net/weixin_44721044/article/details/104576751

版权

c++ 专栏收录该内容

104 篇文章 11 订阅

订阅专栏

基于FFMPEG的音频编码器，可以将pcm数据编码成MP3。
主要是记录一下自己学习FFMPEG时总结的音频编码流程。
ffmpeg版本：ffmpeg-4.2.2
libmp3lame-version:3.100

流程图
简单介绍下各个函数的功能：
avcodec_find_encoder()：通过编码器ID查找编码器
avcodec_alloc_context3()：初始化AVCodecContext
av_get_channel_layout()：通过名字获取通道布局(channel layout)的值(mono:单声道；stereo:双声道)
av_get_channel_layout_nb_channels()：通过通道布局的值获取通道数
avcodec_open2()：打开编码器
av_packet_alloc()：初始化AVPacket
av_frame_alloc()：初始化AVFrame
av_frame_get_buffer()：为AVFrame->data等分配内存
av_get_bytes_per_sample()：计算一个采样的字节数
av_samples_get_buffer_size()：计算一帧音频的字节数
av_frame_make_writable()：确保AVFrame->data可写（已可写时什么都不做，不可写时会重新分配缓冲区并拷贝数据）
avcodec_send_frame()：编码音频：将一帧音频原始数据（PCM）发送给编码器
avcodec_receive_packet()：编码音频：接收编码完成的AVPacket数据包

代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libavcodec/avcodec.h>
#include <libavutil/opt.h>
#include <libavutil/imgutils.h>

/**
 * Send one frame to the encoder and write every packet it returns to fp_out.
 *
 * @param cdc_ctx  Opened encoder context.
 * @param frame    Frame to encode, or NULL to flush the encoder so that it
 *                 emits the packets it is still holding back.
 * @param pkt      Scratch packet; it is unreferenced after each write so it
 *                 can be reused by the next call.
 * @param fp_out   Output stream the encoded bytes are appended to.
 *
 * Any encoder or write failure terminates the process with exit(1).
 */
static void encode(AVCodecContext *cdc_ctx, AVFrame *frame, AVPacket *pkt, FILE *fp_out)
{
    int ret = 0;

    if ((ret = avcodec_send_frame(cdc_ctx, frame)) < 0)
    {
        fprintf(stderr, "avcodec_send_frame failed.\n");
        exit(1);
    }

    /* One submitted frame may yield zero, one or several packets. */
    while ((ret = avcodec_receive_packet(cdc_ctx, pkt)) >= 0)
    {
        printf("Write (size=%d) packet.\n", pkt->size);

        /* A short write would leave a silently truncated MP3 on disk. */
        if (fwrite(pkt->data, 1, pkt->size, fp_out) != (size_t)pkt->size)
        {
            fprintf(stderr, "fwrite failed.\n");
            exit(1);
        }
        av_packet_unref(pkt);
    }

    /*
     * The loop is expected to end with EAGAIN (encoder wants more input) or
     * EOF (encoder fully flushed); anything else is a real error.
     */
    if ((ret != AVERROR(EAGAIN)) && (ret != AVERROR_EOF))
    {
        fprintf(stderr, "avcodec_receive_packet failed.\n");
        exit(1);
    }
}

/**
 * Encode a raw PCM file to MP3.
 *
 * The encoder parameters are hard-coded and selected at compile time by the
 * first "#if" below; the second "#if" selects the matching way of loading PCM
 * into the frame. The two switches must always be flipped together.
 *
 * The input is read as interleaved samples already stored in the encoder's
 * sample format (presumably native byte order - verify against the tool that
 * produced the PCM file).
 *
 * A trailing partial frame is padded with silence instead of being encoded
 * with stale samples left over from the previous frame.
 *
 * @param input_file   Path of the PCM file to read.
 * @param output_file  Path of the MP3 file to create or overwrite.
 *
 * Returns normally on success; on any failure all resources are released and
 * the process terminates with exit(1).
 */
void encode_audio(const char *input_file, const char *output_file)
{
    int ret = 0;
    int data_size = 0;
    int input_done = 0;   /* set once a short read shows the input is exhausted */
    int failed = 1;       /* cleared only after the whole pipeline succeeded */
    const AVCodec *codec = NULL;
    AVCodecContext *cdc_ctx = NULL;
    AVPacket *pkt = NULL;
    AVFrame *frame = NULL;
    FILE *fp_in = NULL;
    FILE *fp_out = NULL;

    if ((codec = avcodec_find_encoder(AV_CODEC_ID_MP3)) == NULL)
    {
        fprintf(stderr, "avcodec_find_encoder failed.\n");
        goto cleanup;
    }

    if ((cdc_ctx = avcodec_alloc_context3(codec)) == NULL)
    {
        fprintf(stderr, "avcodec_alloc_context3 failed.\n");
        goto cleanup;
    }

#if 1    /*encode zhu.pcm*/
    cdc_ctx->bit_rate = 192000;
    cdc_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    cdc_ctx->sample_rate = 44100;
    cdc_ctx->channel_layout = av_get_channel_layout("stereo");
    cdc_ctx->channels = av_get_channel_layout_nb_channels(cdc_ctx->channel_layout);

#else    /*encode 16k.pcm*/
    cdc_ctx->bit_rate = 64000;
    cdc_ctx->sample_fmt = AV_SAMPLE_FMT_S16P;
    cdc_ctx->sample_rate = 16000;
    cdc_ctx->channel_layout = av_get_channel_layout("mono");
    cdc_ctx->channels = av_get_channel_layout_nb_channels(cdc_ctx->channel_layout);
#endif

    if ((ret = avcodec_open2(cdc_ctx, codec, NULL)) < 0)
    {
        fprintf(stderr, "avcodec_open2 failed.\n");
        goto cleanup;
    }

    if ((pkt = av_packet_alloc()) == NULL)
    {
        fprintf(stderr, "av_packet_alloc failed.\n");
        goto cleanup;
    }

    if ((frame = av_frame_alloc()) == NULL)
    {
        fprintf(stderr, "av_frame_alloc failed.\n");
        goto cleanup;
    }

    /* The frame must describe exactly what the opened encoder expects. */
    frame->nb_samples = cdc_ctx->frame_size;
    frame->format = cdc_ctx->sample_fmt;
    frame->channel_layout = cdc_ctx->channel_layout;

    if ((ret = av_frame_get_buffer(frame, 0)) < 0)
    {
        fprintf(stderr, "av_frame_get_buffer failed.\n");
        goto cleanup;
    }

    if ((fp_in = fopen(input_file, "rb")) == NULL)
    {
        fprintf(stderr, "fopen %s failed.\n", input_file);
        goto cleanup;
    }
    if ((fp_out = fopen(output_file, "wb")) == NULL)
    {
        fprintf(stderr, "fopen %s failed.\n", output_file);
        goto cleanup;
    }

#if 1 /*cdc_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP*/
    /* Planar path: here data_size is the size of ONE sample of ONE channel. */
    data_size = av_get_bytes_per_sample(cdc_ctx->sample_fmt);

    while (!input_done)
    {
        int i = 0, ch = 0;

        if ((ret = av_frame_make_writable(frame)) < 0)
        {
            fprintf(stderr, "frame is not writable.\n");
            goto cleanup;
        }

        /* De-interleave: sample i of channel ch goes to plane data[ch]. */
        for (i = 0; i < frame->nb_samples; i++)
        {
            for (ch = 0; ch < cdc_ctx->channels; ch++)
            {
                if (fread(frame->data[ch] + data_size * i, 1, data_size, fp_in) != (size_t)data_size)
                {
                    input_done = 1;
                    break;
                }
            }
            if (input_done)
            {
                break;
            }
        }

        /* i is now the number of samples that were read for every channel. */
        if (i == 0)
        {
            break;    /* nothing new was read: do not encode a stale frame */
        }

        /* Final partial frame: zero bytes are silence for FLTP and S16P. */
        if (i < frame->nb_samples)
        {
            for (ch = 0; ch < cdc_ctx->channels; ch++)
            {
                memset(frame->data[ch] + data_size * i, 0,
                       (size_t)data_size * (size_t)(frame->nb_samples - i));
            }
        }

        encode(cdc_ctx, frame, pkt, fp_out);
    }

#else        /*cdc_ctx->sample_fmt = AV_SAMPLE_FMT_S16P*/
    /*
     * Bulk path: a whole frame is copied into data[0] in one read. That is
     * only valid when the frame has a single plane, i.e. one channel.
     */
    if (cdc_ctx->channels != 1)
    {
        fprintf(stderr, "bulk read path supports mono only.\n");
        goto cleanup;
    }

    /* Here data_size is the size of one whole frame in bytes. */
    data_size = av_samples_get_buffer_size(NULL, cdc_ctx->channels, cdc_ctx->frame_size, cdc_ctx->sample_fmt, 1);
    if (data_size <= 0)
    {
        fprintf(stderr, "av_samples_get_buffer_size failed.\n");
        goto cleanup;
    }
    printf("data_size = %d\n", data_size);

    while (!input_done)
    {
        size_t got = 0;

        if ((ret = av_frame_make_writable(frame)) < 0)
        {
            fprintf(stderr, "frame is not writable.\n");
            goto cleanup;
        }

        got = fread(frame->data[0], 1, data_size, fp_in);

        /* Drop a trailing fraction of a sample so it is not heard as a click. */
        got -= got % (size_t)av_get_bytes_per_sample(cdc_ctx->sample_fmt);
        if (got == 0)
        {
            break;    /* nothing new was read: do not encode a stale frame */
        }

        /* Final partial frame: pad the rest with silence (zero bytes). */
        if (got < (size_t)data_size)
        {
            memset(frame->data[0] + got, 0, (size_t)data_size - got);
            input_done = 1;
        }

        encode(cdc_ctx, frame, pkt, fp_out);
    }
#endif

    /* A short read means either end of file or a real I/O error. */
    if (ferror(fp_in))
    {
        fprintf(stderr, "fread %s failed.\n", input_file);
        goto cleanup;
    }

    /* A NULL frame flushes the packets the encoder is still buffering. */
    encode(cdc_ctx, NULL, pkt, fp_out);
    failed = 0;

cleanup:
    /* fclose flushes buffered output, so its failure means an incomplete MP3. */
    if (fp_out != NULL && fclose(fp_out) != 0)
    {
        fprintf(stderr, "fclose %s failed.\n", output_file);
        failed = 1;
    }
    if (fp_in != NULL)
    {
        fclose(fp_in);
    }

    /* These free functions accept a pointer to NULL and also close the codec. */
    av_frame_free(&frame);
    av_packet_free(&pkt);
    avcodec_free_context(&cdc_ctx);

    if (failed)
    {
        exit(1);
    }
}

/**
 * Program entry point: encode the PCM file argv[1] into the MP3 file argv[2].
 *
 * @return 0 after a successful encode, 1 when the arguments are missing.
 *         encode_audio() terminates the process itself on encoding errors.
 */
int main(int argc, const char *argv[])
{
    if (argc < 3)
    {
        /* Wrong usage is an error, so report it with a non-zero status. */
        fprintf(stderr, "Usage: %s <input file> <output file>\n",
                (argc > 0) ? argv[0] : "encode_audio");
        return 1;
    }

    encode_audio(argv[1], argv[2]);

    return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
注：

音频的一帧等于若干个采样，AVCodecContext->frame_size就是每帧的采样数。采样率为44100，表示一秒钟有44100个采样，那一秒钟音频就有44100 / frame_size帧。
pcm文件中左右声道的数据是按LRLRLR交错储存的，但是AVFrame中不同的采样格式，储存方式不一样。packed格式（如S16）的LR交错保存在AVFrame->data[0]中；planar格式（如S16P、FLTP）每个声道单独占一个平面，L在AVFrame->data[0]中，R在AVFrame->data[1]中。本文的S16P示例是单声道(mono)，只有data[0]一个平面，所以计算出一帧的字节数，从pcm文件中读出拷贝到data[0]中即可；FLTP双声道示例则需要一个采样一个采样地读取，LR分别储存在data[0]和data[1]中。
下载
项目主页
Github：https://github.com/newbie-plan/encode_audio
————————————————
版权声明：本文为CSDN博主「newbie-plan」的原创文章，遵循CC 4.0 BY-SA版权协议，转载请附上原文出处链接及本声明。
原文链接：https://blog.csdn.net/weixin_44721044/article/details/104576751