转自https://www.cnblogs.com/ansersion/p/5265033.html
一、概述
最近在学习ffmpeg解码的内容,参考了官方的教程http://dranger.com/ffmpeg/tutorial03.html,结果发现这个音频解码的教程有点问题。参考了各种博客,并同时啃ffplay.c的源码,发现avcodec_decode_audio4多了一个resample(重采样)的概念。
其解码以及播放音频的思路为:
首先,ffmpeg设置本机的audio播放参数(target format),如freq(频率)为44100,format为AV_SAMPLE_FMT_S16,channels为2。这个播放参数是SDL实际播放音频时使用的参数。
但是!我们的audio file(如mp3文件)的audio数据很可能有其自己的audio播放参数(source format),而这些参数不同于我们实际的SDL播放参数,于是ffmpeg在其中插入resample(重采样)的过程,将source format转换成target format。
简单的说就是一个audio参数设置思路的转变:
这个思路转变最大的好处,就是本机播放的格式可以不用再迁就audio file,而是可以根据自己的需要自行设定,缺点很显然就是ffmpeg的CPU开销会增大。
二、代码示例(源码见“附录”)
源码在官方教程基础上把其中视频部分删除,在main函数最后加上一个无限循环,并添加resample函数,最后将resample插入到sdl的回调函数之中。
源码中关于queue的代码为官网教程原版复制,其主要作用就是让main函数和SDL audio线程互斥的push queue和get queue,以下不再赘述。
1、main函数代码结构
main函数伪代码结构如下:
1 SDL Initialization 2 ffmpeg open audio file 3 Set SDL audio parameters 4 Set ffmpeg audio parameters(target format) 5 while(ffmpeg_read_frame(pkt)) { 6 packet_queue_put(pkt); 7 } 8 while(1) { 9 sleep(1); 10 }
ffmpeg从audio file中不停的读取数据,并将读出的packet放入queue中。此时我们要清楚,另外还有一个SDL audio线程在等待queue中的数据。
2、SDL audio线程
SDL audio线程主要执行一个回调函数,对应源码中的函数为audio_callback(void * userdata, Uint8 * stream, int len)。这个函数的使命就是将解码后的数据放入参数stream这个缓冲区中,以便SDL audio线程从stream缓冲区中获取数据play。这个缓冲区的大小为参数len,而userdata则是用户自定的参数。其伪代码结构如下:
1 audio_buf_index = 0; 2 while(len > 0) { 3 audio_size = audio_decode_frame(audio_buf_tmp); 4 memcpy(stream, audio_buf_tmp, audio_size); 5 len -= audio_size; 6 stream += audio_size; 7 audio_buf_index += audio_size; 8 }
其中audio_decode_frame函数会从queue中取出packet,并对packet中的frame进行解码和resample,然后将数据放入audio_buf_tmp缓冲区中。
3、Resample函数
Resample的过程和结构体SwrContext息息相关。使用这个结构体共需要2步。
1、先初始化SwrContext,指定target format和source format;
2、使用已初始化的SwrContext,对frame进行resample。
Resample的伪代码如下:
1 struct SwrContext * swr_ctx = NULL; 2 audio_hw_params_src = audio_hw_params_tgt 3 int resample(AVFrame * af, uint8_t * audio_buf, int * audio_buf_size) 4 { 5 if(audio_hw_params_src != audio_hw_params(af)) { 6 swr_ctx = swr_alloc_set_opts(audio_hw_params_tgt, audio_hw_params(af)); 7 audio_hw_params_src = audio_hw_params(af); 8 } 9 in = af; 10 swr_convert(swr_ctx, out, in); 11 audio_buf = out; 12 }
一开始,audio_hw_params_src(source format)被初始化为target format,在resample获得第一个frame后,会从该frame中提取source format,并将其赋值给audio_hw_params_src,同时初始化SwrContext这个结构体,指定target format和source format。然后调用swr_convert对输入的frame进行resample,并将resample后得到的数据放进resample函数指定的缓冲区(audio_buf)中。
附录:
1 #include <libavcodec/avcodec.h> 2 #include <libavformat/avformat.h> 3 #include <libswscale/swscale.h> 4 #include <libswresample/swresample.h> 5 6 #include <SDL.h> 7 #include <SDL_thread.h> 8 9 #ifdef __MINGW32__ 10 #undef main /* Prevents SDL from overriding main() */ 11 #endif 12 13 #include <stdio.h> 14 #include <assert.h> 15 #include <sys/types.h> 16 #include <sys/stat.h> 17 #include <fcntl.h> 18 19 // compatibility with newer API 20 #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55,28,1) 21 #define av_frame_alloc avcodec_alloc_frame 22 #define av_frame_free avcodec_free_frame 23 #endif 24 25 #define SDL_AUDIO_BUFFER_SIZE 1024 26 #define MAX_AUDIO_FRAME_SIZE 192000 27 28 #include <signal.h> 29 30 typedef struct AudioParams { 31 int freq; 32 int channels; 33 int64_t channel_layout; 34 enum AVSampleFormat fmt; 35 int frame_size; 36 int bytes_per_sec; 37 } AudioParams; 38 int sample_rate, nb_channels; 39 int64_t channel_layout; 40 AudioParams audio_hw_params_tgt; 41 AudioParams audio_hw_params_src; 42 43 int resample(AVFrame * af, uint8_t * audio_buf, int * audio_buf_size); 44 45 struct SwrContext * swr_ctx = NULL; 46 47 int resample(AVFrame * af, uint8_t * audio_buf, int * audio_buf_size) 48 { 49 int data_size = 0; 50 int resampled_data_size = 0; 51 int64_t dec_channel_layout; 52 data_size = av_samples_get_buffer_size(NULL, 53 av_frame_get_channels(af), 54 af->nb_samples, 55 af->format, 56 1); 57 58 dec_channel_layout = 59 (af->channel_layout && av_frame_get_channels(af) == av_get_channel_layout_nb_channels(af->channel_layout)) ? 
60 af->channel_layout : av_get_default_channel_layout(av_frame_get_channels(af)); 61 if( af->format != audio_hw_params_src.fmt || 62 af->sample_rate != audio_hw_params_src.freq || 63 dec_channel_layout != audio_hw_params_src.channel_layout || 64 !swr_ctx) { 65 swr_free(&swr_ctx); 66 swr_ctx = swr_alloc_set_opts(NULL, 67 audio_hw_params_tgt.channel_layout, audio_hw_params_tgt.fmt, audio_hw_params_tgt.freq, 68 dec_channel_layout, af->format, af->sample_rate, 69 0, NULL); 70 if (!swr_ctx || swr_init(swr_ctx) < 0) { 71 av_log(NULL, AV_LOG_ERROR, 72 "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n", 73 af->sample_rate, av_get_sample_fmt_name(af->format), av_frame_get_channels(af), 74 audio_hw_params_tgt.freq, av_get_sample_fmt_name(audio_hw_params_tgt.fmt), audio_hw_params_tgt.channels); 75 swr_free(&swr_ctx); 76 return -1; 77 } 78 printf("swr_init\n"); 79 audio_hw_params_src.channels = av_frame_get_channels(af); 80 audio_hw_params_src.fmt = af->format; 81 audio_hw_params_src.freq = af->sample_rate; 82 } 83 84 if (swr_ctx) { 85 const uint8_t **in = (const uint8_t **)af->extended_data; 86 uint8_t **out = &audio_buf; 87 int out_count = (int64_t)af->nb_samples * audio_hw_params_tgt.freq / af->sample_rate + 256; 88 int out_size = av_samples_get_buffer_size(NULL, audio_hw_params_tgt.channels, out_count, audio_hw_params_tgt.fmt, 0); 89 int len2; 90 if (out_size < 0) { 91 av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n"); 92 return -1; 93 } 94 av_fast_malloc(&audio_buf, audio_buf_size, out_size); 95 if (!audio_buf) 96 return AVERROR(ENOMEM); 97 len2 = swr_convert(swr_ctx, out, out_count, in, af->nb_samples); 98 if (len2 < 0) { 99 av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n"); 100 return -1; 101 } 102 if (len2 == out_count) { 103 av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n"); 104 if (swr_init(swr_ctx) < 0) 105 swr_free(&swr_ctx); 106 } 107 resampled_data_size = len2 * 
audio_hw_params_tgt.channels * av_get_bytes_per_sample(audio_hw_params_tgt.fmt); 108 } else { 109 audio_buf = af->data[0]; 110 resampled_data_size = data_size; 111 } 112 113 return resampled_data_size; 114 } 115 116 static void sigterm_handler(int sig) 117 { 118 exit(123); 119 } 120 121 typedef struct PacketQueue { 122 AVPacketList *first_pkt, *last_pkt; 123 int nb_packets; 124 int size; 125 SDL_mutex *mutex; 126 SDL_cond *cond; 127 } PacketQueue; 128 129 PacketQueue audioq; 130 131 int quit = 0; 132 133 void packet_queue_init(PacketQueue *q) { 134 memset(q, 0, sizeof(PacketQueue)); 135 q->mutex = SDL_CreateMutex(); 136 q->cond = SDL_CreateCond(); 137 } 138 int packet_queue_put(PacketQueue *q, AVPacket *pkt) { 139 140 AVPacketList *pkt1; 141 if(av_dup_packet(pkt) < 0) { 142 return -1; 143 } 144 pkt1 = av_malloc(sizeof(AVPacketList)); 145 if (!pkt1) 146 return -1; 147 pkt1->pkt = *pkt; 148 pkt1->next = NULL; 149 150 151 SDL_LockMutex(q->mutex); 152 153 if (!q->last_pkt) 154 q->first_pkt = pkt1; 155 else 156 q->last_pkt->next = pkt1; 157 q->last_pkt = pkt1; 158 q->nb_packets++; 159 q->size += pkt1->pkt.size; 160 SDL_CondSignal(q->cond); 161 162 SDL_UnlockMutex(q->mutex); 163 return 0; 164 } 165 static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block) 166 { 167 AVPacketList *pkt1; 168 int ret; 169 170 SDL_LockMutex(q->mutex); 171 172 for(;;) { 173 174 if(quit) { 175 ret = -1; 176 break; 177 } 178 179 pkt1 = q->first_pkt; 180 if (pkt1) { 181 q->first_pkt = pkt1->next; 182 if (!q->first_pkt) 183 q->last_pkt = NULL; 184 q->nb_packets--; 185 q->size -= pkt1->pkt.size; 186 *pkt = pkt1->pkt; 187 av_free(pkt1); 188 ret = 1; 189 break; 190 } else if (!block) { 191 ret = 0; 192 break; 193 } else { 194 SDL_CondWait(q->cond, q->mutex); 195 } 196 } 197 SDL_UnlockMutex(q->mutex); 198 return ret; 199 } 200 201 AVFrame frame; 202 int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size) { 203 204 static AVPacket pkt; 205 static uint8_t 
*audio_pkt_data = NULL; 206 static int audio_pkt_size = 0; 207 208 int len1, data_size = 0; 209 210 for(;;) { 211 while(audio_pkt_size > 0) { 212 int got_frame = 0; 213 len1 = avcodec_decode_audio4(aCodecCtx, &frame, &got_frame, &pkt); 214 if(len1 < 0) { 215 /* if error, skip frame */ 216 audio_pkt_size = 0; 217 break; 218 } 219 audio_pkt_data += len1; 220 audio_pkt_size -= len1; 221 data_size = 0; 222 if(got_frame) { 223 data_size = resample(&frame, audio_buf, &buf_size); 224 // data_size = av_samples_get_buffer_size(NULL, 225 // aCodecCtx->channels, 226 // frame.nb_samples, 227 // aCodecCtx->sample_fmt, 228 // 1); 229 assert(data_size <= buf_size); 230 // memcpy(audio_buf, frame.data[0], data_size); 231 } 232 if(data_size <= 0) { 233 /* No data yet, get more frames */ 234 continue; 235 } 236 // memcpy(audio_buf, frame.data[0], data_size); 237 238 /* We have data, return it and come back for more later */ 239 return data_size; 240 } 241 if(pkt.data) 242 av_free_packet(&pkt); 243 244 if(quit) { 245 return -1; 246 } 247 248 if(packet_queue_get(&audioq, &pkt, 1) < 0) { 249 return -1; 250 } 251 audio_pkt_data = pkt.data; 252 audio_pkt_size = pkt.size; 253 } 254 } 255 256 void audio_callback(void *userdata, Uint8 *stream, int len) { 257 258 AVCodecContext *aCodecCtx = (AVCodecContext *)userdata; 259 int len1, audio_size; 260 261 static uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2]; 262 static unsigned int audio_buf_size = 0; 263 static unsigned int audio_buf_index = 0; 264 265 while(len > 0) { 266 if(audio_buf_index >= audio_buf_size) { 267 /* We have already sent all our data; get more */ 268 audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf)); 269 if(audio_size < 0) { 270 /* If error, output silence */ 271 audio_buf_size = 1024; // arbitrary? 
272 memset(audio_buf, 0, audio_buf_size); 273 } else { 274 audio_buf_size = audio_size; 275 } 276 audio_buf_index = 0; 277 } 278 len1 = audio_buf_size - audio_buf_index; 279 if(len1 > len) 280 len1 = len; 281 memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1); 282 len -= len1; 283 stream += len1; 284 audio_buf_index += len1; 285 } 286 } 287 288 int main(int argc, char *argv[]) { 289 290 AVFormatContext *pFormatCtx = NULL; 291 int i, audioStream; 292 AVPacket packet; 293 294 AVCodecContext *aCodecCtxOrig = NULL; 295 AVCodecContext *aCodecCtx = NULL; 296 AVCodec *aCodec = NULL; 297 298 SDL_Event event; 299 SDL_AudioSpec wanted_spec, spec; 300 301 signal(SIGINT , sigterm_handler); /* Interrupt (ANSI). */ 302 signal(SIGTERM, sigterm_handler); /* Termination (ANSI). */ 303 304 if(argc < 2) { 305 fprintf(stderr, "Usage: test <file>\n"); 306 exit(1); 307 } 308 // Register all formats and codecs 309 av_register_all(); 310 311 if(SDL_Init(SDL_INIT_AUDIO)) { 312 fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError()); 313 exit(1); 314 } 315 316 // Open video file 317 if(avformat_open_input(&pFormatCtx, argv[1], NULL, NULL)!=0) 318 return -1; // Couldn't open file 319 320 // Retrieve stream information 321 if(avformat_find_stream_info(pFormatCtx, NULL)<0) 322 return -1; // Couldn't find stream information 323 324 // Dump information about file onto standard error 325 av_dump_format(pFormatCtx, 0, argv[1], 0); 326 327 // Find the first video stream 328 audioStream=-1; 329 for(i=0; i<pFormatCtx->nb_streams; i++) { 330 if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO && 331 audioStream < 0) { 332 audioStream=i; 333 } 334 } 335 // if(videoStream==-1) 336 // return -1; // Didn't find a video stream 337 if(audioStream==-1) 338 return -1; 339 340 aCodecCtxOrig=pFormatCtx->streams[audioStream]->codec; 341 aCodec = avcodec_find_decoder(aCodecCtxOrig->codec_id); 342 if(!aCodec) { 343 fprintf(stderr, "Unsupported codec!\n"); 344 return -1; 345 } 346 
347 // Copy context 348 aCodecCtx = avcodec_alloc_context3(aCodec); 349 if(avcodec_copy_context(aCodecCtx, aCodecCtxOrig) != 0) { 350 fprintf(stderr, "Couldn't copy codec context"); 351 return -1; // Error copying codec context 352 } 353 354 avcodec_open2(aCodecCtx, aCodec, NULL); 355 356 sample_rate = aCodecCtx->sample_rate; 357 nb_channels = aCodecCtx->channels; 358 channel_layout = aCodecCtx->channel_layout; 359 360 printf("channel_layout=%" PRId64 "\n", channel_layout); 361 printf("nb_channels=%d\n", nb_channels); 362 printf("freq=%d\n", sample_rate); 363 364 if (!channel_layout || nb_channels != av_get_channel_layout_nb_channels(channel_layout)) { 365 channel_layout = av_get_default_channel_layout(nb_channels); 366 channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX; 367 printf("correction\n"); 368 } 369 370 // Set audio settings from codec info 371 wanted_spec.freq = sample_rate; 372 wanted_spec.format = AUDIO_S16SYS; 373 wanted_spec.channels = nb_channels; 374 wanted_spec.silence = 0; 375 wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE; 376 wanted_spec.callback = audio_callback; 377 wanted_spec.userdata = aCodecCtx; 378 379 if(SDL_OpenAudio(&wanted_spec, &spec) < 0) { 380 fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError()); 381 return -1; 382 } 383 printf("freq: %d\tchannels: %d\n", spec.freq, spec.channels); 384 385 audio_hw_params_tgt.fmt = AV_SAMPLE_FMT_S16; 386 audio_hw_params_tgt.freq = spec.freq; 387 audio_hw_params_tgt.channel_layout = channel_layout; 388 audio_hw_params_tgt.channels = spec.channels; 389 audio_hw_params_tgt.frame_size = av_samples_get_buffer_size(NULL, audio_hw_params_tgt.channels, 1, audio_hw_params_tgt.fmt, 1); 390 audio_hw_params_tgt.bytes_per_sec = av_samples_get_buffer_size(NULL, audio_hw_params_tgt.channels, audio_hw_params_tgt.freq, audio_hw_params_tgt.fmt, 1); 391 if (audio_hw_params_tgt.bytes_per_sec <= 0 || audio_hw_params_tgt.frame_size <= 0) { 392 printf("size error\n"); 393 return -1; 394 } 395 audio_hw_params_src = 
audio_hw_params_tgt; 396 397 // audio_st = pFormatCtx->streams[index] 398 packet_queue_init(&audioq); 399 SDL_PauseAudio(0); 400 401 // Read frames and save first five frames to disk 402 i=0; 403 while(av_read_frame(pFormatCtx, &packet)>=0) { 404 if(packet.stream_index==audioStream) { 405 packet_queue_put(&audioq, &packet); 406 } else { 407 av_free_packet(&packet); 408 } 409 // Free the packet that was allocated by av_read_frame 410 SDL_PollEvent(&event); 411 switch(event.type) { 412 case SDL_QUIT: 413 quit = 1; 414 SDL_Quit(); 415 exit(0); 416 break; 417 default: 418 break; 419 } 420 421 } 422 423 while(1) SDL_Delay(1000); 424 425 // Close the codecs 426 avcodec_close(aCodecCtxOrig); 427 avcodec_close(aCodecCtx); 428 429 // Close the video file 430 avformat_close_input(&pFormatCtx); 431 432 return 0; 433 }