Tutorial 03: Playing Sound-CSDN博客

本文链接：https://blog.csdn.net/oldmtn/article/details/48131375
播放声音和播放视频数有点区别的。
这里将解码得到的音频AVPacket放到一个一个的队列中，然后在SDL的回调函数里面取每一个AVPacket，并将其解码，将得到的buffer填充到SDL请求获取的buffer中。
这里需要注意的是: 在audio_decode_frame的avcodec_decode_audio4函数里面，我们获取到的每一个AVFrame，都需要判断其格式，因为SDL需要的音频格式为
AV_SAMPLE_FMT_S16，因此从文件里面获取的音频格式若不是AV_SAMPLE_FMT_S16的话，需要做一个重采样。
下面是代码：
// tutorial03.c
// A pedagogical video player that will stream through every video frame as fast as it can
// and play audio (out of sync).
//
// This tutorial was written by Stephen Dranger (dranger@gmail.com).
//
// Code based on FFplay, Copyright (c) 2003 Fabrice Bellard, 
// and a tutorial by Martin Bohme (boehme@inb.uni-luebeckREMOVETHIS.de)
// Tested on Gentoo, CVS version 5/01/07 compiled with GCC 4.1.1
//
// Use the Makefile to build all examples.
//
// Run using
// tutorial03 myvideofile.mpg
//
// to play the stream on your screen.



extern "C"
{
#include "libavutil/avstring.h"
#include "libavutil/mathematics.h"
#include "libavutil/pixdesc.h"
#include "libavutil/imgutils.h"
#include "libavutil/dict.h"
#include "libavutil/parseutils.h"
#include "libavutil/samplefmt.h"
#include "libavutil/avassert.h"
#include "libavutil/time.h"
#include "libavformat/avformat.h"
#include "libavdevice/avdevice.h"
#include "libswscale/swscale.h"
#include "libavutil/opt.h"
#include "libavcodec/avfft.h"
#include "libswresample/swresample.h"

#include "SDL1.2/SDL.h"
#include "SDL1.2/SDL_thread.h"
}

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#pragma comment(lib, "SDL.lib")

#ifdef __MINGW32__
#undef main /* Prevents SDL from overriding main() */
#endif

#include <stdio.h>

#define SDL_AUDIO_BUFFER_SIZE 1024
#define MAX_AUDIO_FRAME_SIZE 192000


struct SwrContext *swr_ctx;
DECLARE_ALIGNED(16, uint8_t, audio_buf2)[MAX_AUDIO_FRAME_SIZE * 4];

static inline double rint(double x)
{
    return x >= 0 ? floor(x + 0.5) : ceil(x - 0.5);
}

typedef struct PacketQueue {
    AVPacketList *first_pkt, *last_pkt;
    int nb_packets;
    int size;
    SDL_mutex *mutex;
    SDL_cond *cond;
} PacketQueue;

PacketQueue audioq;

int quit = 0;

void packet_queue_init(PacketQueue *q) {
    memset(q, 0, sizeof(PacketQueue));
    q->mutex = SDL_CreateMutex();
    q->cond = SDL_CreateCond();
}
int packet_queue_put(PacketQueue *q, AVPacket *pkt) {

    AVPacketList *pkt1;
    if(av_dup_packet(pkt) < 0) {
        return -1;
    }
    pkt1 = (AVPacketList *)av_malloc(sizeof(AVPacketList));
    if (!pkt1)
        return -1;
    pkt1->pkt = *pkt;
    pkt1->next = NULL;


    SDL_LockMutex(q->mutex);

    if (!q->last_pkt)
        q->first_pkt = pkt1;
    else
        q->last_pkt->next = pkt1;
    q->last_pkt = pkt1;
    q->nb_packets++;
    q->size += pkt1->pkt.size;
    SDL_CondSignal(q->cond);

    SDL_UnlockMutex(q->mutex);
    return 0;
}
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
    AVPacketList *pkt1;
    int ret;

    SDL_LockMutex(q->mutex);

    for(;;) {

        if(quit) {
            ret = -1;
            break;
        }

        pkt1 = q->first_pkt;
        if (pkt1) {
            q->first_pkt = pkt1->next;
            if (!q->first_pkt)
                q->last_pkt = NULL;
            q->nb_packets--;
            q->size -= pkt1->pkt.size;
            *pkt = pkt1->pkt;
            av_free(pkt1);
            ret = 1;
            break;
        } else if (!block) {
            ret = 0;
            break;
        } else {
            SDL_CondWait(q->cond, q->mutex);
        }
    }
    SDL_UnlockMutex(q->mutex);
    return ret;
}

int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size) {

    static AVPacket pkt;
    static uint8_t *audio_pkt_data = NULL;
    static int audio_pkt_size = 0;
    static AVFrame frame;

    int len1, data_size = 0;

    for(;;) {
        while(audio_pkt_size > 0) {
            int got_frame = 0;
            len1 = avcodec_decode_audio4(aCodecCtx, &frame, &got_frame, &pkt);
            if(len1 < 0) {
                /* if error, skip frame */
                audio_pkt_size = 0;
                break;
            }
            audio_pkt_data += len1;
            audio_pkt_size -= len1;
            if (got_frame)
            {
                uint64_t dec_channel_layout =
                    (aCodecCtx->channel_layout && aCodecCtx->channels == av_get_channel_layout_nb_channels(aCodecCtx->channel_layout)) ?
                    aCodecCtx->channel_layout : av_get_default_channel_layout(aCodecCtx->channels);

                AVSampleFormat tgtFmt = AV_SAMPLE_FMT_S16;
                if( aCodecCtx->sample_fmt != tgtFmt ) {
                    // 需要重采样
                    if( swr_ctx == NULL ) {
                        swr_ctx = swr_alloc();
                        swr_ctx = swr_alloc_set_opts(swr_ctx,
                            dec_channel_layout, tgtFmt, aCodecCtx->sample_rate,
                            dec_channel_layout, aCodecCtx->sample_fmt, aCodecCtx->sample_rate, 0, NULL);

                        if( !swr_ctx || swr_init(swr_ctx) < 0 ) {
                            assert(false);
                        }
                    }

                    if( swr_ctx ) {
                        const uint8_t **in = (const uint8_t **)frame.extended_data;
                        uint8_t *out[] = {audio_buf2};
                        int out_count = sizeof(audio_buf2) / aCodecCtx->channels / av_get_bytes_per_sample(aCodecCtx->sample_fmt);

                        int len2 = swr_convert(swr_ctx, out, out_count, in, frame.nb_samples);
                        if( len2 < 0 ) {
                            LogPrintfA("swr_convert() failed\n");
                            break;
                        }
                        if( len2 == out_count ) {
                            LogPrintfA("warning: audio buffer is probably too small\n");
                            swr_init(swr_ctx);
                        }

                        data_size = len2 * aCodecCtx->channels * av_get_bytes_per_sample(tgtFmt);
                        memcpy(audio_buf, audio_buf2, data_size);
                    }
                } else {
                    // 不需要重采样
                    data_size = av_samples_get_buffer_size(NULL,
                        aCodecCtx->channels,
                        frame.nb_samples,
                        aCodecCtx->sample_fmt,
                        1);
                    assert(data_size <= buf_size);
                    memcpy(audio_buf, frame.data[0], data_size);
                }
            }
            if(data_size <= 0) {
                /* No data yet, get more frames */
                continue;
            }
            /* We have data, return it and come back for more later */
            return data_size;
        }
        if(pkt.data)
            av_free_packet(&pkt);

        if(quit) {
            return -1;
        }

        if(packet_queue_get(&audioq, &pkt, 1) < 0) {
            return -1;
        }
        audio_pkt_data = pkt.data;
        audio_pkt_size = pkt.size;
    }
}

void audio_callback(void *userdata, Uint8 *stream, int len) {

    AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
    int len1, audio_size;

    static uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
    static unsigned int audio_buf_size = 0;
    static unsigned int audio_buf_index = 0;

    while(len > 0) {
        if(audio_buf_index >= audio_buf_size) {
            /* We have already sent all our data; get more */
            audio_size = audio_decode_frame(aCodecCtx, audio_buf, audio_buf_size);
            if(audio_size < 0) {
                /* If error, output silence */
                audio_buf_size = 1024; // arbitrary?
                memset(audio_buf, 0, audio_buf_size);
            } else {
                audio_buf_size = audio_size;
            }
            audio_buf_index = 0;
        }
        len1 = audio_buf_size - audio_buf_index;
        if(len1 > len)
            len1 = len;
        memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
        len -= len1;
        stream += len1;
        audio_buf_index += len1;
    }
}

int _tmain() {
    AVFormatContext *pFormatCtx = NULL;
    int             i, videoStream, audioStream;
    AVCodecContext  *pCodecCtx = NULL;
    AVCodec         *pCodec = NULL;
    AVFrame         *pFrame = NULL; 
    AVPacket        packet;
    int             frameFinished;
    //float           aspect_ratio;

    AVCodecContext  *aCodecCtx = NULL;
    AVCodec         *aCodec = NULL;

    SDL_Overlay     *bmp = NULL;
    SDL_Surface     *screen = NULL;
    SDL_Rect        rect;
    SDL_Event       event;
    SDL_AudioSpec   wanted_spec, spec;

    struct SwsContext   *sws_ctx            = NULL;
    AVDictionary        *videoOptionsDict   = NULL;
    AVDictionary        *audioOptionsDict   = NULL;

    char szFile[] = "song.flv";

    // Register all formats and codecs
    av_register_all();

    if( SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER) ) {
        fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
        exit(1);
    }

    // Open video file
    if( avformat_open_input(&pFormatCtx, szFile, NULL, NULL) != 0 ) {
        return -1; // Couldn't open file
    }

    // Retrieve stream information
    if( avformat_find_stream_info(pFormatCtx, NULL) < 0 ) {
        return -1; // Couldn't find stream information
    }

    // Dump information about file onto standard error
    av_dump_format(pFormatCtx, 0, szFile, 0);

    // Find the first video stream
    videoStream = -1;
    audioStream = -1;
    for( i = 0; i < pFormatCtx->nb_streams; i ++) {
        if( pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
            videoStream < 0) {
                videoStream = i;
        }
        if( pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
            audioStream < 0) {
                audioStream = i;
        }
    }
    if( videoStream == -1 ) {
        return -1; // Didn't find a video stream
    }
    if( audioStream == -1 ) {
        return -1;
    }

    aCodecCtx = pFormatCtx->streams[audioStream]->codec;
    // Set audio settings from codec info
    wanted_spec.freq = aCodecCtx->sample_rate;
    wanted_spec.format = AUDIO_S16SYS;
    wanted_spec.channels = aCodecCtx->channels;
    wanted_spec.silence = 0;
    wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
    wanted_spec.callback = audio_callback;
    wanted_spec.userdata = aCodecCtx;

    if( SDL_OpenAudio(&wanted_spec, &spec) < 0) {
        fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
        return -1;
    }
    aCodec = avcodec_find_decoder(aCodecCtx->codec_id);
    if( !aCodec ) {
        fprintf(stderr, "Unsupported codec!\n");
        return -1;
    }
    avcodec_open2(aCodecCtx, aCodec, &audioOptionsDict);

    // audio_st = pFormatCtx->streams[index]
    packet_queue_init(&audioq);
    SDL_PauseAudio(0);

    // Get a pointer to the codec context for the video stream
    pCodecCtx = pFormatCtx->streams[videoStream]->codec;

    // Find the decoder for the video stream
    pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
    if( pCodec == NULL ) {
        fprintf(stderr, "Unsupported codec!\n");
        return -1; // Codec not found
    }
    // Open codec
    if( avcodec_open2(pCodecCtx, pCodec, &videoOptionsDict) < 0 ) {
        return -1; // Could not open codec
    }

    // Allocate video frame
    pFrame = av_frame_alloc();

    // Make a screen to put our video

#ifndef __DARWIN__
    screen = SDL_SetVideoMode(pCodecCtx->width, pCodecCtx->height, 0, 0);
#else
    screen = SDL_SetVideoMode(pCodecCtx->width, pCodecCtx->height, 24, 0);
#endif
    if( !screen ) {
        fprintf(stderr, "SDL: could not set video mode - exiting\n");
        exit(1);
    }

    // Allocate a place to put our YUV image on that screen
    bmp = SDL_CreateYUVOverlay(pCodecCtx->width,
        pCodecCtx->height,
        SDL_YV12_OVERLAY,
        screen);
    sws_ctx =
        sws_getContext
        (
        pCodecCtx->width,
        pCodecCtx->height,
        pCodecCtx->pix_fmt,
        pCodecCtx->width,
        pCodecCtx->height,
        PIX_FMT_YUV420P,
        SWS_BILINEAR,
        NULL,
        NULL,
        NULL
        );


    // Read frames and save first five frames to disk
    i = 0;
    while( av_read_frame(pFormatCtx, &packet) >= 0 ) {
        // Is this a packet from the video stream?
        if( packet.stream_index == videoStream ) {
            // Decode video frame
            avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished,
                &packet);

            // Did we get a video frame?
            if( frameFinished ) {
                SDL_LockYUVOverlay(bmp);

                AVPicture pict;
                pict.data[0] = bmp->pixels[0];
                pict.data[1] = bmp->pixels[2];
                pict.data[2] = bmp->pixels[1];

                pict.linesize[0] = bmp->pitches[0];
                pict.linesize[1] = bmp->pitches[2];
                pict.linesize[2] = bmp->pitches[1];

                // Convert the image into YUV format that SDL uses
                sws_scale
                    (
                    sws_ctx, 
                    (uint8_t const * const *)pFrame->data, 
                    pFrame->linesize, 
                    0,
                    pCodecCtx->height,
                    pict.data,
                    pict.linesize
                    );

                SDL_UnlockYUVOverlay(bmp);

                rect.x = 0;
                rect.y = 0;
                rect.w = pCodecCtx->width;
                rect.h = pCodecCtx->height;
                SDL_DisplayYUVOverlay(bmp, &rect);
                av_free_packet(&packet);

                SDL_Delay(40);
            }
        } else if(packet.stream_index==audioStream) {
            packet_queue_put(&audioq, &packet);
        } else {
            av_free_packet(&packet);
        }

        // Free the packet that was allocated by av_read_frame
        SDL_PollEvent(&event);
        switch(event.type) {
        case SDL_QUIT:
            quit = 1;
            SDL_Quit();
            exit(0);
            break;
        default:
            break;
        }
    }

    // Free the YUV frame
    av_free(pFrame);

    // Close the codec
    avcodec_close(pCodecCtx);

    // Close the video file
    avformat_close_input(&pFormatCtx);

    return 0;
}