Purpose: analyze audio/video synchronization by walking through the ffplay source code.
Overview:
1: The main threads are read_thread, audio_thread, video_thread, subtitle_thread, and event_loop.
read_thread: reads AVPacket data from the source file and puts it into the corresponding PacketQueue (a minimal demux sketch follows this list).
audio_thread: takes AVPacket objects from the audioq queue, decodes them into AVFrames, and puts those into the sampq queue.
video_thread: takes AVPacket objects from the videoq queue, decodes them into AVFrames, and puts those into the pictq queue.
subtitle_thread: takes AVPacket objects from the subtitleq queue, decodes them (into AVSubtitle), and puts the results into the subpq queue.
event_loop: handles events and drives video and subtitle display.
sdl_audio_callback: outputs the audio data.
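To make the producer side concrete, here is a minimal sketch of the demux loop. It is not the real read_thread, which additionally handles seeking, pausing, EOF and queue-size limits; ic and the is->*_stream indices are the usual ffplay fields:
/* Minimal demux loop: read packets and route them by stream index. */
for (;;) {
    AVPacket pkt;
    if (av_read_frame(ic, &pkt) < 0)
        break;                                 /* EOF or read error */
    if (pkt.stream_index == is->audio_stream)
        packet_queue_put(&is->audioq, &pkt);
    else if (pkt.stream_index == is->video_stream)
        packet_queue_put(&is->videoq, &pkt);
    else if (pkt.stream_index == is->subtitle_stream)
        packet_queue_put(&is->subtitleq, &pkt);
    else
        av_packet_unref(&pkt);                 /* a stream we do not play */
}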
2: The main data structures are PacketQueue and FrameQueue. The PacketQueue instances are videoq, subtitleq and audioq; the FrameQueue instances are pictq, subpq and sampq.
2.1: Data structure types and initialization
Type definitions:
typedef struct MyAVPacketList {
    AVPacket pkt;
    struct MyAVPacketList *next;
    int serial;
} MyAVPacketList;
The meaning of each member can be worked out from the put/get code analyzed in section 2.2 below.
typedef struct PacketQueue {
    MyAVPacketList *first_pkt, *last_pkt;
    int nb_packets;   /* number of packets in the list; starts at 0, ++ on put, -- on get */
    int size;         /* total byte size held by the list; starts at 0, += on put, -= on get */
    int64_t duration; /* total duration held by the list; starts at 0, += on put, -= on get */
    int abort_request;
    int serial;
    SDL_mutex *mutex;
    SDL_cond *cond;
} PacketQueue;
Initialization call:
if (packet_queue_init(&is->videoq) < 0 ||
    packet_queue_init(&is->audioq) < 0 ||
    packet_queue_init(&is->subtitleq) < 0)
    goto fail;
Initialization implementation:
static int packet_queue_init(PacketQueue *q)
{
    memset(q, 0, sizeof(PacketQueue));
    q->mutex = SDL_CreateMutex();
    if (!q->mutex) {
        av_log(NULL, AV_LOG_FATAL, "SDL_CreateMutex(): %s\n", SDL_GetError());
        return AVERROR(ENOMEM);
    }
    q->cond = SDL_CreateCond();
    if (!q->cond) {
        av_log(NULL, AV_LOG_FATAL, "SDL_CreateCond(): %s\n", SDL_GetError());
        return AVERROR(ENOMEM);
    }
    q->abort_request = 1;
    return 0;
}
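Note that abort_request starts at 1, so a freshly initialized queue rejects puts and gets. It is enabled later by packet_queue_start which, in the flush_pkt-based ffplay versions this note follows, also queues the special flush_pkt:
static void packet_queue_start(PacketQueue *q)
{
    SDL_LockMutex(q->mutex);
    q->abort_request = 0;
    packet_queue_put_private(q, &flush_pkt);
    SDL_UnlockMutex(q->mutex);
}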
From the definition of PacketQueue we can see that it maintains a linked list whose nodes have type MyAVPacketList. Since there is no memory pooling, every put and get of an AVPacket involves allocating and freeing a list node on the heap. FrameQueue, analyzed next, is different: as the definition below shows, it maintains a fixed array of Frame structs, and each Frame contains a member for holding an AVFrame. The array is sized at init time according to the stream type, so storing and consuming an AVFrame only updates indices and flags; no per-item memory allocation or freeing takes place.
Data structure definitions:
typedef struct Frame {
    AVFrame *frame;
    AVSubtitle sub;
    int serial;
    double pts;           /* presentation timestamp for the frame */
    double duration;      /* estimated duration of the frame */
    int64_t pos;          /* byte position of the frame in the input file */
    int width;
    int height;
    int format;
    AVRational sar;
    int uploaded;
    int flip_v;
} Frame;
typedef struct FrameQueue {
    Frame queue[FRAME_QUEUE_SIZE];
    int rindex;       /* index of the next Frame to read */
    int windex;       /* index of the next Frame to write */
    int size;         /* number of valid Frames currently stored */
    int max_size;     /* number of array slots actually used */
    int keep_last;
    int rindex_shown;
    SDL_mutex *mutex;
    SDL_cond *cond;
    PacketQueue *pktq;
} FrameQueue;
Initialization calls:
/* start video display */
if (frame_queue_init(&is->pictq, &is->videoq, VIDEO_PICTURE_QUEUE_SIZE, 1) < 0)
    goto fail;
if (frame_queue_init(&is->subpq, &is->subtitleq, SUBPICTURE_QUEUE_SIZE, 0) < 0)
    goto fail;
if (frame_queue_init(&is->sampq, &is->audioq, SAMPLE_QUEUE_SIZE, 1) < 0)
    goto fail;
Initialization implementation:
static int frame_queue_init(FrameQueue *f, PacketQueue *pktq, int max_size, int keep_last)
{
    int i;
    memset(f, 0, sizeof(FrameQueue));
    if (!(f->mutex = SDL_CreateMutex())) {
        av_log(NULL, AV_LOG_FATAL, "SDL_CreateMutex(): %s\n", SDL_GetError());
        return AVERROR(ENOMEM);
    }
    if (!(f->cond = SDL_CreateCond())) {
        av_log(NULL, AV_LOG_FATAL, "SDL_CreateCond(): %s\n", SDL_GetError());
        return AVERROR(ENOMEM);
    }
    f->pktq = pktq;
    f->max_size = FFMIN(max_size, FRAME_QUEUE_SIZE);
    f->keep_last = !!keep_last;
    for (i = 0; i < f->max_size; i++)
        if (!(f->queue[i].frame = av_frame_alloc()))
            return AVERROR(ENOMEM);
    return 0;
}
2.2: Analysis of PacketQueue put/get and its bookkeeping fields
First the put path. After read_thread reads an AVPacket from the source file, it inserts it into the corresponding PacketQueue; internally this is just an append to the linked list.
static int packet_queue_put_private(PacketQueue *q, AVPacket *pkt)
{
    MyAVPacketList *pkt1;
    if (q->abort_request)
        return -1;
    pkt1 = av_malloc(sizeof(MyAVPacketList));
    if (!pkt1)
        return -1;
    pkt1->pkt = *pkt;
    pkt1->next = NULL;
    if (pkt == &flush_pkt)
        q->serial++;
    pkt1->serial = q->serial;
    if (!q->last_pkt)
        q->first_pkt = pkt1;
    else
        q->last_pkt->next = pkt1;
    q->last_pkt = pkt1;
    q->nb_packets++;
    q->size += pkt1->pkt.size + sizeof(*pkt1);
    q->duration += pkt1->pkt.duration;
    /* XXX: should duplicate packet data in DV case */
    SDL_CondSignal(q->cond);
    return 0;
}

static int packet_queue_put(PacketQueue *q, AVPacket *pkt)
{
    int ret;
    SDL_LockMutex(q->mutex);
    ret = packet_queue_put_private(q, pkt);
    SDL_UnlockMutex(q->mutex);
    if (pkt != &flush_pkt && ret < 0)
        av_packet_unref(pkt);
    return ret;
}
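The serial field in the put code above is what makes seeking work: on a seek, read_thread flushes each queue and then queues flush_pkt, which bumps q->serial, so every packet queued after the seek carries a new serial and stale data can be detected and discarded downstream. Roughly (again as in the flush_pkt-based versions):
/* In read_thread, after a successful seek: */
if (is->audio_stream >= 0) {
    packet_queue_flush(&is->audioq);
    packet_queue_put(&is->audioq, &flush_pkt);
}
if (is->video_stream >= 0) {
    packet_queue_flush(&is->videoq);
    packet_queue_put(&is->videoq, &flush_pkt);
}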
Now the get path. As the code below shows, the caller supplies an AVPacket; each get copies the packet out of the head node and then frees that node.
/* return < 0 if aborted, 0 if no packet and > 0 if packet. */
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block, int *serial)
{
    MyAVPacketList *pkt1;
    int ret;
    SDL_LockMutex(q->mutex);
    for (;;) {
        if (q->abort_request) {
            ret = -1;
            break;
        }
        pkt1 = q->first_pkt;
        if (pkt1) {
            q->first_pkt = pkt1->next;
            if (!q->first_pkt)
                q->last_pkt = NULL;
            q->nb_packets--;
            q->size -= pkt1->pkt.size + sizeof(*pkt1);
            q->duration -= pkt1->pkt.duration;
            *pkt = pkt1->pkt;
            if (serial)
                *serial = pkt1->serial;
            av_free(pkt1);
            ret = 1;
            break;
        } else if (!block) {
            ret = 0;
            break;
        } else {
            SDL_CondWait(q->cond, q->mutex);
        }
    }
    SDL_UnlockMutex(q->mutex);
    return ret;
}
The above covers writing to and reading from a PacketQueue. Writing happens in read_thread, which puts every AVPacket it reads into the matching queue. Reading happens in decoder_decode_frame, which takes an AVPacket from the queue, feeds it to the decoder, and then receives the decoded AVFrame.
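decoder_decode_frame itself is not listed in this note; a stripped-down sketch of the idea looks like the following (the real function also handles flush_pkt, serial mismatches, EOF and subtitles; Decoder is ffplay's per-stream decoder context holding avctx, queue and pkt_serial):
/* Simplified: pull one decoded frame, feeding packets as needed. */
static int decode_one_frame(Decoder *d, AVFrame *frame)
{
    for (;;) {
        AVPacket pkt;
        int ret = avcodec_receive_frame(d->avctx, frame);
        if (ret >= 0)
            return 1;                   /* got a frame */
        if (ret != AVERROR(EAGAIN))
            return ret;                 /* EOF or hard error */
        if (packet_queue_get(d->queue, &pkt, 1, &d->pkt_serial) < 0)
            return -1;                  /* queue was aborted */
        if (avcodec_send_packet(d->avctx, &pkt) < 0)
            av_log(NULL, AV_LOG_ERROR, "avcodec_send_packet failed\n");
        av_packet_unref(&pkt);
    }
}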
2.3: How FrameQueue stores and retrieves AVFrames. As the code below shows, to store an AVFrame the producer first calls frame_queue_peek_writable to obtain a free Frame slot, fills that Frame in, and then calls frame_queue_push, which advances windex and increments size.
static Frame *frame_queue_peek_writable(FrameQueue *f)
{
    /* wait until we have space to put a new frame */
    SDL_LockMutex(f->mutex);
    while (f->size >= f->max_size &&
           !f->pktq->abort_request) {
        SDL_CondWait(f->cond, f->mutex);
    }
    SDL_UnlockMutex(f->mutex);
    if (f->pktq->abort_request)
        return NULL;
    return &f->queue[f->windex];
}

static void frame_queue_push(FrameQueue *f)
{
    if (++f->windex == f->max_size)
        f->windex = 0;
    SDL_LockMutex(f->mutex);
    f->size++;
    SDL_CondSignal(f->cond);
    SDL_UnlockMutex(f->mutex);
}
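The video decoding thread uses this pair essentially the way ffplay's queue_picture does; a trimmed sketch:
Frame *vp;
if (!(vp = frame_queue_peek_writable(&is->pictq)))
    return -1;                           /* queue was aborted */
vp->pts      = pts;
vp->duration = duration;
vp->serial   = serial;
av_frame_move_ref(vp->frame, src_frame); /* hand over the AVFrame, no copy */
frame_queue_push(&is->pictq);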
The read flow is the mirror image: the consumer first calls frame_queue_peek_readable to obtain the next Frame. Once the frame has been consumed, frame_queue_next releases the reference held by that Frame's AVFrame, advances rindex, and decrements size.
static Frame *frame_queue_peek_readable(FrameQueue *f)
{
    /* wait until we have a readable new frame */
    SDL_LockMutex(f->mutex);
    while (f->size - f->rindex_shown <= 0 &&
           !f->pktq->abort_request) {
        SDL_CondWait(f->cond, f->mutex);
    }
    SDL_UnlockMutex(f->mutex);
    if (f->pktq->abort_request)
        return NULL;
    return &f->queue[(f->rindex + f->rindex_shown) % f->max_size];
}

static void frame_queue_next(FrameQueue *f)
{
    if (f->keep_last && !f->rindex_shown) {
        f->rindex_shown = 1;
        return;
    }
    frame_queue_unref_item(&f->queue[f->rindex]);
    if (++f->rindex == f->max_size)
        f->rindex = 0;
    SDL_LockMutex(f->mutex);
    f->size--;
    SDL_CondSignal(f->cond);
    SDL_UnlockMutex(f->mutex);
}
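On the consumer side the pattern looks roughly like this (a simplified sketch; the real video_refresh also uses frame_queue_peek_last and the keep_last/rindex_shown mechanism, which keeps the currently displayed frame in the queue until the next one replaces it):
Frame *vp;
if (!(vp = frame_queue_peek_readable(&is->pictq)))
    return;                        /* queue was aborted */
/* ... wait until the master clock reaches vp->pts ... */
video_display(is);                 /* render vp->frame */
frame_queue_next(&is->pictq);      /* release it and advance rindex */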
That covers reading from and writing to a FrameQueue. The write and read indexes track the next slot to fill and the next slot to consume, and a consumed Frame is released by unreferencing its AVFrame. So far this is just data moving between queues; next comes the actual audio/video synchronization.
2.4: Audio/video synchronization. There are three possible sync masters: the audio clock, the video clock, and an external clock. Because audio output is pulled at a regular cadence by the device (SDL invokes sdl_audio_callback periodically), we analyze audio-master sync first. The key data structure is Clock, instantiated as audclk, vidclk and extclk. In short: every time audio is output, audclk is updated with the corresponding pts, and every time a video frame is about to be shown, its timestamp is compared with the audio clock to adjust the display delay. If video lags behind audio the delay is shortened; if video runs ahead the delay is lengthened.
typedef struct Clock {
    double pts;           /* clock base */
    double pts_drift;     /* clock base minus time at which we updated the clock */
    double last_updated;
    double speed;
    int serial;           /* clock is based on a packet with this serial */
    int paused;
    int *queue_serial;    /* pointer to the current packet queue serial, used for obsolete clock detection */
} Clock;
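For reference, the clock is read and written as follows (as in ffplay.c; details can differ slightly between versions). pts_drift caches pts minus the wall-clock time of the last update, so get_clock can extrapolate the current value without the clock being updated on every query:
static double get_clock(Clock *c)
{
    if (*c->queue_serial != c->serial)
        return NAN;                     /* clock is stale, e.g. after a seek */
    if (c->paused) {
        return c->pts;
    } else {
        double time = av_gettime_relative() / 1000000.0;
        return c->pts_drift + time - (time - c->last_updated) * (1.0 - c->speed);
    }
}

static void set_clock_at(Clock *c, double pts, int serial, double time)
{
    c->pts = pts;
    c->last_updated = time;
    c->pts_drift = c->pts - time;
    c->serial = serial;
}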
The audio timestamp is set like this (at the end of sdl_audio_callback):
set_clock_at(&is->audclk,
             is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size)
                             / is->audio_tgt.bytes_per_sec,
             is->audio_clock_serial, audio_callback_time / 1000000.0);
sync_clock_to_slave(&is->extclk, &is->audclk);
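The subtraction accounts for audio that has been decoded but not yet heard: two hardware buffers' worth plus whatever is still in the write buffer. A worked example with assumed numbers: 48 kHz stereo s16 gives bytes_per_sec = 48000 * 2 * 2 = 192000; with audio_hw_buf_size = 8192 and audio_write_buf_size = 4096, the clock is set to audio_clock - (2 * 8192 + 4096) / 192000 ≈ audio_clock - 0.107, i.e. about 107 ms behind the pts of the last decoded sample, which is roughly when that sample will actually reach the speaker.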
Video syncs to audio as follows: the baseline delay is the pts difference between the current and previous video frames, and compute_target_delay then adjusts it by the difference between the vidclk and audclk (master clock) values.
static double compute_target_delay(double delay, VideoState *is)
{
    double sync_threshold, diff = 0;

    /* update delay to follow master synchronisation source */
    if (get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER) {
        /* if video is slave, we try to correct big delays by
           duplicating or deleting a frame */
        diff = get_clock(&is->vidclk) - get_master_clock(is);

        /* skip or repeat frame. We take into account the
           delay to compute the threshold. I still don't know
           if it is the best guess */
        sync_threshold = FFMAX(AV_SYNC_THRESHOLD_MIN, FFMIN(AV_SYNC_THRESHOLD_MAX, delay));
        if (!isnan(diff) && fabs(diff) < is->max_frame_duration) {
            if (diff <= -sync_threshold)
                delay = FFMAX(0, delay + diff);
            else if (diff >= sync_threshold && delay > AV_SYNC_FRAMEDUP_THRESHOLD)
                delay = delay + diff;
            else if (diff >= sync_threshold)
                delay = 2 * delay;
        }
    }

    av_log(NULL, AV_LOG_TRACE, "video: delay=%0.3f A-V=%f\n",
           delay, -diff);

    return delay;
}
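A quick worked example with assumed numbers: for 25 fps video the frame-to-frame delay is 0.040 s, so sync_threshold = FFMAX(0.04, FFMIN(0.1, 0.040)) = 0.040 (using the usual constants AV_SYNC_THRESHOLD_MIN = 0.04, AV_SYNC_THRESHOLD_MAX = 0.1, AV_SYNC_FRAMEDUP_THRESHOLD = 0.1). If the video clock is 0.1 s behind the master, diff = -0.1 <= -0.040 and the new delay is FFMAX(0, 0.040 - 0.1) = 0: show the next frame immediately. If video is 0.1 s ahead, diff = 0.1 >= 0.040 but delay = 0.040 is not greater than 0.1, so the last branch doubles the delay to 0.080 s, holding the current frame on screen longer.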
The frame is then displayed and update_video_pts updates vidclk with that frame's pts. If framedrop is enabled and video has fallen too far behind, late frames are dropped instead of displayed.
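update_video_pts itself is tiny (as in ffplay.c; set_clock simply calls set_clock_at with the current time):
static void update_video_pts(VideoState *is, double pts, int64_t pos, int serial)
{
    /* update current video pts */
    set_clock(&is->vidclk, pts, serial);
    sync_clock_to_slave(&is->extclk, &is->vidclk);
}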
Summary: the above is only a quick walkthrough of ffplay's flow; in my view the part most worth studying is audio/video synchronization. In essence, the audio clock is updated at a fixed cadence, while video compares each frame against it and recomputes the timestamp at which the next frame should be displayed. Video display is therefore not driven at a fixed rate; it is a continuously adjusted process.