ffplay源代码音视频同步分析(2)
前文:
音频同步分析
音频同步主要在audio_decode_frame
函数完成,同步的基本逻辑如下:
Windows和非Windows平台有一些不同,主要在是否判断队列为空。(至于为什么要如此做,我暂时也不太清楚,大概和SDL在Windows下的实现有关)
具体代码分析:
static int audio_decode_frame(VideoState *is)
{
......
do {
#if defined(_WIN32)
while (frame_queue_nb_remaining(&is->sampq) == 0) {
if ((av_gettime_relative() - audio_callback_time) > 1000000LL * is->audio_hw_buf_size / is->audio_tgt.bytes_per_sec / 2)
return -1;
av_usleep (1000);
}
#endif
if (!(af = frame_queue_peek_readable(&is->sampq)))
return -1;
frame_queue_next(&is->sampq);
} while (af->serial != is->audioq.serial);
// 根据与参考时钟的差值,计算应该输出的样本数
wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);
/*
判断是否需要重采样,判断标准:
1. 取出的音频帧是否和上一帧的format、channel_layout和freq是否相同
2. 输出的样本数与取出的音频帧样本数不同(音频同步)
*/
if (af->frame->format != is->audio_src.fmt ||
dec_channel_layout != is->audio_src.channel_layout ||
af->frame->sample_rate != is->audio_src.freq ||
(wanted_nb_samples != af->frame->nb_samples && !is->swr_ctx)) {
swr_free(&is->swr_ctx);
is->swr_ctx = swr_alloc_set_opts(NULL,
is->audio_tgt.channel_layout, is->audio_tgt.fmt, is>audio_tgt.freq,
dec_channel_layout,af->frame->format, af->frame->sample_rate,
0, NULL);
if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
av_log(NULL, AV_LOG_ERROR,
"Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->channels,
is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
swr_free(&is->swr_ctx);
return -1;
}
is->audio_src.channel_layout = dec_channel_layout;
is->audio_src.channels = af->frame->channels;
is->audio_src.freq = af->frame->sample_rate;
is->audio_src.fmt = af->frame->format;
}
// 重采样,利用重采样库进行样本的插入或剔除
if (is->swr_ctx) {
const uint8_t **in = (const uint8_t **)af->frame->extended_data;
uint8_t **out = &is->audio_buf1;
int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
int out_size = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
int len2;
if (out_size < 0) {
av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
return -1;
}
if (wanted_nb_samples != af->frame->nb_samples) {
if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate,
wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
return -1;
}
}
av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
if (!is->audio_buf1)
return AVERROR(ENOMEM);
len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
if (len2 < 0) {
av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
return -1;
}
if (len2 == out_count) {
av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
if (swr_init(is->swr_ctx) < 0)
swr_free(&is->swr_ctx);
}
is->audio_buf = is->audio_buf1;
resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
} else { //如果swr_ctx没有初始化过,说明无需重采样
is->audio_buf = af->frame->data[0];
resampled_data_size = data_size;
}
// 更新音频时钟,audio_clock最终会在sdl_audio_callback函数中更新audclk
/* update the audio clock with the pts */
if (!isnan(af->pts))
is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
else
is->audio_clock = NAN;
is->audio_clock_serial = af->serial;
......
}
sdl_audio_callback
函数中更新audclk
:
static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
{
······
while (len > 0) {
if (is->audio_buf_index >= is->audio_buf_size) {
audio_size = audio_decode_frame(is);
······
}
······
}
/* Let's assume the audio driver that is used by SDL has two periods. */
if (!isnan(is->audio_clock)) {
set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
sync_clock_to_slave(&is->extclk, &is->audclk);
}
}
计算应该输出的样本数是同步音视频重要部分,主要由synchronize_audio
函数实现。下面分析一下这个函数:
/* return the wanted number of samples to get better sync if sync_type is video
* or external master clock */
static int synchronize_audio(VideoState *is, int nb_samples)
{
int wanted_nb_samples = nb_samples;
/* if not master, then we try to remove or add samples to correct the clock
当参照时钟为视频时钟或外部时钟时,需要调整输出的样本数 */
if (get_master_sync_type(is) != AV_SYNC_AUDIO_MASTER) {
double diff, avg_diff;
int min_nb_samples, max_nb_samples;
diff = get_clock(&is->audclk) - get_master_clock(is);
if (!isnan(diff) && fabs(diff) < AV_NOSYNC_THRESHOLD) {
is->audio_diff_cum = diff + is->audio_diff_avg_coef * is->audio_diff_cum;
if (is->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
/* not enough measures to have a correct estimate */
is->audio_diff_avg_count++;
} else {
/* estimate the A-V difference */
avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef);
if (fabs(avg_diff) >= is->audio_diff_threshold) {
wanted_nb_samples = nb_samples + (int)(diff * is->audio_src.freq);
min_nb_samples = ((nb_samples * (100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100));
max_nb_samples = ((nb_samples * (100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100));
wanted_nb_samples = av_clip(wanted_nb_samples, min_nb_samples, max_nb_samples);
}
av_log(NULL, AV_LOG_TRACE, "diff=%f adiff=%f sample_diff=%d apts=%0.3f %f\n",
diff, avg_diff, wanted_nb_samples - nb_samples,
is->audio_clock, is->audio_diff_threshold);
}
} else {
/* too big difference : may be initial PTS errors, so
reset A-V filter */
is->audio_diff_avg_count = 0;
is->audio_diff_cum = 0;
}
}
return wanted_nb_samples;
}
当音频时钟与参照时钟差距(diff
)大于10(AV_NOSYNC_THRESHOLD
)时,音频时钟和参照时钟差距太大,可能是初始PTS错误,因此重置计数器(audio_diff_avg_count
)和audio_diff_cum
(暂时不知道是什么)。
当音频时钟与参照时钟差距(diff
)在10内时:
- 在开始播放的20(
AUDIO_DIFF_AVG_NB
)个帧内,不调整输出的样本数,主要用来累计audio_diff_cum
。根据注释,原因应该是帧数不够,不足以计算正确的avg_diff
。 - 计算
avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef)
audio_diff_avg_coef
可能是平均filter差距,根据初始化时的注释:
static int stream_component_open(VideoState *is, int stream_index)
{
......
/* init averaging filter */
is->audio_diff_avg_coef = exp(log(0.01) / AUDIO_DIFF_AVG_NB);
is->audio_diff_avg_count = 0;
......
}
在得到avg_diff
之后,判断avg_diff
是否大于需要调整的阈值audio_diff_threshold
。如果需要调整,那么
wanted_nb_samples = nb_samples + (int)(diff * is->audio_src.freq);
min_nb_samples = ((nb_samples * (100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100));
max_nb_samples = ((nb_samples * (100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100));
wanted_nb_samples = av_clip(wanted_nb_samples, min_nb_samples, max_nb_samples);
通过diff
乘上音频数据频率得到需要的输出样本数,而且样本数必须在原来音频帧样本数nb_samples
的0.9-1.1倍范围内的整数。如果不在范围内,那么就取0.9的nb_samples
或1.1nb_samples
。
同步阈值audio_diff_threshold
初始化为:
is->audio_diff_threshold = (double)(is->audio_hw_buf_size) / is->audio_tgt.bytes_per_sec;