【FFmpeg】ffplay源代码音视频同步分析-音频同步分析

ffplay源代码音视频同步分析(2)

前文:

音频同步分析

音频同步主要在audio_decode_frame函数完成,同步的基本逻辑如下:

在这里插入图片描述

Windows和非Windows平台有一些不同,主要在是否判断队列为空。(至于为什么要如此做,我暂时也不太清楚,大概和SDL在Windows下的实现有关)

具体代码分析:

static int audio_decode_frame(VideoState *is)
{
	......
    do {
#if defined(_WIN32)
        while (frame_queue_nb_remaining(&is->sampq) == 0) {
            if ((av_gettime_relative() - audio_callback_time) > 1000000LL * is->audio_hw_buf_size / is->audio_tgt.bytes_per_sec / 2)
                return -1;
            av_usleep (1000);
        }
#endif
        if (!(af = frame_queue_peek_readable(&is->sampq)))
            return -1;
        frame_queue_next(&is->sampq);
    } while (af->serial != is->audioq.serial);
    
    // 根据与参考时钟的差值,计算应该输出的样本数
    wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);
	
    /* 
    判断是否需要重采样,判断标准:
    	1. 取出的音频帧是否和上一帧的format、channel_layout和freq是否相同
    	2. 输出的样本数与取出的音频帧样本数不同(音频同步)
    */
    if (af->frame->format        != is->audio_src.fmt            ||
        dec_channel_layout       != is->audio_src.channel_layout ||
        af->frame->sample_rate   != is->audio_src.freq           ||
        (wanted_nb_samples       != af->frame->nb_samples && !is->swr_ctx)) {
        swr_free(&is->swr_ctx);
        is->swr_ctx = swr_alloc_set_opts(NULL,
                                        is->audio_tgt.channel_layout, is->audio_tgt.fmt, is>audio_tgt.freq,
                                        dec_channel_layout,af->frame->format, af->frame->sample_rate,
                                        0, NULL);
        if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                    af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->channels,
                    is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
            swr_free(&is->swr_ctx);
            return -1;
        }
        is->audio_src.channel_layout = dec_channel_layout;
        is->audio_src.channels       = af->frame->channels;
        is->audio_src.freq = af->frame->sample_rate;
        is->audio_src.fmt = af->frame->format;
    }
	
    // 重采样,利用重采样库进行样本的插入或剔除
    if (is->swr_ctx) {
        const uint8_t **in = (const uint8_t **)af->frame->extended_data;
        uint8_t **out = &is->audio_buf1;
        int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
        int out_size  = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
        int len2;
        if (out_size < 0) {
            av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
            return -1;
        }
        if (wanted_nb_samples != af->frame->nb_samples) {
            if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate,
                                        wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
                av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
                return -1;
            }
        }
        av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
        if (!is->audio_buf1)
            return AVERROR(ENOMEM);
        len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
        if (len2 < 0) {
            av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
            return -1;
        }
        if (len2 == out_count) {
            av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
            if (swr_init(is->swr_ctx) < 0)
                swr_free(&is->swr_ctx);
        }
        is->audio_buf = is->audio_buf1;
        resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
    } else { //如果swr_ctx没有初始化过,说明无需重采样
        is->audio_buf = af->frame->data[0];
        resampled_data_size = data_size;
    }
	
    // 更新音频时钟,audio_clock最终会在sdl_audio_callback函数中更新audclk
    /* update the audio clock with the pts */
    if (!isnan(af->pts))
        is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
    else
        is->audio_clock = NAN;
    is->audio_clock_serial = af->serial;
	
    ......
}

sdl_audio_callback函数中更新audclk

static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
{
    ······

    while (len > 0) {
        if (is->audio_buf_index >= is->audio_buf_size) {
           audio_size = audio_decode_frame(is);
           ······
        }
        ······
    }
	
    /* Let's assume the audio driver that is used by SDL has two periods. */
    if (!isnan(is->audio_clock)) {
        set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
        sync_clock_to_slave(&is->extclk, &is->audclk);
    }
}

计算应该输出的样本数是同步音视频重要部分,主要由synchronize_audio函数实现。下面分析一下这个函数:

/* return the wanted number of samples to get better sync if sync_type is video
 * or external master clock */
static int synchronize_audio(VideoState *is, int nb_samples)
{
    int wanted_nb_samples = nb_samples;
	
    /* if not master, then we try to remove or add samples to correct the clock 
    	当参照时钟为视频时钟或外部时钟时,需要调整输出的样本数 */
    if (get_master_sync_type(is) != AV_SYNC_AUDIO_MASTER) {
        double diff, avg_diff;
        int min_nb_samples, max_nb_samples;

        diff = get_clock(&is->audclk) - get_master_clock(is);

        if (!isnan(diff) && fabs(diff) < AV_NOSYNC_THRESHOLD) {
            is->audio_diff_cum = diff + is->audio_diff_avg_coef * is->audio_diff_cum;
            if (is->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
                /* not enough measures to have a correct estimate */
                is->audio_diff_avg_count++;
            } else {
                /* estimate the A-V difference */
                avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef);

                if (fabs(avg_diff) >= is->audio_diff_threshold) {
                    wanted_nb_samples = nb_samples + (int)(diff * is->audio_src.freq);
                    min_nb_samples = ((nb_samples * (100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100));
                    max_nb_samples = ((nb_samples * (100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100));
                    wanted_nb_samples = av_clip(wanted_nb_samples, min_nb_samples, max_nb_samples);
                }
                av_log(NULL, AV_LOG_TRACE, "diff=%f adiff=%f sample_diff=%d apts=%0.3f %f\n",
                        diff, avg_diff, wanted_nb_samples - nb_samples,
                        is->audio_clock, is->audio_diff_threshold);
            }
        } else {
            /* too big difference : may be initial PTS errors, so
               reset A-V filter */
            is->audio_diff_avg_count = 0;
            is->audio_diff_cum       = 0;
        }
    }

    return wanted_nb_samples;
}

当音频时钟与参照时钟差距(diff)大于10(AV_NOSYNC_THRESHOLD)时,音频时钟和参照时钟差距太大,可能是初始PTS错误,因此重置计数器(audio_diff_avg_count)和audio_diff_cum(暂时不知道是什么)。

当音频时钟与参照时钟差距(diff)在10内时:

  • 在开始播放的20(AUDIO_DIFF_AVG_NB)个帧内,不调整输出的样本数,主要用来累计audio_diff_cum。根据注释,原因应该是帧数不够,不足以计算正确的avg_diff
  • 计算avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef)

audio_diff_avg_coef可能是平均filter差距,根据初始化时的注释:

static int stream_component_open(VideoState *is, int stream_index) 
{
    ......
    /* init averaging filter */
	is->audio_diff_avg_coef  = exp(log(0.01) / AUDIO_DIFF_AVG_NB);
    is->audio_diff_avg_count = 0;
    ......
}

在得到avg_diff之后,判断avg_diff是否大于需要调整的阈值audio_diff_threshold。如果需要调整,那么

wanted_nb_samples = nb_samples + (int)(diff * is->audio_src.freq);
min_nb_samples = ((nb_samples * (100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100));
max_nb_samples = ((nb_samples * (100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100));
wanted_nb_samples = av_clip(wanted_nb_samples, min_nb_samples, max_nb_samples);

通过diff乘上音频数据频率得到需要的输出样本数,而且样本数必须在原来音频帧样本数nb_samples的0.9-1.1倍范围内的整数。如果不在范围内,那么就取0.9的nb_samples或1.1nb_samples
同步阈值audio_diff_threshold初始化为:

is->audio_diff_threshold = (double)(is->audio_hw_buf_size) / is->audio_tgt.bytes_per_sec;

Reference

ffplay音视频同步分析——音频同步视频 - 知乎 (zhihu.com)

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值