相关代码在NetEqImpl::GetDecision中
samples_left为语音缓冲区的未播放数据
做初步判定
首先由DecisionLogic::GetDecision做初步判定。
cur_size_samples为语音缓冲区和抖动缓冲区所有未播放的样本总和。
const size_t cur_size_samples =
samples_left + packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
然后计算延迟抖动,具体参考"NetEq延迟估计"。
FilterBufferLevel(cur_size_samples, prev_mode);
初步判定分了以下几种情况。
- 如果没有数据包
if (!next_packet) {
return NoPacket(play_dtmf);
}
// Picks the operation to run when there is no packet to decode.
// Priority: ongoing RFC 3389 comfort noise, then codec-internal comfort noise,
// then DTMF (if |play_dtmf| is set), and finally expand.
Operations DecisionLogic::NoPacket(bool play_dtmf) {
  // An active comfort-noise state wins: keep generating the same kind of noise.
  if (cng_state_ == kCngRfc3389On) {
    return kRfc3389CngNoPacket;
  }
  if (cng_state_ == kCngInternalOn) {
    return kCodecInternalCng;
  }
  // No comfort noise in progress: play DTMF when requested; otherwise there is
  // nothing to play, so do expand.
  return play_dtmf ? kDtmf : kExpand;
}
一般情况下做Expand。
- 如果连续Expand的次数超过上限(num_consecutive_expands_ > kReinitAfterExpands)
复位,按Normal处理
// If the expand period was very long, reset NetEQ since it is likely that the
// sender was restarted.
if (num_consecutive_expands_ > kReinitAfterExpands) {
*reset_decoder = true;
return kNormal;
}
- 在上一次是Expand情况下,还需满足一些条件才可以设本次为Expand
在expansion后不要立即重启audio避免数据再次用尽
if (postpone_decoding_after_expand_ && prev_mode == kModeExpand &&
!packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) &&
cur_size_samples<static_cast<size_t>(delay_manager_->TargetLevel() *
packet_length_samples_)>> 8 &&
expand.MuteFactor(0) < 16384 / 2) {
return kExpand;
}
- 当前帧和上一帧都没有丢失
这种情况下用网络延迟和抖动延迟估计值判定。
// The available packet carries exactly the timestamp we want to play next, so
// hand the decision over to ExpectedPacketAvailable().
if (target_timestamp == available_timestamp) {
return ExpectedPacketAvailable(prev_mode, play_dtmf); // Author's note: neither the current nor the previous frame was lost.
}
// Decides the operation when the expected packet is available. Compares the
// filtered buffer level against the limits reported by the delay manager and
// returns a time-stretching operation (fast accelerate, accelerate or
// preemptive expand) when the level is outside them; otherwise kNormal.
Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode,
                                                  bool play_dtmf) {
  // No time-stretching when it is disallowed, when the previous mode was
  // expand, or when DTMF should be played.
  if (disallow_time_stretching_ || prev_mode == kModeExpand || play_dtmf) {
    return kNormal;
  }

  // The limits are filled in by DelayManager::BufferLimits().
  int lower_bound = 0;
  int upper_bound = 0;
  delay_manager_->BufferLimits(&lower_bound, &upper_bound);

  // At or above four times the upper limit: fast accelerate. This check is
  // made before consulting TimescaleAllowed().
  if (buffer_level_filter_->filtered_current_level() >= (upper_bound << 2)) {
    return kFastAccelerate;
  }

  // The regular accelerate / preemptive expand decisions are only taken when
  // TimescaleAllowed() permits them.
  if (!TimescaleAllowed()) {
    return kNormal;
  }

  const auto filtered_level = buffer_level_filter_->filtered_current_level();
  if (filtered_level >= upper_bound) {
    return kAccelerate;
  }
  if (filtered_level < lower_bound) {
    return kPreemptiveExpand;
  }
  return kNormal;
}
// Computes the buffer-level limits used for time-stretching decisions.
// |lower_limit| receives 3/4 of |target_level_|; |higher_limit| receives
// |target_level_|, raised if needed so that it lies at least a 20 ms window
// above |lower_limit|. If either pointer is null, an error is logged and
// nothing is written.
void DelayManager::BufferLimits(int* lower_limit, int* higher_limit) const {
  if (lower_limit == nullptr || higher_limit == nullptr) {
    RTC_LOG_F(LS_ERROR) << "NULL pointers supplied as input";
    assert(false);
    return;
  }

  // 20 ms expressed in packets, Q8. When the packet length is not known, fall
  // back to a large default value (kept for legacy bit-exactness).
  const int window_20ms =
      (packet_len_ms_ > 0) ? (20 << 8) / packet_len_ms_ : 0x7FFF;

  // |target_level_| is in Q8 already.
  const int three_quarters_of_target = (target_level_ * 3) / 4;
  *lower_limit = three_quarters_of_target;
  *higher_limit =
      std::max(target_level_, three_quarters_of_target + window_20ms);
}
low_limit和high_limit由网络延迟target_level_估算。
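如果抖动延迟(即buffer_level_filter_滤波后的缓冲水平)大于等于4倍的high_limit(high_limit << 2),FastAccelerate,这种情况下,抖动延迟过大,需要加速;
在TimescaleAllowed()允许的前提下,如果抖动延迟大于等于high_limit,Accelerate;
在TimescaleAllowed()允许的前提下,如果抖动延迟小于low_limit,PreemptiveExpand;
其余情况返回Normal。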
- 如果期望的数据包不可用,但有时间戳更晚的数据包可用(FuturePacketAvailable)
// Chooses the operation when the required packet is not available but a
// future packet is. Possible results: keep expanding (or play DTMF), resume
// normal decoding or keep playing comfort noise after a CNG period, merge
// after an expand, or start an expand.
Operations DecisionLogic::FuturePacketAvailable(
    const SyncBuffer& sync_buffer,
    const Expand& expand,
    size_t decoder_frame_length,
    Modes prev_mode,
    uint32_t target_timestamp,
    uint32_t available_timestamp,
    bool play_dtmf,
    size_t generated_noise_samples) {
  // How far ahead of the wanted timestamp the available packet is.
  const uint32_t timestamp_leap = available_timestamp - target_timestamp;

  // Continue an ongoing expand when the new packet is still too far into the
  // future and none of the give-up conditions has been reached.
  const bool continue_expand =
      (prev_mode == kModeExpand) && !ReinitAfterExpands(timestamp_leap) &&
      !MaxWaitForPacket() && PacketTooEarly(timestamp_leap) &&
      UnderTargetLevel();
  if (continue_expand) {
    // If there is still DTMF to play, do that instead of expanding.
    return play_dtmf ? kDtmf : kExpand;
  }

  const size_t samples_left =
      sync_buffer.FutureLength() - expand.overlap_length();
  const size_t cur_size_samples =
      samples_left + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;

  // Coming out of comfort noise never needs a merge.
  const bool prev_was_cng =
      prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng;
  if (prev_was_cng) {
    // Keep the same delay as before the CNG, but do not let the buffered
    // amount grow beyond 4 times the optimal level. (TargetLevel() is in Q8.)
    const bool noise_reached_packet =
        static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
        available_timestamp;
    if (noise_reached_packet ||
        cur_size_samples >
            ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
                4) {
      // Time to play the new packet.
      return kNormal;
    }
    // Still too early for the new packet: continue with the same kind of
    // comfort noise as before.
    return prev_mode == kModeRfc3389Cng ? kRfc3389CngNoPacket
                                        : kCodecInternalCng;
  }

  // A merge is only done directly after an expand.
  if (prev_mode == kModeExpand) {
    return kMerge;
  }
  // Otherwise prefer DTMF over starting an expand.
  return play_dtmf ? kDtmf : kExpand;
}
如果是播放音乐,只会进下面的分支,只能返回kMerge 或者kExpand,当上一次mode是Expand,本次是Merge,其它情况是Expand。
- 其它情况
未定义操作,unDefined.
再做进一步判定
- 语音缓冲区数据足够(samples_left不小于一次输出所需的output_size_samples_),且初步判定不是加速、减速、融合时,调整为正常播放。
if (samples_left >= rtc::dchecked_cast<int>(output_size_samples_) &&
*operation != kMerge && *operation != kAccelerate &&
*operation != kFastAccelerate && *operation != kPreemptiveExpand) {
*operation = kNormal;
return 0;
}
- 如果初步判定是Expand,仍做Expand处理
case kExpand: {
timestamp_ = end_timestamp;
return 0;
}
- 如果初步判定是加速
如果samples_left不小于30ms(2个基音周期),数据足够,保持原判定不变;
如果samples_left不小于10ms且小于30ms,且帧长不小于30ms,调整为正常播放,在这种情况下,如果从抖动缓冲区中取数据解码,由于帧长可能很长,有可能使语音缓冲区溢出且数据大于10ms也足够了,所以正常播放;
如果samples_left小于20ms,且帧长小于30ms,调整为正常播放,并要求先解码至少20ms数据(required_samples),因为这种情况下不能保证能凑齐30ms数据;
其它情况初步判定不变。
// Accelerate / fast accelerate: depending on how much decoded audio is left
// (samples_left) and on the decoder frame length, either keep the decision
// without extracting more data, fall back to kNormal, or keep the decision
// and decode one more frame.
case kAccelerate: // Adjusted below based on samples_left and decoder_frame_length_.
case kFastAccelerate: {
// In order to do an accelerate we need at least 30 ms of audio data.
if (samples_left >= static_cast<int>(samples_30_ms)) {
// Already have enough data, so we do not need to extract any more.
decision_logic_->set_sample_memory(samples_left);
decision_logic_->set_prev_time_scale(true);
return 0;
} else if (samples_left >= static_cast<int>(samples_10_ms) && // samples_left: unplayed data in the speech buffer (per the author's notes).
decoder_frame_length_ >= samples_30_ms) { // Decoding another frame this long could exceed the playout buffer (overflow).
// Avoid decoding more data as it might overflow the playout buffer.
*operation = kNormal;
return 0;
} else if (samples_left < static_cast<int>(samples_20_ms) && // samples_left < 20 ms
decoder_frame_length_ < samples_30_ms) {
// Build up decoded data by decoding at least 20 ms of audio data. Do
// not perform accelerate yet, but wait until we only need to do one
// decoding.
required_samples = 2 * output_size_samples_; // 20 ms, assuming output_size_samples_ is 10 ms - TODO confirm.
*operation = kNormal;
}
// If none of the above is true, we have one of two possible situations:
// (1) 20 ms <= samples_left < 30 ms and decoder_frame_length_ < 30 ms; or
// (2) samples_left < 10 ms and decoder_frame_length_ >= 30 ms.
// In either case, we move on with the accelerate decision, and decode one
// frame now.
break;
}
- 如果初步判定是减速
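第一个if条件把两种情况合并处理:samples_left不小于30ms(数据已足够),或者samples_left不小于10ms且帧长不小于30ms,即(samples_left >= static_cast<int>(samples_10_ms) &&
decoder_frame_length_ >= samples_30_ms),此时继续解码有语音缓冲区溢出的风险。与加速不同的是,后一种情况并不改为正常播放,而是不再取新数据,仍然尝试做减速(PreemptiveExpand);
第二个if条件与加速类似:samples_left小于20ms且帧长小于30ms时,要求至少解码20ms数据(required_samples),但仍保持减速的判定。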
// Preemptive expand: unlike the accelerate case, the decision is never
// changed to kNormal here. The code only decides whether more data must be
// extracted before the operation is attempted.
case kPreemptiveExpand: { // Slow down playout.
// In order to do a preemptive expand we need at least 30 ms of decoded
// audio data.
if ((samples_left >= static_cast<int>(samples_30_ms)) ||
(samples_left >= static_cast<int>(samples_10_ms) &&
decoder_frame_length_ >= samples_30_ms)) {
// Already have enough data, so we do not need to extract any more.
// Or, avoid decoding more data as it might overflow the playout buffer.
// Still try preemptive expand, though.
decision_logic_->set_sample_memory(samples_left);
decision_logic_->set_prev_time_scale(true);
return 0;
}
if (samples_left < static_cast<int>(samples_20_ms) &&
decoder_frame_length_ < samples_30_ms) {
// Build up decoded data by decoding at least 20 ms of audio data.
// Still try to perform preemptive expand.
required_samples = 2 * output_size_samples_; // 20 ms of data, assuming output_size_samples_ is 10 ms - TODO confirm.
}
// Move on with the preemptive expand decision.
break;
}
- 如果初步判定是Merge,仍为Merge
case kMerge: {
required_samples =
std::max(merge_->RequiredFutureSamples(), required_samples);
break;
}
- 其它情况初步判定不变