【转】WebRTC AECM时延估计

最新推荐文章于 2024-02-29 14:21:22 发布

一片橡树叶子的故事

最新推荐文章于 2024-02-29 14:21:22 发布

阅读量1.2k

点赞数 2

分类专栏：音频信号处理文章标签：前端

原文链接：https://blog.csdn.net/huihui0725/article/details/102820695

版权

音频信号处理专栏收录该内容

4 篇文章 1 订阅

订阅专栏

时延问题：一般基于互相关计算的自适应时延估计算法的计算复杂度为 $O(N^2)$（$N$ 为时延搜索范围），其随计算范围的增长呈二次上升趋势，因此很难在如此大的范围内进行动态的时延计算。在实际应用中一般采用大范围粗略估计加小范围精确计算的方法，例如在WebRTC的AECM回声消除模块中，其API接口中有一个特殊的参数msInSoundCardBuf，要求调用者传入一个以毫秒为单位的回声时延估计值，然后由AECM内部在这个估计值的基础上进行小范围内的回声时延精确计算，因此该参数的准确性将直接影响到AECM内部时延计算的准确性和速度，而这种影响又能直观地反映在回声消除的整体性能（如ERLE）上。
最常用的时延估计方法是计算两信号的互相关函数，通过遍历候选延时的互相关系数，选择取得最大值的候选延时作为实际延时。但此方法计算复杂度高，尤其是应用在实时性要求高的系统中时。AECM算法简化了流程，在算法复杂度和性能上做了较好的权衡：对远端和近端语音信号做FFT后得到频谱far_spectrum和near_spectrum，其中远端频谱将被缓存起来作为候选匹配项。选择频谱中最重要的32个频段（12-43），算法估计了频谱的均值threshold_spectrum并将其设为门限值。当某个频段的值大于门限值时，将该频段设置为1，反之则设为0。这样便得到了远端和近端信号二值化的频谱，通过求解两者的按位异或值，选择相似度最高的候选远端信号并计算对应的延时。

WebRTC AECM中时延估计模块为
int WebRtc_DelayEstimatorProcessFix(
void* handle,
const uint16_t* near_spectrum,
int spectrum_size,
int near_q)
它包括二进制频谱量化
// Get binary spectra. binary_spectrum：32个频带的判别结果近端信号的量化结果
binary_spectrum = BinarySpectrumFix(near_spectrum,
self->mean_near_spectrum,
near_q,
&(self->near_spectrum_initialized));
和int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self,
uint32_t binary_near_spectrum)，该模块主要是从候选的时延中找到一个相似度最大的，控制逻辑比较复杂，将代码和简单注释贴在下面：
/*
 * Compares the current near-end binary spectrum against the stored far-end
 * binary spectra, smooths the per-delay mismatch counts, and decides whether
 * the best-matching delay should replace |self->last_delay|.
 *
 * Inputs:
 *   - self                  : delay estimator state; must not be NULL.
 *   - binary_near_spectrum  : binarized near-end spectrum, one bit per band.
 *
 * Return value:
 *   - -1 if the far-end history size does not match |self->history_size|;
 *     otherwise |self->last_delay| (updated in this call only when the
 *     candidate was judged valid).
 */
int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self,
                                 uint32_t binary_near_spectrum) {
  int i = 0;
  int candidate_delay = -1;
  int valid_candidate = 0;

  int32_t value_best_candidate = kMaxBitCountsQ9;
  int32_t value_worst_candidate = 0;
  int32_t valley_depth = 0;

  assert(self != NULL);  // Aborts the program if the condition does not hold.
  if (self->farend->history_size != self->history_size) {
    // Non matching history sizes.
    return -1;
  }
  if (self->near_history_size > 1) {
    // If we apply lookahead, shift near-end binary spectrum history. Insert
    // current |binary_near_spectrum| and pull out the delayed one.
    memmove(&(self->binary_near_history[1]), &(self->binary_near_history[0]),
            (self->near_history_size - 1) * sizeof(uint32_t));
    self->binary_near_history[0] = binary_near_spectrum;
    binary_near_spectrum = self->binary_near_history[self->lookahead];
  }

  // Compare with delayed spectra and store the |bit_counts| for each delay.
  // Per the original annotation, each |bit_counts| entry is the number of bit
  // positions in which the two binary spectra differ (bitwise XOR followed by
  // a count of the set bits) -- confirm against BitCountComparison().
  BitCountComparison(binary_near_spectrum, self->farend->binary_far_history,
                     self->history_size, self->bit_counts);

  // Update |mean_bit_counts|, which is the smoothed version of |bit_counts|.
  for (i = 0; i < self->history_size; i++) {
    // |bit_counts| is constrained to [0, 32], meaning we can smooth with a
    // factor up to 2^26. We use Q9, i.e. a left shift by 9 although 26 would
    // still fit.
    int32_t bit_count = (self->bit_counts[i] << 9);  // Q9.

    // Update |mean_bit_counts| only when far-end signal has something to
    // contribute. If |far_bit_counts| is zero the far-end signal is weak and
    // we likely have a poor echo condition, hence don't update.
    if (self->farend->far_bit_counts[i] > 0) {
      // Make number of right shifts piecewise linear w.r.t. |far_bit_counts|.
      // Assuming kShiftsAtZero = 13 and kShiftsLinearSlope = 3 (as annotated
      // in the original listing -- TODO confirm), this evaluates to
      // 13 - floor(3 * far_bit_counts / 16), so |far_bit_counts| in
      // 0..5, 6..10, 11..15, 16..21, 22..26, 27..31, 32 maps to
      // 13, 12, 11, 10, 9, 8, 7 shifts respectively.
      int shifts = kShiftsAtZero;
      shifts -= (kShiftsLinearSlope * self->farend->far_bit_counts[i]) >> 4;
      // Presumably moves |mean_bit_counts[i]| towards |bit_count| by a
      // 2^-shifts fraction of their difference -- verify against
      // WebRtc_MeanEstimatorFix().
      WebRtc_MeanEstimatorFix(bit_count, shifts, &(self->mean_bit_counts[i]));
    }
  }

  // Find |candidate_delay|, |value_best_candidate| and |value_worst_candidate|
  // of |mean_bit_counts|.
  for (i = 0; i < self->history_size; i++) {
    // The smallest mean mismatch count is the most similar spectrum.
    if (self->mean_bit_counts[i] < value_best_candidate) {
      value_best_candidate = self->mean_bit_counts[i];
      candidate_delay = i;
    }
    // Track the largest mean mismatch count as well.
    if (self->mean_bit_counts[i] > value_worst_candidate) {
      value_worst_candidate = self->mean_bit_counts[i];
    }
  }
  // Spread between the worst and the best candidate.
  valley_depth = value_worst_candidate - value_best_candidate;

  // The |value_best_candidate| is a good indicator on the probability of
  // |candidate_delay| being an accurate delay (a small |value_best_candidate|
  // means a good binary match). In the following sections we make a decision
  // whether to update |last_delay| or not.
  // 1) If the difference bit counts between the best and the worst delay
  //    candidates is too small we consider the situation to be unreliable and
  //    don't update |last_delay|.
  // 2) If the situation is reliable we update |last_delay| if the value of the
  //    best candidate delay has a value less than
  //     i) an adaptive threshold |minimum_probability|, or
  //    ii) this corresponding value |last_delay_probability|, but updated at
  //        this time instant.

  // Update |minimum_probability|.
  if ((self->minimum_probability > kProbabilityLowerLimit) &&
      (valley_depth > kProbabilityMinSpread)) {
    // The "hard" threshold can't be lower than 17 (in Q9).
    // The valley in the curve also has to be distinct, i.e., the
    // difference between |value_worst_candidate| and |value_best_candidate| has
    // to be large enough (annotated as 5.5 in the original listing).
    // The offset added below is annotated as +2 in the original listing.
    int32_t threshold = value_best_candidate + kProbabilityOffset;
    if (threshold < kProbabilityLowerLimit) {
      // Clamp to the lower limit of the threshold.
      threshold = kProbabilityLowerLimit;
    }
    if (self->minimum_probability > threshold) {
      // Only ever lower the adaptive threshold here.
      self->minimum_probability = threshold;
    }
  }
  // Update |last_delay_probability|.
  // We use a Markov type model, i.e., a slowly increasing level over time.
  self->last_delay_probability++;
  // Validate |candidate_delay|. We have a reliable instantaneous delay
  // estimate if
  //  1) The valley is distinct enough (|valley_depth| > |kProbabilityOffset|)
  // and
  //  2) The depth of the valley is deep enough
  //      (|value_best_candidate| < |minimum_probability|)
  //     and deeper than the best estimate so far
  //      (|value_best_candidate| < |last_delay_probability|)
  // |valid_candidate| is 1 when |last_delay| should be updated, 0 otherwise.
  valid_candidate = ((valley_depth > kProbabilityOffset) &&
                     ((value_best_candidate < self->minimum_probability) ||
                      (value_best_candidate < self->last_delay_probability)));

  if (self->robust_validation_enabled) {
    int is_histogram_valid = 0;
    UpdateRobustValidationStatistics(self, candidate_delay, valley_depth,
                                     value_best_candidate);
    is_histogram_valid = HistogramBasedValidation(self, candidate_delay);
    // Robust validation overrides the instantaneous decision
    // (1: update |last_delay|, 0: keep it).
    valid_candidate = RobustValidation(self, candidate_delay, valid_candidate,
                                       is_histogram_valid);
  }
  if (valid_candidate) {
    if (candidate_delay != self->last_delay) {
      self->last_delay_histogram =
          (self->histogram[candidate_delay] > kLastHistogramMax ?
              kLastHistogramMax : self->histogram[candidate_delay]);
      // Adjust the histogram if we made a change to |last_delay|, though it was
      // not the most likely one according to the histogram.
      if (self->histogram[candidate_delay] <
          self->histogram[self->compare_delay]) {
        self->histogram[self->compare_delay] = self->histogram[candidate_delay];
      }
    }
    self->last_delay = candidate_delay;
    if (value_best_candidate < self->last_delay_probability) {
      self->last_delay_probability = value_best_candidate;
    }
    self->compare_delay = self->last_delay;
  }

  return self->last_delay;
}
————————————————
版权声明：本文为CSDN博主「huihui0725」的原创文章，遵循CC 4.0 BY-SA版权协议，转载请附上原文出处链接及本声明。
原文链接：https://blog.csdn.net/huihui0725/article/details/102820695

一片橡树叶子的故事

关注

2
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
【转】WebRTC AECM时延估计

时延问题：一般基于互相关计算的自适应时延估计算法的计算复杂度为 $O(N^2)$（$N$ 为时延搜索范围），其随计算范围的增长呈二次上升趋势，因此很难在如此大的范围内进行动态的时延计算。在实际应用中一般采用大范围粗略估计加小范围精确计算的方法，例如在WebRTC的AECM回声消除模块中，其API接口中有一个特殊的参数msInSoundCardBuf，要求调用者传入一个以毫秒为单位的回声时延估计值，然后由AECM内部在这个估计值的基础上进行小范围内的回声时延精确计算，因此该参数的准确性将直接影响到AECM内部时延计算的准确性和速度，而这种影响又能直观的
复制链接

扫一扫