/* agc.c */
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* analog_agc.c
*
* Using a feedback system, determines an appropriate analog volume level
* given an input signal and current volume level. Targets a conservative
* signal level and is intended for use with a digital AGC to apply
* additional gain.
*
*/
#include "agc.h"
#include <stdlib.h>
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#ifndef MIN
#define MIN(A, B) ((A) < (B) ? (A) : (B)) // Get min value
#endif
#ifndef MAX
#define MAX(A, B) ((A) > (B) ? (A) : (B)) // Get max value
#endif
// Shifting with negative numbers allowed
// Positive means left shift
#ifndef SHIFT_W32
#define SHIFT_W32(x, c) ((c) >= 0 ? (x) * (1 << (c)) : (x) >> -(c))
#endif
// C + the 32 most significant bits of A * B
#ifndef AGC_SCALEDIFF32
#define AGC_SCALEDIFF32(A, B, C) \
((C) + ((B) >> 16) * (A) + (((0x0000FFFF & (B)) * (A)) >> 16)) // 0x0000FFFF & (B) 取低16位 @
#endif
#ifndef AGC_MUL32
// the 32 most significant bits of A(19) * B(26) >> 13
#define AGC_MUL32(A, B) (((B) >> 13) * (A) + (((0x00001FFF & (B)) * (A)) >> 13))
#endif
/* The slope in Q13 */
static const int16_t kSlope1[8] = {21793, 12517, 7189, 4129,
2372, 1362, 472, 78};
/* The offset in Q14 */
static const int16_t kOffset1[8] = {25395, 23911, 22206, 20737,
19612, 18805, 17951, 17367};
/* The slope in Q13 */
static const int16_t kSlope2[8] = {2063, 1731, 1452, 1218, 1021, 857, 597, 337};
/* The offset in Q14 */
static const int16_t kOffset2[8] = {18432, 18379, 18290, 18177,
18052, 17920, 17670, 17286};
static const int16_t kMuteGuardTimeMs = 8000; // Prevents large volume increases for a short period after muting.
static const int16_t kInitCheck = 42;
static const size_t kNumSubframes = 10;
/* Default settings if config is not used */
#define AGC_DEFAULT_TARGET_LEVEL 3
#define AGC_DEFAULT_COMP_GAIN 9
/* This is the target level for the analog part in ENV scale. To convert to RMS
* scale you
* have to add OFFSET_ENV_TO_RMS.
*/
#define ANALOG_TARGET_LEVEL 11
#define ANALOG_TARGET_LEVEL_2 5 // ANALOG_TARGET_LEVEL / 2
/* Offset between RMS scale (analog part) and ENV scale (digital part). This
* value actually
* varies with the FIXED_ANALOG_TARGET_LEVEL, hence we should in the future
* replace it with
* a table.
*/
#define OFFSET_ENV_TO_RMS 9
/* The reference input level at which the digital part gives an output of
* targetLevelDbfs
* (desired level) if we have no compression gain. This level should be set high
* enough not
* to compress the peaks due to the dynamics.
*/
#define DIGITAL_REF_AT_0_COMP_GAIN 4
/* Speed of reference level decrease.
*/
#define DIFF_REF_TO_ANALOG 5
#ifdef MIC_LEVEL_FEEDBACK
#define NUM_BLOCKS_IN_SAT_BEFORE_CHANGE_TARGET 7
#endif
/* Size of analog gain table */
#define GAIN_TBL_LEN 32
/* Matlab code:
* fprintf(1, '\t%i, %i, %i, %i,\n', round(10.^(linspace(0,10,32)/20) * 2^12));
*/
/* Q12 */
static const uint16_t kGainTableAnalog[GAIN_TBL_LEN] = {
4096, 4251, 4412, 4579, 4752, 4932, 5118, 5312, 5513, 5722, 5938,
6163, 6396, 6638, 6889, 7150, 7420, 7701, 7992, 8295, 8609, 8934,
9273, 9623, 9987, 10365, 10758, 11165, 11587, 12025, 12480, 12953};
/* Gain/Suppression tables for virtual Mic (in Q10) */
static const uint16_t kGainTableVirtualMic[128] = {
1052, 1081, 1110, 1141, 1172, 1204, 1237, 1271, 1305, 1341, 1378,
1416, 1454, 1494, 1535, 1577, 1620, 1664, 1710, 1757, 1805, 1854,
1905, 1957, 2010, 2065, 2122, 2180, 2239, 2301, 2364, 2428, 2495,
2563, 2633, 2705, 2779, 2855, 2933, 3013, 3096, 3180, 3267, 3357,
3449, 3543, 3640, 3739, 3842, 3947, 4055, 4166, 4280, 4397, 4517,
4640, 4767, 4898, 5032, 5169, 5311, 5456, 5605, 5758, 5916, 6078,
6244, 6415, 6590, 6770, 6956, 7146, 7341, 7542, 7748, 7960, 8178,
8402, 8631, 8867, 9110, 9359, 9615, 9878, 10148, 10426, 10711, 11004,
11305, 11614, 11932, 12258, 12593, 12938, 13292, 13655, 14029, 14412, 14807,
15212, 15628, 16055, 16494, 16945, 17409, 17885, 18374, 18877, 19393, 19923,
20468, 21028, 21603, 22194, 22801, 23425, 24065, 24724, 25400, 26095, 26808,
27541, 28295, 29069, 29864, 30681, 31520, 32382};
static const uint16_t kSuppressionTableVirtualMic[128] = {
1024, 1006, 988, 970, 952, 935, 918, 902, 886, 870, 854, 839, 824, 809, 794,
780, 766, 752, 739, 726, 713, 700, 687, 675, 663, 651, 639, 628, 616, 605,
594, 584, 573, 563, 553, 543, 533, 524, 514, 505, 496, 487, 478, 470, 461,
453, 445, 437, 429, 421, 414, 406, 399, 392, 385, 378, 371, 364, 358, 351,
345, 339, 333, 327, 321, 315, 309, 304, 298, 293, 288, 283, 278, 273, 268,
263, 258, 254, 249, 244, 240, 236, 232, 227, 223, 219, 215, 211, 208, 204,
200, 197, 193, 190, 186, 183, 180, 176, 173, 170, 167, 164, 161, 158, 155,
153, 150, 147, 145, 142, 139, 137, 134, 132, 130, 127, 125, 123, 121, 118,
116, 114, 112, 110, 108, 106, 104, 102};
/* Table for target energy levels. Values in Q(-7)
* Matlab code
* targetLevelTable = fprintf('%d,\t%d,\t%d,\t%d,\n',
* round((32767*10.^(-(0:63)'/20)).^2*16/2^7) */
static const int32_t kTargetLevelTable[64] = {
134209536, 106606424, 84680493, 67264106, 53429779, 42440782, 33711911,
26778323, 21270778, 16895980, 13420954, 10660642, 8468049, 6726411,
5342978, 4244078, 3371191, 2677832, 2127078, 1689598, 1342095,
1066064, 846805, 672641, 534298, 424408, 337119, 267783,
212708, 168960, 134210, 106606, 84680, 67264, 53430,
42441, 33712, 26778, 21271, 16896, 13421, 10661,
8468, 6726, 5343, 4244, 3371, 2678, 2127,
1690, 1342, 1066, 847, 673, 534, 424,
337, 268, 213, 169, 134, 107, 85,
67};
/* Truncating 32/16-bit division with a 16-bit quotient.
 * Returns the largest int16_t value (0x7FFF) when den is zero. */
static __inline int16_t DivW32W16ResW16(int32_t num, int16_t den) {
  if (den == 0) {
    return (int16_t) 0x7FFF;
  }
  return (int16_t) (num / den);
}
/* Truncating 32/16-bit division with a 32-bit quotient.
 * Returns the largest int32_t value (0x7FFFFFFF) when den is zero. */
static __inline int32_t DivW32W16(int32_t num, int16_t den) {
  if (den == 0) {
    return (int32_t) 0x7FFFFFFF;
  }
  return num / den;
}
// Count the leading zero bits of a nonzero 32-bit value; result in [0, 31].
// Input 0 is not allowed (see the assert below).
static __inline uint32_t __clz_uint32(uint32_t v) {
  // Never used with input 0
  assert(v > 0);
#if defined(__INTEL_COMPILER)
  return _bit_scan_reverse(v) ^ 31U;
#elif defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
  // This will translate either to (bsr ^ 31U), clz, ctlz, cntlz, lzcnt
  // depending on -march= setting, or to a software routine on exotic machines.
  return __builtin_clz(v);
#elif defined(_MSC_VER)
  // for _BitScanReverse
  // NOTE(review): a block-scope #include is fragile; consider hoisting this
  // to the top of the file under a _MSC_VER guard.
  #include <intrin.h>
  {
    uint32_t idx;
    _BitScanReverse(&idx, v);
    return idx ^ 31U;
  }
#else
  // Portable fallback: table of leading-zero counts for one byte.
  static const uint8_t byte_to_unary_table[] = {
      8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  // BUG FIX: the original referenced an undeclared identifier 'word' in the
  // expressions below; the intended operand is the function argument 'v'.
  return v > 0xffffff ? byte_to_unary_table[v >> 24] :
         v > 0xffff ? byte_to_unary_table[v >> 16] + 8 :
         v > 0xff ? byte_to_unary_table[v >> 8] + 16 :
         byte_to_unary_table[v] + 24;
#endif
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
static __inline int16_t NormU32(uint32_t a) {
  return (a == 0) ? (int16_t) 0 : (int16_t) __clz_uint32(a);
}
// Saturate a 32-bit value to the int16_t range [-32768, 32767].
// BUG FIX: the original cast value32 to int16_t *before* comparing it with
// 32767 (cast binds tighter than '>'), so the upper clamp could never fire
// and large positive inputs wrapped instead of saturating.
static __inline int16_t SatW32ToW16(int32_t value32) {
  if (value32 > 32767) {
    return (int16_t) 32767;
  }
  if (value32 < -32768) {
    return (int16_t) (-32768);
  }
  return (int16_t) value32;
}
// Return the number of steps a can be left-shifted without overflow,
// or 0 if a == 0.
// NOTE(review): a == -1 makes ~a == 0 and would trip the assert inside
// __clz_uint32 — identical to the original behavior; confirm callers never
// pass -1.
static __inline int16_t NormW32(int32_t a) {
  uint32_t magnitude;
  if (a == 0) {
    return 0;
  }
  // Fold negative values onto their one's complement so a single
  // leading-zero count works for both signs; one position belongs to the
  // sign bit and is not a usable shift step.
  magnitude = (uint32_t) (a < 0 ? ~a : a);
  return (int16_t) (__clz_uint32(magnitude) - 1);
}
// Saturating addition of two int16_t values.
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
  const int32_t sum = (int32_t) a + (int32_t) b;
  return SatW32ToW16(sum);
}
// Compute sum_i ((vector1[i] * vector2[i]) >> scaling), accumulating in
// 64 bits, and return the low 32 bits of the total. Each product is
// shifted individually, exactly as in the unrolled original.
int32_t DotProductWithScale(const int16_t *vector1,
                            const int16_t *vector2,
                            size_t length,
                            int scaling) {
  int64_t accumulator = 0;
  size_t idx;
  for (idx = 0; idx < length; idx++) {
    accumulator += (vector1[idx] * vector2[idx]) >> scaling;
  }
  return (int32_t) accumulator;
}
// Single-precision square root using a hardware instruction where available:
// SSE sqrtss on x86-64, x87 fsqrt on 32-bit x86, vsqrt.f32 on ARM with VFP.
// Falls back to the library sqrtf() elsewhere.
// NOTE(review): the fallback assumes <math.h> is pulled in via agc.h —
// confirm.
static float fast_sqrt(float x) {
float s;
#if defined(__x86_64__)
__asm__ __volatile__ ("sqrtss %1, %0" : "=x"(s) : "x"(x));
#elif defined(__i386__)
s = x;
__asm__ __volatile__ ("fsqrt" : "+t"(s));
#elif defined(__arm__) && defined(__VFP_FP__)
__asm__ __volatile__ ("vsqrt.f32 %0, %1" : "=w"(s) : "w"(x));
#else
s = sqrtf(x);
#endif
return s;
}
// Decimate |in| by two into |out| using two cascaded three-stage allpass
// filter branches (a half-band polyphase structure); |len| input samples
// (must be even) produce len/2 output samples.
//   filtState (i/o) eight 32-bit allpass state variables.
// NOTE(review): the kResampleAllpass1/kResampleAllpass2 coefficient tables
// are declared elsewhere (presumably agc.h) — confirm.
static __inline void downsampleBy2(const int16_t *in, size_t len,
int16_t *out, int32_t *filtState) {
int32_t tmp1, tmp2, diff, in32, out32;
size_t i;
// Load the filter state into locals for the duration of the loop.
register int32_t state0 = filtState[0];
register int32_t state1 = filtState[1];
register int32_t state2 = filtState[2];
register int32_t state3 = filtState[3];
register int32_t state4 = filtState[4];
register int32_t state5 = filtState[5];
register int32_t state6 = filtState[6];
register int32_t state7 = filtState[7];
for (i = (len >> 1); i > 0; i--) {
// lower allpass filter (processes even-index samples, scaled to Q10)
in32 = (int32_t) (*in++) * (1 << 10);
diff = in32 - state1;
tmp1 = ((state0) + ((diff) >> 16) * (kResampleAllpass2[0]) +
(((uint32_t) ((diff) & 0x0000FFFF) * (kResampleAllpass2[0])) >> 16));
state0 = in32;
diff = tmp1 - state2;
tmp2 = ((state1) + ((diff) >> 16) * (kResampleAllpass2[1]) +
(((uint32_t) ((diff) & 0x0000FFFF) * (kResampleAllpass2[1])) >> 16));
state1 = tmp1;
diff = tmp2 - state3;
state3 = ((state2) + ((diff) >> 16) * (kResampleAllpass2[2]) +
(((uint32_t) ((diff) & 0x0000FFFF) * (kResampleAllpass2[2])) >> 16));
state2 = tmp2;
// upper allpass filter (processes odd-index samples, scaled to Q10)
in32 = (int32_t) (*in++) * (1 << 10);
diff = in32 - state5;
tmp1 = ((state4) + ((diff) >> 16) * (kResampleAllpass1[0]) +
(((uint32_t) ((diff) & 0x0000FFFF) * (kResampleAllpass1[0])) >> 16));
state4 = in32;
diff = tmp1 - state6;
tmp2 = ((state5) + ((diff) >> 16) * (kResampleAllpass1[1]) +
(((uint32_t) ((diff) & 0x0000FFFF) * (kResampleAllpass1[1])) >> 16));
state5 = tmp1;
diff = tmp2 - state7;
state7 = ((state6) + ((diff) >> 16) * (kResampleAllpass1[2]) +
(((uint32_t) ((diff) & 0x0000FFFF) * (kResampleAllpass1[2])) >> 16));
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
*out++ = SatW32ToW16(out32);
}
// Persist the filter state for the next call.
filtState[0] = state0;
filtState[1] = state1;
filtState[2] = state2;
filtState[3] = state3;
filtState[4] = state4;
filtState[5] = state5;
filtState[6] = state6;
filtState[7] = state7;
}
// To generate the gaintable, copy&paste the following lines to a Matlab window:
// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1;
// zeros = 0:31; lvl = 2.^(1-zeros);
// A = -10*log10(lvl) * (CompRatio - 1) / CompRatio;
// B = MaxGain - MinGain;
// gains = round(2^16*10.^(0.05 * (MinGain + B * (
// log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) /
// log(1/(1+exp(Knee*B))))));
// fprintf(1, '\t%i, %i, %i, %i,\n', gains);
// % Matlab code for plotting the gain and input/output level characteristic
// (copy/paste the following 3 lines):
// in = 10*log10(lvl); out = 20*log10(gains/65536);
// subplot(121); plot(in, out); axis([-30, 0, -5, 20]); grid on; xlabel('Input
// (dB)'); ylabel('Gain (dB)');
// subplot(122); plot(in, in+out); axis([-30, 0, -30, 5]); grid on;
// xlabel('Input (dB)'); ylabel('Output (dB)');
// zoom on;
// Generator table for y=log2(1+e^x) in Q8.
enum {
kGenFuncTableSize = 128
};
static const uint16_t kGenFuncTable[kGenFuncTableSize] = {
256, 485, 786, 1126, 1484, 1849, 2217, 2586, 2955, 3324, 3693,
4063, 4432, 4801, 5171, 5540, 5909, 6279, 6648, 7017, 7387, 7756,
8125, 8495, 8864, 9233, 9603, 9972, 10341, 10711, 11080, 11449, 11819,
12188, 12557, 12927, 13296, 13665, 14035, 14404, 14773, 15143, 15512, 15881,
16251, 16620, 16989, 17359, 17728, 18097, 18466, 18836, 19205, 19574, 19944,
20313, 20682, 21052, 21421, 21790, 22160, 22529, 22898, 23268, 23637, 24006,
24376, 24745, 25114, 25484, 25853, 26222, 26592, 26961, 27330, 27700, 28069,
28438, 28808, 29177, 29546, 29916, 30285, 30654, 31024, 31393, 31762, 32132,
32501, 32870, 33240, 33609, 33978, 34348, 34717, 35086, 35456, 35825, 36194,
36564, 36933, 37302, 37672, 38041, 38410, 38780, 39149, 39518, 39888, 40257,
40626, 40996, 41365, 41734, 42104, 42473, 42842, 43212, 43581, 43950, 44320,
44689, 45058, 45428, 45797, 46166, 46536, 46905};
static const int16_t kAvgDecayTime = 250; // frames; < 3000
// Generate the 32-entry compressor gain table (Q16) used by the fixed
// digital AGC part.
//   gainTable       (o) one gain per input-level step of 10*log10(2)/2 dB.
//   digCompGaindB   (i) maximum digital compression gain in dB (Q0).
//   targetLevelDbfs (i) target output level in -dBFS (Q0).
//   limiterEnable   (i) non-zero enables hard limiting above limiterLvl.
//   analogTarget    (i) analog target level in dB (Q0).
// Returns 0 on success, -1 if the derived diffGain cannot index
// kGenFuncTable.
int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16
int16_t digCompGaindB, // Q0
int16_t targetLevelDbfs, // Q0
uint8_t limiterEnable,
int16_t analogTarget) // Q0
{
// This function generates the compressor gain table used in the fixed digital
// part.
uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox;
int32_t inLevel, limiterLvl;
int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32;
const uint16_t kLog10 = 54426; // log2(10) in Q14
const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14
const uint16_t kLogE_1 = 23637; // log2(e) in Q14
uint16_t constMaxGain;
uint16_t tmpU16, intPart, fracPart;
const int16_t kCompRatio = 3;
// const int16_t kSoftLimiterLeft = 1;
int16_t limiterOffset = 0; // Limiter offset
int16_t limiterIdx, limiterLvlX;
int16_t constLinApprox, maxGain, diffGain;//zeroGainLvl
int16_t i, tmp16, tmp16no1;
int zeros, zerosScale;
// Constants
// kLogE_1 = 23637; // log2(e) in Q14
// kLog10 = 54426; // log2(10) in Q14
// kLog10_2 = 49321; // 10*log10(2) in Q14
// Calculate maximum digital gain and zero gain level
tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1);
tmp16no1 = analogTarget - targetLevelDbfs;
tmp16no1 +=
DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
maxGain = MAX(tmp16no1, (analogTarget - targetLevelDbfs));
// tmp32no1 = maxGain * kCompRatio;
// zeroGainLvl = digCompGaindB;
// zeroGainLvl -= WebRtcSpl_DivW32W16ResW16(tmp32no1 + ((kCompRatio - 1) >> 1), kCompRatio - 1);
if ((digCompGaindB <= analogTarget) && (limiterEnable)) {
//zeroGainLvl += (analogTarget - digCompGaindB + kSoftLimiterLeft);
limiterOffset = 0;
}
// Calculate the difference between maximum gain and gain at 0dB0v:
// diffGain = maxGain + (compRatio-1)*zeroGainLvl/compRatio
// = (compRatio-1)*digCompGaindB/compRatio
tmp32no1 = digCompGaindB * (kCompRatio - 1);
diffGain =
DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
// diffGain indexes kGenFuncTable below, so it must be in range.
if (diffGain < 0 || diffGain >= kGenFuncTableSize) {
assert(0);
return -1;
}
// Calculate the limiter level and index:
// limiterLvlX = analogTarget - limiterOffset
// limiterLvl = targetLevelDbfs + limiterOffset/compRatio
limiterLvlX = analogTarget - limiterOffset;
limiterIdx = 2 + DivW32W16ResW16((int32_t) limiterLvlX * (1 << 13),
kLog10_2 / 2);
tmp16no1 =
DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio);
limiterLvl = targetLevelDbfs + tmp16no1;
// Calculate (through table lookup):
// constMaxGain = log2(1+2^(log2(e)*diffGain)); (in Q8)
constMaxGain = kGenFuncTable[diffGain]; // in Q8
// Calculate a parameter used to approximate the fractional part of 2^x with a
// piecewise linear function in Q14:
// constLinApprox = round(3/2*(4*(3-2*sqrt(2))/(log(2)^2)-0.5)*2^14);
constLinApprox = 22817; // in Q14
// Calculate a denominator used in the exponential part to convert from dB to
// linear scale:
// den = 20*constMaxGain (in Q8)
den = ((int32_t) (int16_t) (20) * (uint16_t) (constMaxGain)); // in Q8
// Fill one table entry per iteration; entry i corresponds to an input
// level (i - 1) halvings below full scale.
for (i = 0; i < 32; i++) {
// Calculate scaled input level (compressor):
// inLevel =
// fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio)
tmp16 = (int16_t) ((kCompRatio - 1) * (i - 1)); // Q0
tmp32 = ((int32_t) (int16_t) (tmp16) * (uint16_t) (kLog10_2)) + 1; // Q14
inLevel = DivW32W16(tmp32, kCompRatio); // Q14
// Calculate diffGain-inLevel, to map using the genFuncTable
inLevel = (int32_t) diffGain * (1 << 14) - inLevel; // Q14
// Make calculations on abs(inLevel) and compensate for the sign afterwards.
absInLevel = (uint32_t) (((int32_t) (inLevel) >= 0) ? ((int32_t) (inLevel)) : -((int32_t) (inLevel))); // Q14
// LUT with interpolation
intPart = (uint16_t) (absInLevel >> 14);
fracPart =
(uint16_t) (absInLevel & 0x00003FFF); // extract the fractional part
tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8
tmpU32no1 = tmpU16 * fracPart; // Q22
tmpU32no1 += (uint32_t) kGenFuncTable[intPart] << 14; // Q22
logApprox = tmpU32no1 >> 8; // Q14
// Compensate for negative exponent using the relation:
// log2(1 + 2^-x) = log2(1 + 2^x) - x
if (inLevel < 0) {
zeros = NormU32(absInLevel);
zerosScale = 0;
if (zeros < 15) {
// Not enough space for multiplication
tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1)
tmpU32no2 = ((uint32_t) ((uint32_t) (tmpU32no2) * (uint16_t) (kLogE_1))); // Q(zeros+13)
if (zeros < 9) {
zerosScale = 9 - zeros;
tmpU32no1 >>= zerosScale; // Q(zeros+13)
} else {
tmpU32no2 >>= zeros - 9; // Q22
}
} else {
tmpU32no2 = ((uint32_t) ((uint32_t) (absInLevel) * (uint16_t) (kLogE_1))); // Q28
tmpU32no2 >>= 6; // Q22
}
logApprox = 0;
if (tmpU32no2 < tmpU32no1) {
logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); // Q14
}
}
// Combine the maximum gain with the table-lookup correction.
numFIX = (maxGain * constMaxGain) * (1 << 6); // Q14
numFIX -= (int32_t) logApprox * diffGain; // Q14
// Calculate ratio
// Shift |numFIX| as much as possible.
// Ensure we avoid wrap-around in |den| as well.
if (numFIX > (den >> 8) || -numFIX > (den >> 8)) // |den| is Q8.
{
zeros = NormW32(numFIX);
} else {
zeros = NormW32(den) + 8;
}
numFIX *= 1 << zeros; // Q(14+zeros)
// Shift den so we end up in Qy1
tmp32no1 = SHIFT_W32(den, zeros - 9); // Q(zeros - 1)
y32 = numFIX / tmp32no1; // in Q15
// This is to do rounding in Q14.
y32 = y32 >= 0 ? (y32 + 1) >> 1 : -((-y32 + 1) >> 1);
if (limiterEnable && (i < limiterIdx)) {
tmp32 = ((int32_t) (int16_t) (i - 1) * (uint16_t) (kLog10_2)); // Q14
tmp32 -= limiterLvl * (1 << 14); // Q14
y32 = DivW32W16(tmp32 + 10, 20);
}
// Convert from the dB domain to the log2 domain via log2(10); the larger
// branch halves first to avoid overflow.
if (y32 > 39000) {
tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27
tmp32 >>= 13; // In Q14.
} else {
tmp32 = y32 * kLog10 + 8192; // in Q28
tmp32 >>= 14; // In Q14.
}
tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16)
// Calculate power 2^(intPart + fracPart) using a piecewise linear
// approximation of the fractional power.
if (tmp32 > 0) {
intPart = (int16_t) (tmp32 >> 14);
fracPart = (uint16_t) (tmp32 & 0x00003FFF); // in Q14
if ((fracPart >> 13) != 0) {
tmp16 = (2 << 14) - constLinApprox;
tmp32no2 = (1 << 14) - fracPart;
tmp32no2 *= tmp16;
tmp32no2 >>= 13;
tmp32no2 = (1 << 14) - tmp32no2;
} else {
tmp16 = constLinApprox - (1 << 14);
tmp32no2 = (fracPart * tmp16) >> 13;
}
fracPart = (uint16_t) tmp32no2;
gainTable[i] =
(1 << intPart) + SHIFT_W32(fracPart, intPart - 14);
} else {
gainTable[i] = 0;
}
}
return 0;
}
// Reset the digital AGC state for the given mode. Always returns 0.
int32_t WebRtcAgc_InitDigital(DigitalAgc *stt, int16_t agcMode) {
  stt->agcMode = agcMode;
  stt->gain = 65536;
  stt->gatePrevious = 0;
  stt->capacitorFast = 0;
  // In fixed-digital mode start at the minimum so the correct gain is found
  // faster; otherwise start out with 0 dB gain,
  // (int32_t)(0.125f * 32768.0f * 32768.0f).
  stt->capacitorSlow = (agcMode == kAgcModeFixedDigital) ? 0 : 134217728;
#ifdef WEBRTC_AGC_DEBUG_DUMP
  stt->frameCounter = 0;
#endif
  // Reset the near-end and far-end voice activity detectors.
  WebRtcAgc_InitVad(&stt->vadNearend);
  WebRtcAgc_InitVad(&stt->vadFarend);
  return 0;
}
// Feed one block of far-end (render/loudspeaker) samples to the far-end
// VAD so its statistics can correct the near-end VAD score later.
// Always returns 0.
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc *stt,
                                     const int16_t *in_far,
                                     size_t nrSamples) {
  assert(stt);
  // Update the far-end VAD; the returned score is not needed here.
  WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples);
  return 0;
}
// Apply the digital AGC to one 10 ms frame.
//   stt            (i/o) digital AGC state.
//   in_near        (i) near-end input, one pointer per band.
//   num_bands      (i) number of frequency bands.
//   out            (o) gain-adjusted output (may alias in_near).
//   FS             (i) sample rate: 8000, 16000, 32000 or 48000 Hz.
//   lowlevelSignal (i) non-zero freezes the slow-envelope decay.
// Returns 0 on success, -1 on unsupported sample rate.
int32_t WebRtcAgc_ProcessDigital(DigitalAgc *stt,
int16_t *const *in_near,
size_t num_bands,
int16_t *const *out,
uint32_t FS,
int16_t lowlevelSignal) {
// array for gains (one value per ms, incl start & end)
int32_t gains[11];
int32_t out_tmp, tmp32;
int32_t env[10];
int32_t max_nrg;
int32_t cur_level;
int32_t gain32, delta;
int16_t logratio;
int16_t lower_thr, upper_thr;
int16_t zeros = 0, zeros_fast, frac = 0;
int16_t decay;
int16_t gate, gain_adj;
int16_t k;
size_t n, i, L;
int16_t L2; // samples/subframe
// determine number of samples per ms: the frame is processed as 10 1-ms
// subframes of L samples each (L2 = log2(L))
if (FS == 8000) {
L = 8;
L2 = 3;
} else if (FS == 16000 || FS == 32000 || FS == 48000) {
L = 16;
L2 = 4;
} else {
return -1;
}
for (i = 0; i < num_bands; ++i) {
if (in_near[i] != out[i]) {
// Only needed if they don't already point to the same place.
memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0]));
}
}
// VAD for near end: returns a standard score (log likelihood ratio)
// reflecting the probability of voice activity in this frame
logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out[0], L * 10); // one 10 ms frame
// Account for far end VAD: once more than 10 far-end frames (100 ms) have
// been seen, correct the near-end score with the far-end one:
// V = 3/4 * V_near - 1/4 * V_far
if (stt->vadFarend.counter > 10) {
tmp32 = 3 * logratio;
logratio = (int16_t) ((tmp32 - stt->vadFarend.logRatio) >> 2); // vadFarend is updated by WebRtcAgc_AddFarendToDigital()
}
// Determine decay factor depending on the VAD score
// upper_thr = 1.0f;
// lower_thr = 0.25f;
upper_thr = 1024; // Q10 upper threshold
lower_thr = 0; // Q10 lower threshold
if (logratio > upper_thr) {
// decay = -2^17 / DecayTime; -> -65
decay = -65; // score above the upper threshold: full decay
} else if (logratio < lower_thr) {
decay = 0; // score below the lower threshold: no decay
} else {
// decay = (int16_t)(((lower_thr - logratio)
// * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10);
// SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65
tmp32 = (lower_thr - logratio) * 65; // linear interpolation in between
decay = (int16_t) (tmp32 >> 12);
}
// adjust decay factor for long silence (detected as low standard deviation)
// This is only done in the adaptive modes
if (stt->agcMode != kAgcModeFixedDigital) {
if (stt->vadNearend.stdLongTerm < 4000) { // long-term std of the input level in dB
decay = 0;
} else if (stt->vadNearend.stdLongTerm < 8096) {
// decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >>
// 12);
tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay;
decay = (int16_t) (tmp32 >> 12);
}
if (lowlevelSignal != 0) { // low-level signal: freeze the slow envelope
decay = 0;
}
}
#ifdef WEBRTC_AGC_DEBUG_DUMP
stt->frameCounter++;
fprintf(stt->logFile, "%5.2f\t%d\t%d\t%d\t", (float)(stt->frameCounter) / 100,
logratio, decay, stt->vadNearend.stdLongTerm);
#endif
// Below: track fast and slow envelopes; together with the gain table they
// yield the per-subframe gain array gains[].
// Find max amplitude per sub frame
// iterate over sub frames
for (k = 0; k < 10; k++) {
// iterate over samples
max_nrg = 0;
for (n = 0; n < L; n++) {
int32_t nrg = out[0][k * L + n] * out[0][k * L + n];
if (nrg > max_nrg) {
max_nrg = nrg;
}
}
env[k] = max_nrg; // the peak energy of each 1 ms subframe forms the envelope
}
// Calculate gain per sub frame
gains[0] = stt->gain;
for (k = 0; k < 10; k++) {
// Fast envelope follower
// decay time = -131000 / -1000 = 131 (ms)
stt->capacitorFast =
AGC_SCALEDIFF32(-1000, stt->capacitorFast, stt->capacitorFast); // C_fast += (-1000/65536) * C_fast
if (env[k] > stt->capacitorFast) {
stt->capacitorFast = env[k]; // instant attack: jump to the envelope
}
// Slow envelope follower
if (env[k] > stt->capacitorSlow) {
// increase capacitorSlow
stt->capacitorSlow = AGC_SCALEDIFF32(500, (env[k] - stt->capacitorSlow),
stt->capacitorSlow); // C_slow += (500/65536) * (env[k] - C_slow)
} else {
// decrease capacitorSlow
stt->capacitorSlow =
AGC_SCALEDIFF32(decay, stt->capacitorSlow, stt->capacitorSlow); // C_slow += (decay/65536) * C_slow, decay <= 0
}
// use maximum of both capacitors as current level
if (stt->capacitorFast > stt->capacitorSlow) {
cur_level = stt->capacitorFast;
} else {
cur_level = stt->capacitorSlow;
}
// Translate signal level into gain, using a piecewise linear approximation
// of log2: the leading-zero count selects the table index (0-31, integer
// part) and the bits below the leading one are linearly interpolated
// (fractional part).
// find number of leading zeros
zeros = NormU32((uint32_t) cur_level);
if (cur_level == 0) {
zeros = 31;
}
tmp32 = ((uint32_t) cur_level << zeros) & 0x7FFFFFFF;
frac = (int16_t) (tmp32 >> 19); // Q12.
tmp32 = (stt->gainTable[zeros - 1] - stt->gainTable[zeros]) * frac; // interpolate between adjacent table entries
gains[k + 1] = stt->gainTable[zeros] + (tmp32 >> 12); // table value (integer part) plus interpolated fraction
#ifdef WEBRTC_AGC_DEBUG_DUMP
if (k == 0) {
fprintf(stt->logFile, "%d\t%d\t%d\t%d\t%d\n", env[0], cur_level,
stt->capacitorFast, stt->capacitorSlow, zeros);
}
#endif
}
// Gate processing (lower gain during absence of speech): the gate
// attenuates the computed gain based on the fast envelope and the
// short-term standard deviation.
zeros = (zeros << 9) - (frac >> 3); // ~ -log2(cur_level) in Q9
// find number of leading zeros
zeros_fast = NormU32((uint32_t) stt->capacitorFast); // ~ -log2(C_fast)
if (stt->capacitorFast == 0) {
zeros_fast = 31;
}
tmp32 = ((uint32_t) stt->capacitorFast << zeros_fast) & 0x7FFFFFFF;
zeros_fast <<= 9;
zeros_fast -= (int16_t) (tmp32 >> 22);
gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm; // gate ~ -log(C_slow/C_fast) + (3.91 - STD_near)
if (gate < 0) {
stt->gatePrevious = 0;
} else {
tmp32 = stt->gatePrevious * 7;
gate = (int16_t) ((gate + tmp32) >> 3); // smoothing: g = g/8 + 7/8 * g_prev
stt->gatePrevious = gate;
}
// gate < 0 -> no gate (speech: use the computed gain directly)
// gate > 2500 -> max gate
if (gate > 0) {
if (gate < 2500) {
gain_adj = (2500 - gate) >> 5; // between the extremes, slide the gain between gainTable[0] and the computed value
} else {
gain_adj = 0; // maximum gate (noise): pull the gain toward gainTable[0] so noise is not over-amplified
}
for (k = 0; k < 10; k++) {
if ((gains[k + 1] - stt->gainTable[0]) > 8388608) { // 2^23
// To prevent wraparound
tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8;
tmp32 *= 178 + gain_adj;
} else {
tmp32 = (gains[k + 1] - stt->gainTable[0]) * (178 + gain_adj);
tmp32 >>= 8;
}
gains[k + 1] = stt->gainTable[0] + tmp32; // gate-corrected subframe gains
}
}
// Limit gain to avoid overload distortion
for (k = 0; k < 10; k++) {
// To prevent wrap around
zeros = 10;
if (gains[k + 1] > 47453132) { // above this, shift 10 is not enough before squaring
zeros = 16 - NormW32(gains[k + 1]);
}
gain32 = (gains[k + 1] >> zeros) + 1;
gain32 *= gain32;
// check for overflow of the gained output
while (AGC_MUL32((env[k] >> 12) + 1, gain32) >
SHIFT_W32((int32_t) 32767, 2 * (1 - zeros + 10))) {
// multiply by 253/256 ==> -0.1 dB
if (gains[k + 1] > 8388607) { // 2^23 - 1
// Prevent wrap around
gains[k + 1] = (gains[k + 1] / 256) * 253;
} else {
gains[k + 1] = (gains[k + 1] * 253) / 256;
}
gain32 = (gains[k + 1] >> zeros) + 1;
gain32 *= gain32;
}
}
// gain reductions should be done 1 ms earlier than gain increases
for (k = 1; k < 10; k++) {
if (gains[k] > gains[k + 1]) {
gains[k] = gains[k + 1];
}
}
// save start gain for next frame
stt->gain = gains[10];
// Apply the gain, ramping linearly within each subframe.
// handle first sub frame separately
delta = (gains[1] - gains[0]) * (1 << (4 - L2));
gain32 = gains[0] * (1 << 4);
// iterate over samples
for (n = 0; n < L; n++) {
for (i = 0; i < num_bands; ++i) {
tmp32 = out[i][n] * ((gain32 + 127) >> 7);
out_tmp = tmp32 >> 16;
if (out_tmp > 4095) {
out[i][n] = (int16_t) 32767;
} else if (out_tmp < -4096) {
out[i][n] = (int16_t) -32768;
} else {
tmp32 = out[i][n] * (gain32 >> 4);
out[i][n] = (int16_t) (tmp32 >> 16);
}
}
//
gain32 += delta;
}
// iterate over the remaining subframes
for (k = 1; k < 10; k++) {
delta = (gains[k + 1] - gains[k]) * (1 << (4 - L2)); // per-sample gain step within the subframe
gain32 = gains[k] * (1 << 4);
// iterate over samples
for (n = 0; n < L; n++) {
for (i = 0; i < num_bands; ++i) { // iterate over bands
int64_t tmp64 = ((int64_t) (out[i][k * L + n])) * (gain32 >> 4);
tmp64 = tmp64 >> 16;
if (tmp64 > 32767) {
out[i][k * L + n] = 32767;
} else if (tmp64 < -32768) {
out[i][k * L + n] = -32768;
} else {
out[i][k * L + n] = (int16_t) (tmp64);
}
}
gain32 += delta;
}
}
return 0;
}
// Reset an AgcVad state to its initial statistics.
void WebRtcAgc_InitVad(AgcVad *state) {
  int16_t tap;
  state->HPstate = 0;   // state of high pass filter
  state->logRatio = 0;  // log( P(active) / P(inactive) )
  state->counter = 3;   // counts updates
  // Long-term statistics: mean input level (Q10), variance (Q8) and
  // standard deviation in dB.
  state->meanLongTerm = 15 << 10;
  state->varianceLongTerm = 500 << 8;
  state->stdLongTerm = 0;
  // Short-term statistics with the same scaling.
  state->meanShortTerm = 15 << 10;
  state->varianceShortTerm = 500 << 8;
  state->stdShortTerm = 0;
  // Clear the downsampling filter state.
  for (tap = 0; tap < 8; tap++) {
    state->downState[tap] = 0;
  }
}
// AgcVad *state 的初始化在WebRtcAgc_InitVad , ProcessVad函数将返回一个标准分logRatio @
int16_t WebRtcAgc_ProcessVad(AgcVad *state, // (i) VAD state
const int16_t *in, // (i) Speech signal
size_t nrSamples) // (i) number of samples
{
uint32_t nrg;
int32_t out, tmp32, tmp32b;
uint16_t tmpU16;
int16_t k, subfr, tmp16;
int16_t buf1[8];
int16_t buf2[4];
int16_t HPstate;
int16_t zeros, dB;
// process in 10 sub frames of 1 ms (to save on memory)
nrg = 0;
HPstate = state->HPstate;
for (subfr = 0; subfr < 10; subfr++) { // 首先下采样到4kHz,按10个1ms的子帧处理 @
// downsample to 4 kHz
if (nrSamples == 160) {
for (k = 0; k < 8; k++) {
tmp32 = (int32_t) in[2 * k] + (int32_t) in[2 * k + 1];
tmp32 >>= 1; // 相邻两个sample取平均,16个sample变为8个 @
buf1[k] = (int16_t) tmp32;
}
in += 16;
downsampleBy2(buf1, 8, buf2, state->downState); // 8个sample下采样到4个 @
} else {
downsampleBy2(in, 8, buf2, state->downState);
in += 8;
}
// high pass filter and compute energy
for (k = 0; k < 4; k++) {
out = buf2[k] + HPstate;
tmp32 = 600 * out;
HPstate = (int16_t) ((tmp32 >> 10) - buf2[k]);
// Add 'out * out / 2**6' to 'nrg' in a non-overflowing
// way. Guaranteed to work as long as 'out * out / 2**6' fits in
// an int32_t.
nrg += out * (out / (1 << 6));
nrg += out * (out % (1 << 6)) / (1 << 6);
}
}
state->HPstate = HPstate;
// find number of leading zeros
if (!(0xFFFF0000 & nrg)) { // nrg取值在低16位时 @
zeros = 16;
} else {
zeros = 0;
}
if (!(0xFF000000 & (nrg << zeros))) { // nrg取值在高16位的低8位 (1111 1111 【0000 0000】 0000 0000 0000 0000) @
zeros += 8;
}
if (!(0xF0000000 & (nrg << zeros))) { // nrg取值在高12-8位 (1111 【0000】 0000 0000 0000 0000 0000 0000) @
zeros += 4;
}
if (!(0xC0000000 & (nrg << zeros))) { // nrg取值在高14-13位 (11【00】 0000 0000 0000 0000 0000 0000 0000) @
zeros += 2;
}
if (!(0x80000000 & (nrg << zeros))) {
zeros += 1;
}
// energy level (range {-32..30}) (Q10)
dB = (15 - zeros) * (1 << 11);
// Update statistics
if (state->counter < kAvgDecayTime) {
// decay time = AvgDecTime * 10 ms
state->counter++;
}
// 计算短时均值和方差,描述语音包络瞬时变化,能够准确反映语音的包络 @
// update short-term estimate of mean energy level (Q10)
tmp32 = state->meanShortTerm * 15 + dB;
state->meanShortTerm = (int16_t) (tmp32 >> 4);
// update short-term estimate of variance in energy level (Q8)
tmp32 = (dB * dB) >> 12;
tmp32 += state->varianceShortTerm * 15;
state->varianceShortTerm = tmp32 / 16;
// update short-term estimate of standard deviation in energy level (Q10)
tmp32 = state->meanShortTerm * state->meanShortTerm;
tmp32 = (state->varianceShortTerm << 12) - tmp32;
state->stdShortTerm = (int16_t) fast_sqrt(tmp32);
// 计算长时均值、方差、标准差,描述信号整体缓慢的变化趋势,勾勒信号的 “重心线”,比较平滑有利于利用门限值作为检测条件 @
// update long-term estimate of mean energy level (Q10)
tmp32 = state->meanLongTerm * state->counter + dB;
state->meanLongTerm =
DivW32W16ResW16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); // 计算 tmp32 / counter @
// update long-term estimate of variance in energy level (Q8)
tmp32 = (dB * dB) >> 12;
tmp32 += state->varianceLongTerm * state->counter;
state->varianceLongTerm =
DivW32W16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
// update long-term estimate of standard deviation in energy level (Q10)
tmp32 = state->meanLongTerm * state->meanLongTerm;
tmp32 = (state->varianceLongTerm << 12) - tmp32;
state->stdLongTerm = (int16_t) fast_sqrt(tmp32);
// update voice activity measure (Q10)
tmp16 = 3 << 12;
// TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in
// ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16()
// was used, which did an intermediate cast to (int16_t), hence losing
// significant bits. This cause logRatio to max out positive, rather than
// negative. This is a bug, but has very little significance.
tmp32 = tmp16 * (int16_t) (dB - state->meanLongTerm);
tmp32 = DivW32W16(tmp32, state->stdLongTerm);
tmpU16 = (13 << 12);
tmp32b = ((int32_t) (int16_t) (state->logRatio) * (uint16_t) (tmpU16));
tmp32 += tmp32b >> 10;
// 计算标准分数,描述短时均值与 “重心线” 的偏差,位于中心之上的部分可以认为发生语音活动的可能性极大; @
state->logRatio = (int16_t) (tmp32 >> 6);
// limit
if (state->logRatio > 2048) {
state->logRatio = 2048;
}
if (state->logRatio < -2048) {WebRtcAgc_ProcessAnalog
state->logRatio = -2048;
}
return state->logRatio; // Q10
}
/*
 * Applies the slowly varying analog-emulation gain to the near-end signal,
 * computes the per-subframe envelope and the 2 ms energies consumed by the
 * analog adaptation, and runs the VAD on the low band.
 *
 * state     - LegacyAgc instance.
 * in_mic    - in/out audio, one pointer per band; band 0 is the low band.
 * num_bands - number of bands in |in_mic|.
 * samples   - samples per band in this 10 ms frame (80 @ 8 kHz, else 160).
 *
 * Returns 0 on success, -1 on an invalid frame size.
 */
int WebRtcAgc_AddMic(void *state,
                     int16_t *const *in_mic,
                     size_t num_bands,
                     size_t samples) {
  int32_t nrg, max_nrg, sample, tmp32;
  int32_t *ptr;
  uint16_t targetGainIdx, gain;
  size_t i;
  int16_t n, L, tmp16, tmp_speech[16];
  LegacyAgc *stt;
  stt = (LegacyAgc *) state;
  if (stt->fs == 8000) {
    /* 10 sub-frames of 8 samples each; a 10 ms frame is 80 samples. */
    L = 8;
    if (samples != 80) {
      return -1;
    }
  } else {
    /* 10 sub-frames of 16 samples each; a 10 ms frame is 160 samples. */
    L = 16;
    if (samples != 160) {
      return -1;
    }
  }
  /* Apply slowly varying digital gain: when micVol exceeds the analog
   * maximum, extra gain is taken from kGainTableAnalog instead. */
  if (stt->micVol > stt->maxAnalog) {
    /* |maxLevel| is strictly >= |micVol|, so this condition should be
     * satisfied here, ensuring there is no divide-by-zero. */
    assert(stt->maxLevel > stt->maxAnalog);
    /* Q1: normalize the excess volume against the level range. */
    tmp16 = (int16_t) (stt->micVol - stt->maxAnalog);
    tmp32 = (GAIN_TBL_LEN - 1) * tmp16;
    tmp16 = (int16_t) (stt->maxLevel - stt->maxAnalog);
    /* targetGainIdx = (micVol - maxAnalog) * 31 / (maxLevel - maxAnalog) */
    targetGainIdx = tmp32 / tmp16;
    assert(targetGainIdx < GAIN_TBL_LEN);
    /* Increment through the table towards the target gain.
     * If micVol drops below maxAnalog, we allow the gain
     * to be dropped immediately. */
    if (stt->gainTableIdx < targetGainIdx) {
      stt->gainTableIdx++;
    } else if (stt->gainTableIdx > targetGainIdx) {
      stt->gainTableIdx--;
    }
    /* Q12 gain lookup. */
    gain = kGainTableAnalog[stt->gainTableIdx];
    /* Linearly amplify all bands, saturating to int16 range. */
    for (i = 0; i < samples; i++) {
      size_t j;
      for (j = 0; j < num_bands; ++j) {
        sample = (in_mic[j][i] * gain) >> 12;
        if (sample > 32767) {
          in_mic[j][i] = 32767;
        } else if (sample < -32768) {
          in_mic[j][i] = -32768;
        } else {
          in_mic[j][i] = (int16_t) sample;
        }
      }
    }
  } else {
    stt->gainTableIdx = 0;
  }
  /* Compute envelope. inQueue selects which half of the 10/20 ms
   * double-buffer this frame's data goes into. */
  if (stt->inQueue > 0) {
    ptr = stt->env[1];
  } else {
    ptr = stt->env[0];
  }
  /* ptr[i] holds the maximum sample energy within sub-frame i. */
  for (i = 0; i < kNumSubframes; i++) {
    /* iterate over samples */
    max_nrg = 0;
    for (n = 0; n < L; n++) {
      nrg = in_mic[0][i * L + n] * in_mic[0][i * L + n];
      if (nrg > max_nrg) {
        max_nrg = nrg;
      }
    }
    ptr[i] = max_nrg;
  }
  /* Compute energy: ptr now points at the 2 ms mic-energy buffer. */
  if (stt->inQueue > 0) {
    ptr = stt->Rxx16w32_array[1];
  } else {
    ptr = stt->Rxx16w32_array[0];
  }
  for (i = 0; i < kNumSubframes / 2; i++) {
    if (stt->fs == 16000) {
      /* Downsample to 8 kHz before the energy measure. */
      downsampleBy2(&in_mic[0][i * 32], 32, tmp_speech,
                    stt->filterState);
    } else {
      memcpy(tmp_speech, &in_mic[0][i * 16], 16 * sizeof(short));
    }
    /* Compute energy in blocks of 16 samples (2 ms at 8 kHz). */
    ptr[i] = DotProductWithScale(tmp_speech, tmp_speech, 16, 4);
  }
  /* Update queue information: 1 means one 10 ms frame queued, 2 means
   * two (20 ms). */
  if (stt->inQueue == 0) {
    stt->inQueue = 1;
  } else {
    stt->inQueue = 2;
  }
  /* Call VAD (use low band only): produces the 10 ms speech measure. */
  WebRtcAgc_ProcessVad(&stt->vadMic, in_mic[0], samples);
  return 0;
}
/* Feeds a 10 ms far-end frame to the digital AGC after validating the
 * instance and frame size. Returns 0 on success, an error code otherwise. */
int WebRtcAgc_AddFarend(void *state, const int16_t *in_far, size_t samples) {
  const int validation = WebRtcAgc_GetAddFarendError(state, samples);
  if (validation == 0) {
    LegacyAgc *self = (LegacyAgc *) state;
    return WebRtcAgc_AddFarendToDigital(&self->digitalAgc, in_far, samples);
  }
  return validation;
}
/* Returns 0 when |samples| matches a 10 ms frame for the configured sample
 * rate (80 @ 8 kHz, 160 @ 16/32/48 kHz), -1 otherwise or on a NULL state. */
int WebRtcAgc_GetAddFarendError(void *state, size_t samples) {
  const LegacyAgc *self = (const LegacyAgc *) state;
  if (self == NULL) {
    return -1;
  }
  switch (self->fs) {
    case 8000:
      return (samples == 80) ? 0 : -1;
    case 16000:
    case 32000:
    case 48000:
      return (samples == 160) ? 0 : -1;
    default:
      /* Unsupported sample rate. */
      return -1;
  }
}
/*
 * Emulates an analog microphone in software: derives the gain index from
 * micVol, looks up and applies the corresponding gain to all bands,
 * classifies low-level (noise-like) frames, and finally forwards the frame
 * to WebRtcAgc_AddMic as if it came from a real microphone.
 *
 * agcInst     - LegacyAgc instance.
 * in_near     - in/out audio, one pointer per band; band 0 is the low band.
 * num_bands   - number of bands.
 * samples     - samples per band in this 10 ms frame.
 * micLevelIn  - externally reported mic level.
 * micLevelOut - receives the emulated mic level after gain stepping.
 *
 * Returns 0 on success, -1 on error from WebRtcAgc_AddMic.
 */
int WebRtcAgc_VirtualMic(void *agcInst,
                         int16_t *const *in_near,
                         size_t num_bands,
                         size_t samples,
                         int32_t micLevelIn,
                         int32_t *micLevelOut) {
  int32_t tmpFlt, micLevelTmp, gainIdx;
  uint16_t gain;
  size_t ii, j;
  LegacyAgc *stt;
  uint32_t nrg;
  size_t sampleCntr;
  uint32_t frameNrg = 0;
  uint32_t frameNrgLimit = 5500;           /* frame-energy ceiling */
  int16_t numZeroCrossing = 0;
  const int16_t kZeroCrossingLowLim = 15;  /* zero-crossing lower limit */
  const int16_t kZeroCrossingHighLim = 20; /* zero-crossing upper limit */
  stt = (LegacyAgc *) agcInst;
  /*
   * Before applying gain decide if this is a low-level signal.
   * The idea is that digital AGC will not adapt to low-level
   * signals.
   */
  if (stt->fs != 8000) {
    /* Wideband rates use a doubled energy ceiling. */
    frameNrgLimit = frameNrgLimit << 1;
  }
  frameNrg = (uint32_t) (in_near[0][0] * in_near[0][0]);
  for (sampleCntr = 1; sampleCntr < samples; sampleCntr++) {
    /* Increment the frame energy only while below the limit; the exact
     * value of the energy is not important, just the comparison. */
    if (frameNrg < frameNrgLimit) {
      nrg = (uint32_t) (in_near[0][sampleCntr] * in_near[0][sampleCntr]);
      frameNrg += nrg;
    }
    /* Count the zero crossings: the XOR of two samples has its sign bit set
     * exactly when the samples have opposite signs. */
    numZeroCrossing +=
        ((in_near[0][sampleCntr] ^ in_near[0][sampleCntr - 1]) < 0);
  }
  /* Classify low-level frames from energy and zero-crossing rate so later
   * stages avoid amplifying noise-only segments. */
  if ((frameNrg < 500) || (numZeroCrossing <= 5)) {
    /* Essentially no signal. */
    stt->lowLevelSignal = 1;
  } else if (numZeroCrossing <= kZeroCrossingLowLim) {
    /* Low zero-crossing rate: likely voiced speech. */
    stt->lowLevelSignal = 0;
  } else if (frameNrg <= frameNrgLimit) {
    /* Too little energy to be useful speech. */
    stt->lowLevelSignal = 1;
  } else if (numZeroCrossing >= kZeroCrossingHighLim) {
    /* Very high zero-crossing rate; NOTE(review): this branch may
     * misclassify unvoiced speech as low level — confirm before tuning. */
    stt->lowLevelSignal = 1;
  } else {
    stt->lowLevelSignal = 0;
  }
  /* Scale the external mic level into the internal range. */
  micLevelTmp = micLevelIn << stt->scale;
  /* Set desired level: clamp the starting gain index at maxAnalog. */
  gainIdx = stt->micVol;
  if (stt->micVol > stt->maxAnalog) {
    gainIdx = stt->maxAnalog;
  }
  if (micLevelTmp != stt->micRef) {
    /* Something has happened with the physical level, restart. */
    stt->micRef = micLevelTmp;
    stt->micVol = 127;
    *micLevelOut = 127;
    stt->micGainIdx = 127;
    gainIdx = 127;
  }
  /* Pre-process the signal to emulate the microphone level. */
  /* Take one step at a time in the gain table: indices above 127 amplify
   * (roughly 1x-3x), indices below attenuate (roughly 1x-0.1x). */
  if (gainIdx > 127) {
    gain = kGainTableVirtualMic[gainIdx - 128];
  } else {
    gain = kSuppressionTableVirtualMic[127 - gainIdx];
  }
  for (ii = 0; ii < samples; ii++) {
    tmpFlt = (in_near[0][ii] * gain) >> 10;
    if (tmpFlt > 32767) {
      /* Clipped: saturate and step the gain down once.
       * NOTE(review): this lookup uses gainIdx - 127 (with >= 127) while the
       * initial lookup above uses gainIdx - 128 (with > 127); the asymmetry
       * is inherited from upstream WebRTC — confirm before changing. */
      tmpFlt = 32767;
      gainIdx--;
      if (gainIdx >= 127) {
        gain = kGainTableVirtualMic[gainIdx - 127];
      } else {
        gain = kSuppressionTableVirtualMic[127 - gainIdx];
      }
    }
    if (tmpFlt < -32768) {
      /* Clipped negative: saturate and step the gain down once. */
      tmpFlt = -32768;
      gainIdx--;
      if (gainIdx >= 127) {
        gain = kGainTableVirtualMic[gainIdx - 127];
      } else {
        gain = kSuppressionTableVirtualMic[127 - gainIdx];
      }
    }
    in_near[0][ii] = (int16_t) tmpFlt;
    /* Apply the same (possibly stepped-down) gain to the higher bands. */
    for (j = 1; j < num_bands; ++j) {
      tmpFlt = (in_near[j][ii] * gain) >> 10;
      if (tmpFlt > 32767) {
        tmpFlt = 32767;
      }
      if (tmpFlt < -32768) {
        tmpFlt = -32768;
      }
      in_near[j][ii] = (int16_t) tmpFlt;
    }
  }
  /* Set the level we (finally) used. */
  stt->micGainIdx = gainIdx;
  // *micLevelOut = stt->micGainIdx;
  *micLevelOut = stt->micGainIdx >> stt->scale;
  /* Add to Mic as if it was the output from a true microphone. */
  if (WebRtcAgc_AddMic(agcInst, in_near, num_bands, samples) != 0) {
    return -1;
  }
  return 0;
}
/*
 * Recomputes the analog target level and the derived upper/lower energy
 * limits used by the analog adaptation, from the current compression gain
 * and AGC mode.
 */
void WebRtcAgc_UpdateAgcThresholds(LegacyAgc *stt) {
  int16_t tmp16;
#ifdef MIC_LEVEL_FEEDBACK
  int zeros;
  if (stt->micLvlSat) {
    /* Lower the analog target level since we have reached its maximum */
    zeros = WebRtcSpl_NormW32(stt->Rxx160_LPw32);
    stt->targetIdxOffset = (3 * zeros - stt->targetIdx - 2) / 4;
  }
#endif
  /* Set analog target level in envelope dBOv scale */
  tmp16 = (DIFF_REF_TO_ANALOG * stt->compressionGaindB) + ANALOG_TARGET_LEVEL_2;
  tmp16 = DivW32W16ResW16((int32_t) tmp16, ANALOG_TARGET_LEVEL);
  stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN + tmp16;
  if (stt->analogTarget < DIGITAL_REF_AT_0_COMP_GAIN) {
    stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN;
  }
  if (stt->agcMode == kAgcModeFixedDigital) {
    /* Adjust for different parameter interpretation in FixedDigital mode */
    stt->analogTarget = stt->compressionGaindB;
  }
#ifdef MIC_LEVEL_FEEDBACK
  stt->analogTarget += stt->targetIdxOffset;
#endif
  /* Since the offset between RMS and ENV is not constant, we should make
   * this into a table, but for now, we'll stick with a constant, tuned for
   * the chosen analog target level. */
  stt->targetIdx = ANALOG_TARGET_LEVEL + OFFSET_ENV_TO_RMS; /* 11 + 9 */
#ifdef MIC_LEVEL_FEEDBACK
  stt->targetIdx += stt->targetIdxOffset;
#endif
  /* Analog adaptation limits */
  /* analogTargetLevel = round((32767*10^(-targetIdx/20))^2*16/2^7) */
  stt->analogTargetLevel =
      RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx]; /* ex. -20 dBov */
  stt->startUpperLimit =
      RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx - 1]; /* -19 dBov */
  stt->startLowerLimit =
      RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx + 1]; /* -21 dBov */
  stt->upperPrimaryLimit =
      RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx - 2]; /* -18 dBov */
  stt->lowerPrimaryLimit =
      RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx + 2]; /* -22 dBov */
  stt->upperSecondaryLimit =
      RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx - 5]; /* -15 dBov */
  stt->lowerSecondaryLimit =
      RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx + 5]; /* -25 dBov */
  stt->upperLimit = stt->startUpperLimit;
  stt->lowerLimit = stt->startLowerLimit;
}
/*
 * Saturation detector. Accumulates the per-subframe envelope values (set by
 * WebRtcAgc_AddMic) that exceed a high-level threshold; when the running sum
 * passes 25000 the frame is flagged as saturated and the sum restarts.
 * A ~0.99 leak makes stale contributions decay away.
 */
void WebRtcAgc_SaturationCtrl(LegacyAgc *stt,
                              uint8_t *saturated,
                              const int32_t *env) {
  int16_t subframe;
  /* Fold every envelope value above the threshold into the accumulator. */
  for (subframe = 0; subframe < 10; subframe++) {
    const int16_t envHigh = (int16_t) (env[subframe] >> 20);
    if (envHigh > 875) {
      stt->envSum += envHigh;
    }
  }
  if (stt->envSum > 25000) {
    /* Sustained high level: report saturation and restart the sum. */
    *saturated = 1;
    stt->envSum = 0;
  }
  /* stt->envSum *= 0.99 (32440 / 2^15 = 0.98999). */
  stt->envSum = (int16_t) ((stt->envSum * 32440) >> 15);
}
/*
 * Zero-signal control. If the 10 ms envelope sum stays near zero for more
 * than 500 ms, the speech-activity trackers (activeSpeech, Rxx16_LPw32Max)
 * are reset and, when the level is below 50% of the range, the mic level is
 * raised by ~1.1x (capped at zeroCtrlMax). A mute-guard timer then blocks
 * upward adaptation for a while.
 */
void WebRtcAgc_ZeroCtrl(LegacyAgc *stt, int32_t *inMicLevel, const int32_t *env) {
  int16_t i;
  int64_t tmp = 0;
  int32_t midVal;
  /* Is the input signal zero? Sum the 10 sub-frame envelopes. */
  for (i = 0; i < 10; i++) {
    tmp += env[i];
  }
  /* Each block is allowed to have a few non-zero
   * samples.
   */
  if (tmp < 500) {
    stt->msZero += 10;
  } else {
    stt->msZero = 0;
  }
  if (stt->muteGuardMs > 0) {
    stt->muteGuardMs -= 10;
  }
  if (stt->msZero > 500) {
    stt->msZero = 0;
    /* Increase microphone level only if it's less than 50%:
     * *inMicLevel *= 1.1 (1126 / 1024). */
    midVal = (stt->maxAnalog + stt->minLevel + 1) / 2;
    if (*inMicLevel < midVal) {
      /* *inMicLevel *= 1.1; */
      *inMicLevel = (1126 * *inMicLevel) >> 10;
      /* Reduces risk of a muted mic repeatedly triggering excessive levels
       * due to zero signal detection. */
      *inMicLevel = MIN(*inMicLevel, stt->zeroCtrlMax);
      stt->micVol = *inMicLevel;
    }
#ifdef WEBRTC_AGC_DEBUG_DUMP
    fprintf(stt->fpt,
            "\t\tAGC->zeroCntrl, frame %d: 500 ms under threshold,"
            " micVol: %d\n",
            stt->fcount, stt->micVol);
#endif
    stt->activeSpeech = 0;
    stt->Rxx16_LPw32Max = 0;
    /* The AGC has a tendency (due to problems with the VAD parameters), to
     * vastly increase the volume after a muting event. This timer prevents
     * upwards adaptation for a short period. */
    stt->muteGuardMs = kMuteGuardTimeMs;
  }
}
/*
 * Check if the near end speaker is inactive. If that is the case the VAD
 * threshold is increased since the VAD speech model gets more sensitive to
 * any sound after a long silence.
 */
void WebRtcAgc_SpeakerInactiveCtrl(LegacyAgc *stt) {
  const int16_t stdLongTerm = stt->vadMic.stdLongTerm;
  if (stdLongTerm < 2500) {
    /* Long silence: pin the threshold at its high value. */
    stt->vadThreshold = 1500;
    return;
  }
  /* Scale between the minimum and maximum threshold. */
  int16_t targetThresh = kNormalVadThreshold;
  if (stdLongTerm < 4500) {
    targetThresh += (4500 - stdLongTerm) / 2;
  }
  /* vadThreshold = (31 * vadThreshold + targetThresh) / 32 */
  const int32_t smoothed = targetThresh + 31 * stt->vadThreshold;
  stt->vadThreshold = (int16_t) (smoothed >> 5);
}
/*
 * Maps a normalized volume level to one of eight gain-curve indices.
 * volume - normalized volume in Q14.
 * index  - output, curve index in [0, 7].
 */
void WebRtcAgc_ExpCurve(int16_t volume, int16_t *index) {
  /* Curve-selection thresholds (Q14), highest first; the index drops by one
   * for each threshold the volume fails to exceed. */
  static const int16_t kCurveThreshold[7] = {12124, 7864, 6554, 5243,
                                             3932,  2621, 1311};
  int16_t curve = 7;
  int16_t step;
  for (step = 0; step < 7; step++) {
    if (volume > kCurveThreshold[step]) {
      break;
    }
    curve--;
  }
  *index = curve;
}
/*
 * Analog-volume adaptation. Based on the input energy, the saturation state
 * (WebRtcAgc_SaturationCtrl), the zero-signal state (WebRtcAgc_ZeroCtrl) and
 * the near-end speech activity (WebRtcAgc_SpeakerInactiveCtrl), this routine
 * makes the initial adjustment of the (possibly virtual) microphone volume
 * so the low-pass frame energy lands inside [lowerLimit, upperLimit].
 *
 * state             - LegacyAgc instance.
 * inMicLevel        - current mic level; must be in [minLevel, maxAnalog]
 *                     after scaling.
 * outMicLevel       - receives the suggested new mic level.
 * vadLogRatio       - speech measure produced by the VAD in AddMic.
 * echo              - 1 when far-end activity should block gain increases.
 * saturationWarning - set to 1 when saturation occurred and the level
 *                     cannot be lowered any further.
 *
 * Returns 0 on success, -1 when the input level is out of range.
 */
int32_t WebRtcAgc_ProcessAnalog(void *state,
                                int32_t inMicLevel,
                                int32_t *outMicLevel,
                                int16_t vadLogRatio,
                                int16_t echo,
                                uint8_t *saturationWarning) {
  uint32_t tmpU32;
  int32_t Rxx16w32, tmp32;
  int32_t inMicLevelTmp, lastMicVol;
  int16_t i;
  uint8_t saturated = 0;
  LegacyAgc *stt;
  stt = (LegacyAgc *) state;
  /* |scale| maps an external volume range onto the internal 0-255 range; it
   * stays 0 when the device already reports 0-255 (e.g. a 0-100 device at
   * level 30 would map to roughly 2.55 * 30). */
  inMicLevelTmp = inMicLevel << stt->scale;
  if (inMicLevelTmp > stt->maxAnalog) {
#ifdef WEBRTC_AGC_DEBUG_DUMP
    fprintf(stt->fpt, "\tAGC->ProcessAnalog, frame %d: micLvl > maxAnalog\n",
            stt->fcount);
#endif
    return -1;
  } else if (inMicLevelTmp < stt->minLevel) {
#ifdef WEBRTC_AGC_DEBUG_DUMP
    fprintf(stt->fpt, "\tAGC->ProcessAnalog, frame %d: micLvl < minLevel\n",
            stt->fcount);
#endif
    return -1;
  }
  /* Step 1: pre-process the microphone volume. */
  if (stt->firstCall == 0) {
    int32_t tmpVol;
    stt->firstCall = 1;
    /* (maxLevel - minLevel) * 51 / 2^9 ~= 10% of the range. */
    tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9;
    tmpVol = (stt->minLevel + tmp32);
    /* If the mic level is very low at start, increase it! */
    if ((inMicLevelTmp < tmpVol) && (stt->agcMode == kAgcModeAdaptiveAnalog)) {
      inMicLevelTmp = tmpVol;
    }
    /* micVol caches the previous frame's volume, tracks the current volume
     * during adaptation, and finally holds the output level. */
    stt->micVol = inMicLevelTmp;
  }
  /* Set the mic level to the previous output value if there is digital input
   * gain. */
  if ((inMicLevelTmp == stt->maxAnalog) && (stt->micVol > stt->maxAnalog)) {
    inMicLevelTmp = stt->micVol;
  }
  /* If the mic level was manually changed to a very low value raise it! */
  if ((inMicLevelTmp != stt->micVol) && (inMicLevelTmp < stt->minOutput)) {
    tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9;
    inMicLevelTmp = (stt->minLevel + tmp32);
    stt->micVol = inMicLevelTmp;
#ifdef MIC_LEVEL_FEEDBACK
    // stt->numBlocksMicLvlSat = 0;
#endif
#ifdef WEBRTC_AGC_DEBUG_DUMP
    fprintf(stt->fpt,
            "\tAGC->ProcessAnalog, frame %d: micLvl < minLevel by manual"
            " decrease, raise vol\n",
            stt->fcount);
#endif
  }
  /* Ensure micVol and the working level agree. */
  if (inMicLevelTmp != stt->micVol) {
    if (inMicLevel == stt->lastInMicLevel) {
      /* We requested a volume adjustment, but it didn't occur. This is
       * probably due to a coarse quantization of the volume slider.
       * Restore the requested value to prevent getting stuck. */
      inMicLevelTmp = stt->micVol;
    } else {
      /* As long as the value changed, update to match. */
      stt->micVol = inMicLevelTmp;
    }
  }
  if (inMicLevelTmp > stt->maxLevel) {
    /* Always allow the user to raise the volume above the maxLevel. */
    stt->maxLevel = inMicLevelTmp;
  }
  /* Store the last value here, after we've taken care of manual updates. */
  stt->lastInMicLevel = inMicLevel;
  lastMicVol = stt->micVol;
  /* Step 2: saturation handling. Checks if the signal is saturated; also a
   * check if individual samples are larger than 12000 is done. If they are,
   * the counter for increasing the volume level is set to -100 ms.
   * stt->env[0] is filled in by WebRtcAgc_AddMic. */
  WebRtcAgc_SaturationCtrl(stt, &saturated, stt->env[0]);
  /* The AGC is always allowed to lower the level if the signal is
   * saturated. */
  if (saturated == 1) {
    /* Lower the recording level.
     * Rxx160_LP is adjusted down because it is so slow it could
     * cause the AGC to make wrong decisions. */
    /* stt->Rxx160_LPw32 *= 0.875; */
    stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 8) * 7;
    /* Cap future zero-control level raises at the current volume. */
    stt->zeroCtrlMax = stt->micVol;
    /* stt->micVol *= 0.903; */
    tmp32 = inMicLevelTmp - stt->minLevel;
    tmpU32 = ((uint32_t) ((uint32_t) (29591) * (uint32_t) (tmp32)));
    stt->micVol = (tmpU32 >> 15) + stt->minLevel;
    /* Force at least a 2-step decrease, limiting the adaptation speed. */
    if (stt->micVol > lastMicVol - 2) {
      stt->micVol = lastMicVol - 2;
    }
    inMicLevelTmp = stt->micVol;
#ifdef WEBRTC_AGC_DEBUG_DUMP
    fprintf(stt->fpt,
            "\tAGC->ProcessAnalog, frame %d: saturated, micVol = %d\n",
            stt->fcount, stt->micVol);
#endif
    /* minOutput is a fixed floor slightly above minLevel; dropping below it
     * means the level cannot be reduced any further. */
    if (stt->micVol < stt->minOutput) {
      *saturationWarning = 1;
    }
    /* Reset counter for decrease of volume level to avoid
     * decreasing too much. The saturation control can still
     * lower the level if needed. */
    stt->msTooHigh = -100;
    /* Enable the control mechanism to ensure that our measure,
     * Rxx160_LP, is in the correct range. This must be done since
     * the measure is very slow. */
    stt->activeSpeech = 0;
    stt->Rxx16_LPw32Max = 0;
    /* Reset to initial values. */
    stt->msecSpeechInnerChange = kMsecSpeechInner;
    stt->msecSpeechOuterChange = kMsecSpeechOuter;
    stt->changeToSlowMode = 0;
    stt->muteGuardMs = 0;
    stt->upperLimit = stt->startUpperLimit;
    stt->lowerLimit = stt->startLowerLimit;
#ifdef MIC_LEVEL_FEEDBACK
    // stt->numBlocksMicLvlSat = 0;
#endif
  }
  /* Check if the input speech is zero. If so the mic volume is increased. On
   * some computers the input is zero up to as high a level as 17%. This also
   * resets activeSpeech and Rxx16_LPw32Max after ~500 ms of silence. */
  WebRtcAgc_ZeroCtrl(stt, &inMicLevelTmp, stt->env[0]);
  /* Check if the near end speaker is inactive.
   * If that is the case the VAD threshold is
   * increased since the VAD speech model gets
   * more sensitive to any sound after a long
   * silence. vadThreshold gates whether the adaptation below runs. */
  WebRtcAgc_SpeakerInactiveCtrl(stt);
  /* Update sub-frame low-pass energy Rxx16_LPw32 and frame energy Rxx160w32,
   * processed per two sub-frames (2 ms). */
  for (i = 0; i < 5; i++) {
    /* Computed on blocks of 16 samples; Rxx16w32_array is filled in by
     * WebRtcAgc_AddMic. */
    Rxx16w32 = stt->Rxx16w32_array[0][i];
    /* Rxx160w32 in Q(-7): sliding-window frame energy. */
    tmp32 = (Rxx16w32 - stt->Rxx16_vectorw32[stt->Rxx16pos]) >> 3;
    stt->Rxx160w32 = stt->Rxx160w32 + tmp32;
    stt->Rxx16_vectorw32[stt->Rxx16pos] = Rxx16w32;
    /* Circular buffer. */
    stt->Rxx16pos++;
    if (stt->Rxx16pos == RXX_BUFFER_LEN) {
      stt->Rxx16pos = 0;
    }
    /* Rxx16_LPw32 in Q(-4): low-pass filtered sub-frame energy. */
    tmp32 = (Rxx16w32 - stt->Rxx16_LPw32) >> kAlphaShortTerm;
    stt->Rxx16_LPw32 = (stt->Rxx16_LPw32) + tmp32;
    /* vadLogRatio comes from the VAD run at the end of AddMic; adaptation
     * only proceeds while speech is detected. */
    if (vadLogRatio > stt->vadThreshold) {
      /* Speech detected! Core adaptation of micVol: keep the frame energy
       * inside [lowerLimit, upperLimit]. */
      /* Check if Rxx160_LP is in the correct range. If
       * it is too high/low then we set it to the maximum of
       * Rxx16_LPw32 during the first 200ms of speech.
       */
      if (stt->activeSpeech < 250) {
        stt->activeSpeech += 2;
        if (stt->Rxx16_LPw32 > stt->Rxx16_LPw32Max) {
          /* Track the maximum sub-frame energy. */
          stt->Rxx16_LPw32Max = stt->Rxx16_LPw32;
        }
      } else if (stt->activeSpeech == 250) {
        stt->activeSpeech += 2;
        tmp32 = stt->Rxx16_LPw32Max >> 3;
        /* Initialize the low-pass frame energy from the tracked max. */
        stt->Rxx160_LPw32 = tmp32 * RXX_BUFFER_LEN;
      }
      /* Long-term smoothing of the frame energy. */
      tmp32 = (stt->Rxx160w32 - stt->Rxx160_LPw32) >> kAlphaLongTerm;
      stt->Rxx160_LPw32 = stt->Rxx160_LPw32 + tmp32;
      if (stt->Rxx160_LPw32 > stt->upperSecondaryLimit) {
        /* Case 1 (far too loud): energy above the secondary upper limit. */
        stt->msTooHigh += 2; /* two sub-frames = 2 ms */
        stt->msTooLow = 0;
        stt->changeToSlowMode = 0;
        if (stt->msTooHigh > stt->msecSpeechOuterChange) {
          stt->msTooHigh = 0;
          /* Lower the recording level. */
          /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */
          tmp32 = stt->Rxx160_LPw32 >> 6;
          stt->Rxx160_LPw32 = tmp32 * 53;
          /* Reduce the max gain to avoid excessive oscillation
           * (but never drop below the maximum analog level).
           */
          stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16;
          stt->maxLevel = MAX(stt->maxLevel, stt->maxAnalog);
          stt->zeroCtrlMax = stt->micVol;
          /* 0.95 in Q15: micVol = 0.95*inMicLevelTmp + 0.05*minLevel. */
          tmp32 = inMicLevelTmp - stt->minLevel;
          tmpU32 = ((uint32_t) ((uint32_t) (31130) * (uint32_t) (tmp32)));
          stt->micVol = (tmpU32 >> 15) + stt->minLevel;
          /* Force at least one step down per update. */
          if (stt->micVol > lastMicVol - 1) {
            stt->micVol = lastMicVol - 1;
          }
          inMicLevelTmp = stt->micVol;
          /* Enable the control mechanism to ensure that our measure,
           * Rxx160_LP, is in the correct range.
           */
          stt->activeSpeech = 0;
          stt->Rxx16_LPw32Max = 0;
#ifdef MIC_LEVEL_FEEDBACK
          // stt->numBlocksMicLvlSat = 0;
#endif
#ifdef WEBRTC_AGC_DEBUG_DUMP
          fprintf(stt->fpt,
                  "\tAGC->ProcessAnalog, frame %d: measure >"
                  " 2ndUpperLim, micVol = %d, maxLevel = %d\n",
                  stt->fcount, stt->micVol, stt->maxLevel);
#endif
        }
      } else if (stt->Rxx160_LPw32 > stt->upperLimit) {
        /* Case 2 (slightly loud): energy above the inner upper limit. The
         * handling mirrors case 1; only the timing and factor differ. */
        stt->msTooHigh += 2;
        stt->msTooLow = 0;
        stt->changeToSlowMode = 0;
        if (stt->msTooHigh > stt->msecSpeechInnerChange) {
          /* Lower the recording level */
          stt->msTooHigh = 0;
          /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */
          stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 53;
          /* Reduce the max gain to avoid excessive oscillation
           * (but never drop below the maximum analog level).
           */
          stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16;
          stt->maxLevel = MAX(stt->maxLevel, stt->maxAnalog);
          stt->zeroCtrlMax = stt->micVol;
          /* 0.965 in Q15: a gentler decrease than in case 1. */
          //tmp32 = inMicLevelTmp - stt->minLevel;
          tmpU32 = ((uint32_t) ((uint32_t) (31621) * (uint32_t) ((inMicLevelTmp - stt->minLevel))));
          stt->micVol = (tmpU32 >> 15) + stt->minLevel;
          if (stt->micVol > lastMicVol - 1) {
            stt->micVol = lastMicVol - 1;
          }
          inMicLevelTmp = stt->micVol;
#ifdef MIC_LEVEL_FEEDBACK
          // stt->numBlocksMicLvlSat = 0;
#endif
#ifdef WEBRTC_AGC_DEBUG_DUMP
          fprintf(stt->fpt,
                  "\tAGC->ProcessAnalog, frame %d: measure >"
                  " UpperLim, micVol = %d, maxLevel = %d\n",
                  stt->fcount, stt->micVol, stt->maxLevel);
#endif
        }
      } else if (stt->Rxx160_LPw32 < stt->lowerSecondaryLimit) {
        /* Case 3 (far too soft): energy below the secondary lower limit. */
        stt->msTooHigh = 0;
        stt->changeToSlowMode = 0;
        stt->msTooLow += 2; /* two sub-frames = 2 ms */
        if (stt->msTooLow > stt->msecSpeechOuterChange) {
          /* Raise the recording level */
          int16_t index, weightFIX;
          int16_t volNormFIX = 16384; // =1 in Q14.
          stt->msTooLow = 0;
          /* Normalize the volume level */
          tmp32 = (inMicLevelTmp - stt->minLevel) << 14;
          if (stt->maxInit != stt->minLevel) {
            volNormFIX = tmp32 / (stt->maxInit - stt->minLevel);
          }
          /* Find correct curve */
          WebRtcAgc_ExpCurve(volNormFIX, &index);
          /* Compute weighting factor for the volume increase, 32^(-2*X)/2+1.05 */
          weightFIX =
              kOffset1[index] - (int16_t) ((kSlope1[index] * volNormFIX) >> 13);
          /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */
          stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67;
          //tmp32 = inMicLevelTmp - stt->minLevel;
          tmpU32 =
              ((uint32_t) weightFIX * (uint32_t) (inMicLevelTmp - stt->minLevel));
          stt->micVol = (tmpU32 >> 14) + stt->minLevel;
          /* Force at least a two-step raise per update. */
          if (stt->micVol < lastMicVol + 2) {
            stt->micVol = lastMicVol + 2;
          }
          inMicLevelTmp = stt->micVol;
#ifdef MIC_LEVEL_FEEDBACK
          /* Count ms in level saturation */
          // if (stt->micVol > stt->maxAnalog) {
          if (stt->micVol > 150) {
            /* mic level is saturated */
            stt->numBlocksMicLvlSat++;
            fprintf(stderr, "Sat mic Level: %d\n", stt->numBlocksMicLvlSat);
          }
#endif
#ifdef WEBRTC_AGC_DEBUG_DUMP
          fprintf(stt->fpt,
                  "\tAGC->ProcessAnalog, frame %d: measure <"
                  " 2ndLowerLim, micVol = %d\n",
                  stt->fcount, stt->micVol);
#endif
        }
      } else if (stt->Rxx160_LPw32 < stt->lowerLimit) {
        /* Case 4 (slightly soft): energy below the inner lower limit. */
        stt->msTooHigh = 0;
        stt->changeToSlowMode = 0;
        stt->msTooLow += 2; /* two sub-frames = 2 ms */
        if (stt->msTooLow > stt->msecSpeechInnerChange) {
          /* Raise the recording level */
          int16_t index, weightFIX;
          int16_t volNormFIX = 16384; // =1 in Q14.
          stt->msTooLow = 0;
          /* Normalize the volume level */
          tmp32 = (inMicLevelTmp - stt->minLevel) << 14;
          if (stt->maxInit != stt->minLevel) {
            volNormFIX = tmp32 / (stt->maxInit - stt->minLevel);
          }
          /* Find correct curve */
          WebRtcAgc_ExpCurve(volNormFIX, &index);
          /* Compute weighting factor for the volume increase, (3.^(-2.*X))/8+1 */
          weightFIX =
              kOffset2[index] - (int16_t) ((kSlope2[index] * volNormFIX) >> 13);
          /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */
          stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67;
          // tmp32 = inMicLevelTmp - stt->minLevel;
          tmpU32 =
              ((uint32_t) weightFIX * (uint32_t) (inMicLevelTmp - stt->minLevel));
          stt->micVol = (tmpU32 >> 14) + stt->minLevel;
          /* Force at least a one-step raise per update. */
          if (stt->micVol < lastMicVol + 1) {
            stt->micVol = lastMicVol + 1;
          }
          inMicLevelTmp = stt->micVol;
#ifdef MIC_LEVEL_FEEDBACK
          /* Count ms in level saturation */
          // if (stt->micVol > stt->maxAnalog) {
          if (stt->micVol > 150) {
            /* mic level is saturated */
            stt->numBlocksMicLvlSat++;
            fprintf(stderr, "Sat mic Level: %d\n", stt->numBlocksMicLvlSat);
          }
#endif
#ifdef WEBRTC_AGC_DEBUG_DUMP
          fprintf(stt->fpt,
                  "\tAGC->ProcessAnalog, frame %d: measure < LowerLim, micVol "
                  "= %d\n",
                  stt->fcount, stt->micVol);
#endif
        }
      } else {
        /* Case 5: the signal is inside the desired range which is:
         * lowerLimit < Rxx160_LP/640 < upperLimit
         */
        if (stt->changeToSlowMode > 4000) {
          /* In range for ~4 s: switch to slow mode with longer reaction
           * times and tighter limits. */
          stt->msecSpeechInnerChange = 1000;
          stt->msecSpeechOuterChange = 500;
          stt->upperLimit = stt->upperPrimaryLimit;
          stt->lowerLimit = stt->lowerPrimaryLimit;
        } else {
          stt->changeToSlowMode += 2; // in milliseconds
        }
        /* Reset the out-of-range timers. */
        stt->msTooLow = 0;
        stt->msTooHigh = 0;
        stt->micVol = inMicLevelTmp;
      }
#ifdef MIC_LEVEL_FEEDBACK
      if (stt->numBlocksMicLvlSat > NUM_BLOCKS_IN_SAT_BEFORE_CHANGE_TARGET) {
        stt->micLvlSat = 1;
        fprintf(stderr, "target before = %d (%d)\n", stt->analogTargetLevel,
                stt->targetIdx);
        WebRtcAgc_UpdateAgcThresholds(stt);
        WebRtcAgc_CalculateGainTable(
            &(stt->digitalAgc.gainTable[0]), stt->compressionGaindB,
            stt->targetLevelDbfs, stt->limiterEnable, stt->analogTarget);
        stt->numBlocksMicLvlSat = 0;
        stt->micLvlSat = 0;
        fprintf(stderr, "target offset = %d\n", stt->targetIdxOffset);
        fprintf(stderr, "target after = %d (%d)\n", stt->analogTargetLevel,
                stt->targetIdx);
      }
#endif
    }
  }
  /* Ensure gain is not increased in presence of echo or after a mute event
   * (but allow the zeroCtrl() increase on the frame of a mute detection).
   */
  if (echo == 1 ||
      (stt->muteGuardMs > 0 && stt->muteGuardMs < kMuteGuardTimeMs)) {
    if (stt->micVol > lastMicVol) {
      stt->micVol = lastMicVol;
    }
  }
  /* Limit the gain to the allowed range. */
  if (stt->micVol > stt->maxLevel) {
    stt->micVol = stt->maxLevel;
  } else if (stt->micVol < stt->minOutput) {
    stt->micVol = stt->minOutput;
  }
  *outMicLevel = MIN(stt->micVol, stt->maxAnalog) >> stt->scale;
  return 0;
}
/*
 * Top-level 10 ms processing: applies the digital AGC, emulates the virtual
 * microphone gain, optionally runs the analog adaptation, and rotates the
 * envelope/energy double-buffer queue.
 *
 * Bug fix: the analog stage was called with the POINTER |outMicLevel| where
 * its int32_t input-level parameter is expected (a pointer-to-int constraint
 * violation); it now receives the level value *outMicLevel produced by
 * WebRtcAgc_VirtualMic.
 *
 * Returns 0 on success, -1 on a NULL instance, bad frame size, or an
 * internal processing error.
 */
int WebRtcAgc_Process(void *agcInst,
                      int16_t *const *in_near,
                      size_t num_bands,
                      size_t samples,
                      int16_t *const *out,
                      int32_t inMicLevel,
                      int32_t *outMicLevel,
                      int16_t echo,
                      uint8_t *saturationWarning) {
  LegacyAgc *stt;
  stt = (LegacyAgc *) agcInst;
  if (stt == NULL) {
    return -1;
  }
  /* Only 10 ms frames are accepted: 80 samples @ 8 kHz, 160 otherwise. */
  if (stt->fs == 8000) {
    if (samples != 80) {
      return -1;
    }
  } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) {
    if (samples != 160) {
      return -1;
    }
  } else {
    return -1;
  }
  /* Set to 1 when a saturation event occurred and the volume cannot be
   * reduced further; 0 otherwise. */
  *saturationWarning = 0;
  // TODO(minyue): PUT IN RANGE CHECKING FOR INPUT LEVELS
  *outMicLevel = inMicLevel;
#ifdef WEBRTC_AGC_DEBUG_DUMP
  stt->fcount++;
#endif
  /* All modes compute the digital gain via WebRtcAgc_ProcessDigital. */
  if (WebRtcAgc_ProcessDigital(&stt->digitalAgc, in_near, num_bands, out,
                               stt->fs, stt->lowLevelSignal) == -1) {
#ifdef WEBRTC_AGC_DEBUG_DUMP
    fprintf(stt->fpt, "AGC->Process, frame %d: Error from DigAGC\n\n",
            stt->fcount);
#endif
    return -1;
  }
  /* Local modification: run the virtual-mic gain on the digital output, then
   * the analog adaptation (skipped for low-level signals except in
   * adaptive-digital mode). */
  if (WebRtcAgc_VirtualMic(agcInst, out, num_bands, samples, inMicLevel,
                           outMicLevel)) {
    return -1;
  }
  if (stt->lowLevelSignal == 0 || stt->agcMode == kAgcModeAdaptiveDigital) {
    if (WebRtcAgc_ProcessAnalog(agcInst, *outMicLevel, outMicLevel,
                                stt->vadMic.logRatio, echo,
                                saturationWarning) == -1) {
      return -1;
    }
  }
#ifdef WEBRTC_AGC_DEBUG_DUMP
  fprintf(stt->agcLog, "%5d\t%d\t%d\t%d\t%d\n", stt->fcount, inMicLevel,
          *outMicLevel, stt->maxLevel, stt->micVol);
#endif
  /* Processing done: rotate the 10 ms envelope/energy queue. */
  if (stt->inQueue > 1) {
    memcpy(stt->env[0], stt->env[1], 10 * sizeof(int32_t));
    memcpy(stt->Rxx16w32_array[0], stt->Rxx16w32_array[1], 5 * sizeof(int32_t));
  }
  if (stt->inQueue > 0) {
    stt->inQueue--;
  }
  return 0;
}
int WebRtcAgc_set_config(void *agcInst, WebRtcAgcConfig agcConfig) {
    /* Applies a new configuration: updates the analog-adaptation thresholds
     * and recalculates the digital gain table.
     * Returns 0 on success, -1 on error (lastError holds the reason). */
    LegacyAgc *stt;
    stt = (LegacyAgc *) agcInst;
    if (stt == NULL) {
        return -1;
    }
    if (stt->initFlag != kInitCheck) {
        stt->lastError = AGC_UNINITIALIZED_ERROR;
        return -1;
    }
    /* Validate ALL parameters before mutating any state, so a failed call
     * cannot leave the instance with a half-applied configuration.
     * (Previously limiterEnable/compressionGaindB were written before the
     * targetLevelDbfs range check.) */
    if (agcConfig.limiterEnable != kAgcFalse &&
        agcConfig.limiterEnable != kAgcTrue) {
        stt->lastError = AGC_BAD_PARAMETER_ERROR;
        return -1;
    }
    if ((agcConfig.targetLevelDbfs < 0) || (agcConfig.targetLevelDbfs > 31)) {
        stt->lastError = AGC_BAD_PARAMETER_ERROR;
        return -1;
    }
    stt->limiterEnable = agcConfig.limiterEnable;
    stt->compressionGaindB = agcConfig.compressionGaindB;
    stt->targetLevelDbfs = agcConfig.targetLevelDbfs;
    if (stt->agcMode == kAgcModeFixedDigital) {
        /* Adjust for different parameter interpretation in FixedDigital mode */
        stt->compressionGaindB += agcConfig.targetLevelDbfs;
    }
    /* Update threshold levels for analog adaptation */
    WebRtcAgc_UpdateAgcThresholds(stt);
    /* Recalculate gain table */
    if (WebRtcAgc_CalculateGainTable(
            &(stt->digitalAgc.gainTable[0]), stt->compressionGaindB,
            stt->targetLevelDbfs, stt->limiterEnable, stt->analogTarget) == -1) {
#ifdef WEBRTC_AGC_DEBUG_DUMP
        fprintf(stt->fpt, "AGC->set_config, frame %d: Error from calcGainTable\n\n",
                stt->fcount);
#endif
        return -1;
    }
    /* Remember the raw (unadjusted) config for WebRtcAgc_get_config(). */
    stt->usedConfig.compressionGaindB = agcConfig.compressionGaindB;
    stt->usedConfig.limiterEnable = agcConfig.limiterEnable;
    stt->usedConfig.targetLevelDbfs = agcConfig.targetLevelDbfs;
    return 0;
}
int WebRtcAgc_get_config(void *agcInst, WebRtcAgcConfig *config) {
    /* Copies the currently active configuration into *config.
     * Returns 0 on success, -1 on error (lastError holds the reason). */
    LegacyAgc *self = (LegacyAgc *) agcInst;
    if (self == NULL) {
        return -1;
    }
    if (config == NULL) {
        self->lastError = AGC_NULL_POINTER_ERROR;
        return -1;
    }
    if (self->initFlag != kInitCheck) {
        self->lastError = AGC_UNINITIALIZED_ERROR;
        return -1;
    }
    /* Struct assignment copies all three fields at once. */
    *config = self->usedConfig;
    return 0;
}
void *WebRtcAgc_Create(void) {
    /* Allocates and pre-initializes an AGC instance.
     * Returns the instance, or NULL on allocation failure. */
    LegacyAgc *stt = malloc(sizeof(LegacyAgc));
    if (stt == NULL) {
        /* Previously the pointer was dereferenced unconditionally. */
        return NULL;
    }
#ifdef WEBRTC_AGC_DEBUG_DUMP
    stt->fpt = fopen("./agc_test_log.txt", "wt");
    stt->agcLog = fopen("./agc_debug_log.txt", "wt");
    stt->digitalAgc.logFile = fopen("./agc_log.txt", "wt");
#endif
    stt->initFlag = 0;  /* WebRtcAgc_Init() must still be called. */
    stt->lastError = 0;
    return stt;
}
void WebRtcAgc_Free(void *state) {
    /* Releases an instance created by WebRtcAgc_Create(). NULL is a no-op;
     * the guard also avoids fclose(NULL) (undefined behavior) in debug
     * builds. free(NULL) would be safe, but the fclose calls are not. */
    LegacyAgc *stt;
    stt = (LegacyAgc *) state;
    if (stt == NULL) {
        return;
    }
#ifdef WEBRTC_AGC_DEBUG_DUMP
    fclose(stt->fpt);
    fclose(stt->agcLog);
    fclose(stt->digitalAgc.logFile);
#endif
    free(stt);
}
/* Initializes an AGC instance.
 * minLevel - Minimum volume level
 * maxLevel - Maximum volume level
 * agcMode  - one of kAgcModeUnchanged..kAgcModeFixedDigital
 * fs       - sampling frequency
 * Returns 0 on success, -1 on error.
 */
int WebRtcAgc_Init(void *agcInst,
                   int32_t minLevel,
                   int32_t maxLevel,
                   int16_t agcMode,
                   uint32_t fs) {
    int32_t max_add, tmp32;
    int16_t i;
    int tmpNorm;
    LegacyAgc *stt;
    /* typecast state pointer */
    stt = (LegacyAgc *) agcInst;
    if (WebRtcAgc_InitDigital(&stt->digitalAgc, agcMode) != 0) {
        stt->lastError = AGC_UNINITIALIZED_ERROR;
        return -1;
    }
    /* Analog AGC variables */
    stt->envSum = 0;
    /* mode = 0 - Only saturation protection
     *        1 - Analog Automatic Gain Control [-targetLevelDbfs (default -3
     *            dBOv)]
     *        2 - Digital Automatic Gain Control [-targetLevelDbfs (default -3
     *            dBOv)]
     *        3 - Fixed Digital Gain [compressionGaindB (default 8 dB)]
     */
#ifdef WEBRTC_AGC_DEBUG_DUMP
    stt->fcount = 0;
    fprintf(stt->fpt, "AGC->Init\n");
#endif
    if (agcMode < kAgcModeUnchanged || agcMode > kAgcModeFixedDigital) {
#ifdef WEBRTC_AGC_DEBUG_DUMP
        fprintf(stt->fpt, "AGC->Init: error, incorrect mode\n\n");
#endif
        return -1;
    }
    stt->agcMode = agcMode;
    stt->fs = fs;
    /* initialize input VAD */
    WebRtcAgc_InitVad(&stt->vadMic);
    /* If the volume range is smaller than 0-256 then
     * the levels are shifted up to Q8-domain */
    tmpNorm = NormU32((uint32_t) maxLevel);
    stt->scale = tmpNorm - 23;
    if (stt->scale < 0) {
        stt->scale = 0;
    }
    // TODO(bjornv): Investigate if we really need to scale up a small range now
    // when we have
    // a guard against zero-increments. For now, we do not support scale up (scale
    // = 0).
    stt->scale = 0; /* Deliberately overrides the scale computed above. */
    maxLevel <<= stt->scale;
    minLevel <<= stt->scale;
    /* Make minLevel and maxLevel static in AdaptiveDigital */
    if (stt->agcMode == kAgcModeAdaptiveDigital) {
        minLevel = 0;
        maxLevel = 255;
        stt->scale = 0;
    }
    /* The maximum supplemental volume range is based on a vague idea
     * of how much lower the gain will be than the real analog gain. */
    max_add = (maxLevel - minLevel) / 4;
    /* Minimum/maximum volume level that can be set */
    stt->minLevel = minLevel;
    stt->maxAnalog = maxLevel;
    stt->maxLevel = maxLevel + max_add;
    stt->maxInit = stt->maxLevel;
    stt->zeroCtrlMax = stt->maxAnalog;
    stt->lastInMicLevel = 0;
    /* Initialize micVol parameter */
    stt->micVol = stt->maxAnalog;
    if (stt->agcMode == kAgcModeAdaptiveDigital) {
        stt->micVol = 127; /* Mid-point of mic level */
    }
    stt->micRef = stt->micVol;
    stt->micGainIdx = 127;
#ifdef MIC_LEVEL_FEEDBACK
    stt->numBlocksMicLvlSat = 0;
    stt->micLvlSat = 0;
#endif
#ifdef WEBRTC_AGC_DEBUG_DUMP
    fprintf(stt->fpt, "AGC->Init: minLevel = %d, maxAnalog = %d, maxLevel = %d\n",
            stt->minLevel, stt->maxAnalog, stt->maxLevel);
#endif
    /* Minimum output volume is 4% higher than the available lowest volume level
     */
    tmp32 = ((stt->maxLevel - stt->minLevel) * 10) >> 8;
    stt->minOutput = (stt->minLevel + tmp32);
    stt->msTooLow = 0;
    stt->msTooHigh = 0;
    stt->changeToSlowMode = 0;
    stt->firstCall = 0;
    stt->msZero = 0;
    stt->muteGuardMs = 0;
    stt->gainTableIdx = 0;
    stt->msecSpeechInnerChange = kMsecSpeechInner;
    stt->msecSpeechOuterChange = kMsecSpeechOuter;
    stt->activeSpeech = 0;
    stt->Rxx16_LPw32Max = 0;
    stt->vadThreshold = kNormalVadThreshold;
    stt->inActive = 0;
    for (i = 0; i < RXX_BUFFER_LEN; i++) {
        stt->Rxx16_vectorw32[i] = (int32_t) 1000; /* -54dBm0; vector of subframe energies */
    }
    stt->Rxx160w32 =
        125 * RXX_BUFFER_LEN; /* (stt->Rxx16_vectorw32[0]>>3) = 125; average energy of one frame */
    stt->Rxx16pos = 0; /* current position in Rxx16_vectorw32 */
    stt->Rxx16_LPw32 = (int32_t) 16284; /* Q(-4); low-pass filtered subframe energy */
    for (i = 0; i < 5; i++) {
        stt->Rxx16w32_array[0][i] = 0;
    }
    for (i = 0; i < 10; i++) {
        stt->env[0][i] = 0; /* subframe envelopes */
        stt->env[1][i] = 0;
    }
    stt->inQueue = 0; /* 10 ms batch indicator */
#ifdef MIC_LEVEL_FEEDBACK
    stt->targetIdxOffset = 0; /* table index offset for level compensation */
#endif
    memset(stt->filterState, 0, 8 * sizeof(int32_t)); /* state for downsampling wb to nb */
    stt->initFlag = kInitCheck;
    /* Default settings for the three core AGC config parameters. */
    stt->defaultConfig.limiterEnable = kAgcTrue; /* limiter on by default */
    stt->defaultConfig.targetLevelDbfs = AGC_DEFAULT_TARGET_LEVEL;
    stt->defaultConfig.compressionGaindB = AGC_DEFAULT_COMP_GAIN;
    if (WebRtcAgc_set_config(stt, stt->defaultConfig) == -1) {
        stt->lastError = AGC_UNSPECIFIED_ERROR;
        return -1;
    }
    stt->Rxx160_LPw32 = stt->analogTargetLevel; // Initialize rms value
    stt->lowLevelSignal = 0;
    /* Only positive values are allowed that are not too large */
    /* NOTE(review): this range check runs AFTER minLevel/maxLevel have already
     * been used above; consider validating at function entry instead. */
    if ((minLevel >= maxLevel) || (maxLevel & 0xFC000000)) {
#ifdef WEBRTC_AGC_DEBUG_DUMP
        fprintf(stt->fpt, "minLevel, maxLevel value(s) are invalid\n\n");
#endif
        return -1;
    } else {
#ifdef WEBRTC_AGC_DEBUG_DUMP
        fprintf(stt->fpt, "\n");
#endif
        return 0;
    }
}
/* ===== agc.h ===== */
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
//#define MIC_LEVEL_FEEDBACK
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#include <stdint.h> // NOLINT(build/include)
#include <string.h>
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#include <stdint.h> // NOLINT(build/include)
#include <assert.h>
// allpass filter coefficients.
static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};
// Voice-activity-detection state; initialized in WebRtcAgc_InitVad.
typedef struct {
    int32_t downState[8];
    int16_t HPstate;
    int16_t counter;
    int16_t logRatio;          // log( P(active) / P(inactive) ) (Q10)
    int16_t meanLongTerm;      // Q10
    int32_t varianceLongTerm;  // Q8
    int16_t stdLongTerm;       // Q10
    int16_t meanShortTerm;     // Q10
    int32_t varianceShortTerm; // Q8
    int16_t stdShortTerm;      // Q10
} AgcVad; // total = 54 bytes
// Digital AGC state: gain table, limiter state and near/far-end VADs.
// Initialized in WebRtcAgc_InitDigital.
typedef struct {
    int32_t capacitorSlow;
    int32_t capacitorFast;
    int32_t gain;
    int32_t gainTable[32];  // Q16; filled by WebRtcAgc_CalculateGainTable
    int16_t gatePrevious;
    int16_t agcMode;        // same enum values as LegacyAgc.agcMode
    AgcVad vadNearend;
    AgcVad vadFarend;
#ifdef WEBRTC_AGC_DEBUG_DUMP
    FILE* logFile;
    int frameCounter;
#endif
} DigitalAgc;
int32_t WebRtcAgc_InitDigital(DigitalAgc *digitalAgcInst, int16_t agcMode);
int32_t WebRtcAgc_ProcessDigital(DigitalAgc *digitalAgcInst,
int16_t *const *inNear,
size_t num_bands,
int16_t *const *out,
uint32_t FS,
int16_t lowLevelSignal);
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc *digitalAgcInst,
const int16_t *inFar,
size_t nrSamples);
void WebRtcAgc_InitVad(AgcVad *vadInst);
int16_t WebRtcAgc_ProcessVad(AgcVad *vadInst, // (i) VAD state
const int16_t *in, // (i) Speech signal
size_t nrSamples); // (i) number of samples
int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16
int16_t compressionGaindB, // Q0 (in dB)
int16_t targetLevelDbfs, // Q0 (in dB)
uint8_t limiterEnable,
int16_t analogTarget);
// Errors
#define AGC_UNSPECIFIED_ERROR 18000
#define AGC_UNSUPPORTED_FUNCTION_ERROR 18001
#define AGC_UNINITIALIZED_ERROR 18002
#define AGC_NULL_POINTER_ERROR 18003
#define AGC_BAD_PARAMETER_ERROR 18004
// Warnings
#define AGC_BAD_PARAMETER_WARNING 18050
enum {
kAgcModeUnchanged,
kAgcModeAdaptiveAnalog,
kAgcModeAdaptiveDigital,
kAgcModeFixedDigital
};
enum {
kAgcFalse = 0, kAgcTrue
};
// The three core AGC parameters, set via WebRtcAgc_set_config().
typedef struct {
    int16_t targetLevelDbfs;   // default 3 (-3 dBOv)
    int16_t compressionGaindB; // default 9 dB
    uint8_t limiterEnable;     // default kAgcTrue (on)
} WebRtcAgcConfig;
#if defined(__cplusplus)
extern "C" {
#endif
/*
* This function analyses the number of samples passed to
* farend and produces any error code that could arise.
*
* Input:
* - agcInst : AGC instance.
* - samples : Number of samples in input vector.
*
* Return value:
* : 0 - Normal operation.
* : -1 - Error.
*/
int WebRtcAgc_GetAddFarendError(void *state, size_t samples);
/*
* This function processes a 10 ms frame of far-end speech to determine
* if there is active speech. The length of the input speech vector must be
* given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
* FS=48000).
*
* Input:
* - agcInst : AGC instance.
* - inFar : Far-end input speech vector
* - samples : Number of samples in input vector
*
* Return value:
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_AddFarend(void *agcInst, const int16_t *inFar, size_t samples);
/*
* This function processes a 10 ms frame of microphone speech to determine
* if there is active speech. The length of the input speech vector must be
* given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
* FS=48000). For very low input levels, the input signal is increased in level
* by multiplying and overwriting the samples in inMic[].
*
* This function should be called before any further processing of the
* near-end microphone signal.
*
* Input:
* - agcInst : AGC instance.
* - inMic : Microphone input speech vector for each band
* - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
*
* Return value:
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_AddMic(void *agcInst,
int16_t *const *inMic,
size_t num_bands,
size_t samples);
/*
* This function replaces the analog microphone with a virtual one.
* It is a digital gain applied to the input signal and is used in the
* agcAdaptiveDigital mode where no microphone level is adjustable. The length
* of the input speech vector must be given in samples (80 when FS=8000, and 160
* when FS=16000, FS=32000 or FS=48000).
*
* Input:
* - agcInst : AGC instance.
* - inMic : Microphone input speech vector for each band
* - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
* - micLevelIn : Input level of microphone (static)
*
* Output:
* - inMic : Microphone output after processing (L band)
* - inMic_H : Microphone output after processing (H band)
* - micLevelOut : Adjusted microphone level after processing
*
* Return value:
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_VirtualMic(void *agcInst,
int16_t *const *inMic,
size_t num_bands,
size_t samples,
int32_t micLevelIn,
int32_t *micLevelOut);
/*
* This function processes a 10 ms frame and adjusts (normalizes) the gain both
* analog and digitally. The gain adjustments are done only during active
* periods of speech. The length of the speech vectors must be given in samples
* (80 when FS=8000, and 160 when FS=16000, FS=32000 or FS=48000). The echo
* parameter can be used to ensure the AGC will not adjust upward in the
* presence of echo.
*
* This function should be called after processing the near-end microphone
* signal, in any case after any echo cancellation.
*
* Input:
* - agcInst : AGC instance
* - inNear : Near-end input speech vector for each band
* - num_bands : Number of bands in input/output vector
* - samples : Number of samples in input/output vector
* - inMicLevel : Current microphone volume level
* - echo : Set to 0 if the signal passed to add_mic is
* almost certainly free of echo; otherwise set
* to 1. If you have no information regarding echo
* set to 0.
*
* Output:
* - outMicLevel : Adjusted microphone volume level
* - out : Gain-adjusted near-end speech vector
* : May be the same vector as the input.
* - saturationWarning : A returned value of 1 indicates a saturation event
* has occurred and the volume cannot be further
* reduced. Otherwise will be set to 0.
*
* Return value:
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_Process(void *agcInst,
int16_t *const *inNear,
size_t num_bands,
size_t samples,
int16_t *const *out,
int32_t inMicLevel,
int32_t *outMicLevel,
int16_t echo,
uint8_t *saturationWarning);
/*
* This function sets the config parameters (targetLevelDbfs,
* compressionGaindB and limiterEnable).
*
* Input:
* - agcInst : AGC instance
* - config : config struct
*
* Output:
*
* Return value:
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_set_config(void *agcInst, WebRtcAgcConfig config);
/*
* This function returns the config parameters (targetLevelDbfs,
* compressionGaindB and limiterEnable).
*
* Input:
* - agcInst : AGC instance
*
* Output:
* - config : config struct
*
* Return value:
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_get_config(void *agcInst, WebRtcAgcConfig *config);
/*
* This function creates and returns an AGC instance, which will contain the
* state information for one (duplex) channel.
*/
void *WebRtcAgc_Create(void);
/*
* This function frees the AGC instance created at the beginning.
*
* Input:
* - agcInst : AGC instance.
*/
void WebRtcAgc_Free(void *agcInst);
/*
* This function initializes an AGC instance.
*
* Input:
* - agcInst : AGC instance.
* - minLevel : Minimum possible mic level
* - maxLevel : Maximum possible mic level
* - agcMode : 0 - Unchanged
* : 1 - Adaptive Analog Automatic Gain Control -3dBOv
* : 2 - Adaptive Digital Automatic Gain Control -3dBOv
* : 3 - Fixed Digital Gain 0dB
* - fs : Sampling frequency
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcAgc_Init(void *agcInst,
int32_t minLevel,
int32_t maxLevel,
int16_t agcMode,
uint32_t fs);
#if defined(__cplusplus)
}
#endif
/* Analog Automatic Gain Control variables:
* Constant declarations (inner limits inside which no changes are done)
* In the beginning the range is narrower to widen as soon as the measure
* 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0
* and the final limits -22.2+/-2.5dBm0. These levels makes the speech signal
* go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm
* The limits are created by running the AGC with a file having the desired
* signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined
* by out=10*log10(in/260537279.7); Set the target level to the average level
* of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
* Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) )
*/
#define RXX_BUFFER_LEN 10
static const int16_t kMsecSpeechInner = 520;
static const int16_t kMsecSpeechOuter = 340;
static const int16_t kNormalVadThreshold = 400;
static const int16_t kAlphaShortTerm = 6; // 1 >> 6 = 0.0156
static const int16_t kAlphaLongTerm = 10; // 1 >> 10 = 0.000977
// Full state of one AGC instance (analog + digital parts).
typedef struct {
    // Configurable parameters/variables
    uint32_t fs;               // Sampling frequency
    int16_t compressionGaindB; // Fixed gain level in dB
    int16_t targetLevelDbfs;   // Target level in -dBfs of envelope (default -3)
    int16_t agcMode;           // Hard coded mode (adaptAna/adaptDig/fixedDig)
    uint8_t limiterEnable;     // Enabling limiter (on/off (default off))
    WebRtcAgcConfig defaultConfig;
    WebRtcAgcConfig usedConfig;
    // General variables
    int16_t initFlag;  // Set to kInitCheck by WebRtcAgc_Init
    int16_t lastError; // Last error code (e.g. from WebRtcAgc_set_config)
    // Target level parameters
    // Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
    int32_t analogTargetLevel;   // = RXX_BUFFER_LEN * 846805;  -22 dBfs
    int32_t startUpperLimit;     // = RXX_BUFFER_LEN * 1066064; -21 dBfs
    int32_t startLowerLimit;     // = RXX_BUFFER_LEN * 672641;  -23 dBfs
    int32_t upperPrimaryLimit;   // = RXX_BUFFER_LEN * 1342095; -20 dBfs
    int32_t lowerPrimaryLimit;   // = RXX_BUFFER_LEN * 534298;  -24 dBfs
    int32_t upperSecondaryLimit; // = RXX_BUFFER_LEN * 2677832; -17 dBfs
    int32_t lowerSecondaryLimit; // = RXX_BUFFER_LEN * 267783;  -27 dBfs
    uint16_t targetIdx;          // Table index for corresponding target level
#ifdef MIC_LEVEL_FEEDBACK
    uint16_t targetIdxOffset; // Table index offset for level compensation
#endif
    int16_t analogTarget; // Digital reference level in ENV scale
    // Analog AGC specific variables
    int32_t filterState[8]; // For downsampling wb to nb
    int32_t upperLimit;     // Upper limit for mic energy
    int32_t lowerLimit;     // Lower limit for mic energy
    int32_t Rxx160w32;      // Average energy for one frame
    int32_t Rxx16_LPw32;    // Low pass filtered subframe energies
    int32_t Rxx160_LPw32;   // Low pass filtered frame energies
    int32_t Rxx16_LPw32Max; // Keeps track of largest energy subframe
    int32_t Rxx16_vectorw32[RXX_BUFFER_LEN]; // Array with subframe energies
    int32_t Rxx16w32_array[2][5]; // Energy values of microphone signal
    int32_t env[2][10];           // Envelope values of subframes
    int16_t Rxx16pos;          // Current position in the Rxx16_vectorw32
    int16_t envSum;            // Filtered scaled envelope in subframes
    int16_t vadThreshold;      // Threshold for VAD decision
    int16_t inActive;          // Inactive time in milliseconds
    int16_t msTooLow;          // Milliseconds of speech at a too low level
    int16_t msTooHigh;         // Milliseconds of speech at a too high level
    int16_t changeToSlowMode;  // Change to slow mode after some time at target
    int16_t firstCall;         // First call to the process-function
    int16_t msZero;            // Milliseconds of zero input
    int16_t msecSpeechOuterChange; // Min ms of speech between volume changes
    int16_t msecSpeechInnerChange; // Min ms of speech between volume changes
    int16_t activeSpeech;      // Milliseconds of active speech
    int16_t muteGuardMs;       // Counter to prevent mute action
    int16_t inQueue;           // 10 ms batch indicator
    // Microphone level variables
    int32_t micRef;        // Remember ref. mic level for virtual mic
    uint16_t gainTableIdx; // Current position in virtual gain table
    int32_t micGainIdx;    // Gain index of mic level to increase slowly
    int32_t micVol;        // Remember volume between frames
    int32_t maxLevel;      // Max possible vol level, incl dig gain
    int32_t maxAnalog;     // Maximum possible analog volume level
    int32_t maxInit;       // Initial value of "max"
    int32_t minLevel;      // Minimum possible volume level
    int32_t minOutput;     // Minimum output volume level
    int32_t zeroCtrlMax;   // Remember max gain => don't amp low input
    int32_t lastInMicLevel;
    int16_t scale; // Scale factor for internal volume levels
#ifdef MIC_LEVEL_FEEDBACK
    int16_t numBlocksMicLvlSat;
    uint8_t micLvlSat;
#endif
    // Structs for VAD and digital_agc
    AgcVad vadMic;
    DigitalAgc digitalAgc;
#ifdef WEBRTC_AGC_DEBUG_DUMP
    FILE* fpt;
    FILE* agcLog;
    int32_t fcount;
#endif
    int16_t lowLevelSignal;
} LegacyAgc;
#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
/* ===== main.c ===== */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
//采用https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解码
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"
#include "agc.h"
#ifndef nullptr
#define nullptr 0
#endif
#ifndef MIN
#define MIN(A, B) ((A) < (B) ? (A) : (B))
#endif
//计时
#include <stdint.h>
#if defined(__APPLE__)
# include <mach/mach_time.h>
#elif defined(_WIN32)
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#else // __linux
# include <time.h>
# ifndef CLOCK_MONOTONIC //_RAW
# define CLOCK_MONOTONIC CLOCK_REALTIME
# endif
#endif
static
uint64_t nanotimer() {
    /* Monotonic clock in nanoseconds (platform-specific backend).
     * Returns 0 if the clock cannot be queried. */
    static int ever = 0;
#if defined(__APPLE__)
    static mach_timebase_info_data_t frequency;
    if (!ever) {
        if (mach_timebase_info(&frequency) != KERN_SUCCESS) {
            return 0;
        }
        ever = 1;
    }
    /* Fixed: this branch previously returned 0 unconditionally, so timing on
     * macOS always reported zero. Convert mach ticks via the timebase. */
    return (mach_absolute_time() * frequency.numer) / frequency.denom;
#elif defined(_WIN32)
    static LARGE_INTEGER frequency;
    if (!ever) {
        QueryPerformanceFrequency(&frequency);
        ever = 1;
    }
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);
    return (t.QuadPart * (uint64_t) 1e9) / frequency.QuadPart;
#else // __linux
    struct timespec t;
    if (!ever) {
        if (clock_gettime(CLOCK_MONOTONIC, &t) != 0) {
            return 0;
        }
        ever = 1;
    }
    clock_gettime(CLOCK_MONOTONIC, &t);
    return (t.tv_sec * (uint64_t) 1e9) + t.tv_nsec;
#endif
}
static double now() {
    /* Seconds elapsed since the first call to now() (first call ~0.0). */
    static uint64_t epoch = 0;
    if (!epoch) {
        epoch = nanotimer();
    }
    return (nanotimer() - epoch) / 1e9;
}
/* Fixed: removed the stray ';' that followed the closing brace — a null
 * statement at file scope is a C constraint violation. */
/* Returns the elapsed time end - start, in seconds. */
double calcElapsed(double start, double end) {
    return end - start;
}
// Write buffer as a 16-bit PCM RIFF wav file; aborts the program on failure.
void wavWrite_int16(char *filename, int16_t *buffer, size_t sampleRate, size_t totalSampleCount, unsigned int channels) {
    drwav_data_format format = {};
    format.container = drwav_container_riff; // normal WAV file (not Sony Wave64)
    format.format = DR_WAVE_FORMAT_PCM;      // any of the DR_WAVE_FORMAT_* codes
    format.channels = channels;
    format.sampleRate = (drwav_uint32) sampleRate;
    format.bitsPerSample = 16;
    drwav *pWav = drwav_open_file_write(filename, &format);
    if (pWav == NULL) {
        /* Fixed: an unopenable output file was previously ignored silently,
         * so the program "succeeded" without writing anything. */
        fprintf(stderr, "ERROR\n");
        exit(1);
    }
    drwav_uint64 samplesWritten = drwav_write(pWav, totalSampleCount, buffer);
    drwav_uninit(pWav);
    if (samplesWritten != totalSampleCount) {
        fprintf(stderr, "ERROR\n");
        exit(1);
    }
}
// Load a wav file as 16-bit PCM; returns NULL (and prints a message) on failure.
// Caller owns the returned buffer and must free() it.
int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount, unsigned int *channels) {
    int16_t *samples =
        drwav_open_and_read_file_s16(filename, channels, sampleRate, totalSampleCount);
    if (samples == nullptr) {
        printf("读取wav文件失败.");
    }
    return samples;
}
//分割路径函数
void splitpath(const char *path, char *drv, char *dir, char *name, char *ext) {
const char *end;
const char *p;
const char *s;
if (path[0] && path[1] == ':') {
if (drv) {
*drv++ = *path++;
*drv++ = *path++;
*drv = '\0';
}
} else if (drv)
*drv = '\0';
for (end = path; *end && *end != ':';)
end++;
for (p = end; p > path && *--p != '\\' && *p != '/';)
if (*p == '.') {
end = p;
break;
}
if (ext)
for (s = end; (*ext = *s++);)
ext++;
for (p = end; p > path;)
if (*--p == '\\' || *p == '/') {
p++;
break;
}
if (name) {
for (s = p; s < end;)
*name++ = *s++;
*name = '\0';
}
if (dir) {
for (s = path; s < p;)
*dir++ = *s++;
*dir = '\0';
}
}
/* Runs WebRTC AGC over 'buffer' in place, one 10 ms frame at a time.
 * Returns 1 on success, -1 on error. */
int agcProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t agcMode) {
    if (buffer == nullptr) return -1;
    if (samplesCount == 0) return -1;
    WebRtcAgcConfig agcConfig;
    agcConfig.compressionGaindB = 9; // default 9 dB maximum gain
    agcConfig.limiterEnable = 1;     // default kAgcTrue (on)
    agcConfig.targetLevelDbfs = 3;   // default 3 (-3 dBOv)
    int minLevel = 0;                // microphone level range
    int maxLevel = 255;
    size_t samples = MIN(160, sampleRate / 100); // samples per 10 ms frame
    if (samples == 0) return -1;
    const int maxSamples = 320;
    int16_t *input = buffer;
    size_t nTotal = (samplesCount / samples); // number of whole frames
    void *agcInst = WebRtcAgc_Create();
    if (agcInst == NULL) return -1;
    int status = WebRtcAgc_Init(agcInst, minLevel, maxLevel, agcMode, sampleRate);
    if (status != 0) {
        printf("WebRtcAgc_Init fail\n");
        WebRtcAgc_Free(agcInst);
        return -1;
    }
    status = WebRtcAgc_set_config(agcInst, agcConfig);
    if (status != 0) {
        printf("WebRtcAgc_set_config fail\n");
        WebRtcAgc_Free(agcInst);
        return -1;
    }
    size_t num_bands = 1;
    int inMicLevel, outMicLevel = -1, inAnalogLevel;
    int16_t out_buffer[maxSamples];
    int16_t *out16 = out_buffer;
    uint8_t saturationWarning = 1; // set by Process when output would saturate
    int16_t echo = 0;              // whether gain should account for echo
    /* size_t index fixes the old signed/unsigned comparison with nTotal. */
    for (size_t i = 0; i < nTotal; i++) {
        inMicLevel = 127;
        if (WebRtcAgc_VirtualMic(agcInst, &input, num_bands, samples, inMicLevel, &outMicLevel)) {
            WebRtcAgc_Free(agcInst); /* fixed: instance was leaked on this path */
            return -1;
        }
        inAnalogLevel = outMicLevel; /* fixed: missing ';' (compile error) */
        int nAgcRet = WebRtcAgc_Process(agcInst, (int16_t *const *) &input, num_bands, samples,
                                        (int16_t *const *) &out16, inAnalogLevel, &outMicLevel, echo,
                                        &saturationWarning);
        if (nAgcRet != 0) {
            printf("failed in WebRtcAgc_Process\n");
            WebRtcAgc_Free(agcInst);
            return -1;
        }
        memcpy(input, out_buffer, samples * sizeof(int16_t)); // store result back
        input += samples;
    }
    /* Handle the trailing partial frame by re-processing the last full
     * window and copying only the new tail back. */
    const size_t remainedSamples = samplesCount - nTotal * samples;
    if (remainedSamples > 0) {
        if (nTotal > 0) {
            input = input - samples + remainedSamples;
        }
        /* NOTE(review): when nTotal == 0 this still reads a full frame of
         * 'samples' values from a shorter buffer — confirm inputs are always
         * at least one frame long. */
        inMicLevel = 127;
        if (WebRtcAgc_VirtualMic(agcInst, &input, num_bands, samples, inMicLevel, &outMicLevel)) {
            WebRtcAgc_Free(agcInst); /* fixed: instance was leaked on this path */
            return -1;
        }
        inAnalogLevel = outMicLevel; /* fixed: missing ';' (compile error) */
        int nAgcRet = WebRtcAgc_Process(agcInst, (int16_t *const *) &input, num_bands, samples,
                                        (int16_t *const *) &out16, inAnalogLevel, &outMicLevel, echo,
                                        &saturationWarning);
        if (nAgcRet != 0) {
            printf("failed in WebRtcAgc_Process during filtering the last chunk\n");
            WebRtcAgc_Free(agcInst);
            return -1;
        }
        memcpy(&input[samples - remainedSamples], &out_buffer[samples - remainedSamples],
               remainedSamples * sizeof(int16_t));
        input += samples;
    }
    WebRtcAgc_Free(agcInst);
    return 1;
}
/* Load in_file, run the AGC over it, report the elapsed time and write the
 * processed audio to out_file. Does nothing if the input cannot be read. */
void auto_gain(char *in_file, char *out_file) {
    uint32_t sampleRate = 0;    // input sampling rate
    uint64_t inSampleCount = 0; // total number of input samples
    unsigned int channels = 0;
    int16_t *inBuffer = wavRead_int16(in_file, &sampleRate, &inSampleCount, &channels);
    if (inBuffer == nullptr) {
        return; // load failed; wavRead_int16 already printed a message
    }
    // Available modes: kAgcModeAdaptiveAnalog (analog volume),
    // kAgcModeAdaptiveDigital (adaptive gain), kAgcModeFixedDigital (fixed gain).
    double startTime = now();
    agcProcess(inBuffer, sampleRate, inSampleCount, kAgcModeAdaptiveDigital);
    double elapsed_time = calcElapsed(startTime, now());
    printf("time: %d ms\n ", (int) (elapsed_time * 1000));
    wavWrite_int16(out_file, inBuffer, sampleRate, inSampleCount, channels);
    free(inBuffer);
}
/* Usage: <prog> input.wav — writes the processed file next to the input as
 * <name>_out2<ext>. */
int main(int argc, char *argv[]) {
    printf("WebRTC Automatic Gain Control\n");
    printf("博客:http://cpuimage.cnblogs.com/\n");
    printf("音频自动增益\n");
    if (argc < 2)
        return -1;
    char *in_file = argv[1];
    char drive[3];
    char dir[256];
    char fname[256];
    char ext[256];
    char out_file[1024];
    splitpath(in_file, drive, dir, fname, ext);
    /* snprintf instead of sprintf: a long input path could overflow
     * out_file and corrupt the stack. */
    snprintf(out_file, sizeof(out_file), "%s%s%s_out2%s", drive, dir, fname, ext);
    auto_gain(in_file, out_file);
    printf("按任意键退出程序 \n");
    getchar();
    return 0;
}
References (参考资料)
Webrtc AGC 算法原理初识(一) - 简书
Webrtc AGC 算法原理介绍(四)_灯等等凳的博客-程序员秘密 - 程序员秘密
音频自动增益控制 AGC 解决的问题及原理解析 - TeHub
浅谈 WebRTC 的 Audio 在进入 Encoder 之前的处理流程_ITPUB博客
AGC(Automatic Gain Control)—语音自动增益控制简介 - 知乎
单独抽取webRtc的AGC(增益)模块 - 简书
webrtc中AGC的应用 - Z–Y - 博客园
音视频开发进阶|第四讲:音频自动增益控制 AGC - ZEGO即构 - 博客园
WebRTC AGC 流程解析_非典型废言的博客-CSDN博客_agc webrtc
Webrtc AGC 算法原理介绍(一)_灯等等凳的博客-CSDN博客_agc算法
详解 WebRTC 高音质低延时的背后—AGC 自动增益控制_阿里云视频云的博客-CSDN博客
AliAGC 自动增益控制算法:解决复杂场景下的音量问题 - 阿里云视频云 - 博客园
WebRTC中AGC模块分析(上) - 小奥的学习笔记
WebRTC中AGC模块分析(下) - 小奥的学习笔记
WebRTC_M76/src/modules/audio_processing
动态范围规划(调整)Dynamic Range Control的一些心得(一)