速率估算器的使用参见:iptables匹配rateest。
函数gen_new_estimator创建新的速率估算器,其时间间隔必须为:[-2, 3]之间的整数。
/*
 * gen_new_estimator - create a rate estimator and install it in *rate_est.
 *
 * bstats:     basic byte/packet counters the estimator samples
 * cpu_bstats: per-CPU counters; when non-NULL the counter reader sums
 *             these instead of reading bstats
 * rate_est:   RCU-managed slot; an estimator already stored there is
 *             replaced and its averages are carried over
 * lock:       optional spinlock (may be NULL); stored as the estimator's
 *             stats_lock and held while *rate_est is swapped
 * running:    optional seqcount stored for the counter reader
 * opt:        netlink attribute carrying a struct gnet_estimator
 *
 * Returns 0 on success, -EINVAL if the attribute is too short or a
 * parameter is out of range, -ENOBUFS if allocation fails.
 */
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
		      struct net_rate_estimator __rcu **rate_est,
		      spinlock_t *lock, seqcount_t *running, struct nlattr *opt)
{
	struct gnet_estimator *parm = nla_data(opt);
	struct net_rate_estimator *old, *est;
	struct gnet_stats_basic_packed b;
	int intvl_log;

	if (nla_len(opt) < sizeof(*parm))
		return -EINVAL;

	/* allowed timer periods are :
	 * -2 : 250ms, -1 : 500ms, 0 : 1 sec
	 * 1 : 2 sec, 2 : 4 sec, 3 : 8 sec
	 */
	if (parm->interval < -2 || parm->interval > 3)
		return -EINVAL;

	/*
	 * ewma_log comes straight from the attribute payload and est_timer()
	 * uses it as a shift count on 64-bit values; a count of 64 or more
	 * is undefined behaviour, so refuse it here.
	 */
	if (parm->ewma_log >= 64)
		return -EINVAL;

	/*
	 * Allocate and initialise the new estimator. The interval is biased
	 * by 2 so that intvl_log is never negative (range 0..5).
	 */
	est = kzalloc(sizeof(*est), GFP_KERNEL);
	if (!est)
		return -ENOBUFS;

	seqcount_init(&est->seq);
	intvl_log = parm->interval + 2;
	est->bstats = bstats;
	est->stats_lock = lock;
	est->running = running;
	est->ewma_log = parm->ewma_log;
	est->intvl_log = intvl_log;
	est->cpu_bstats = cpu_bstats;

	/*
	 * Take the initial counter snapshot. est_fetch_counters() acquires
	 * stats_lock with plain spin_lock(), so bottom halves are disabled
	 * around the call whenever a lock is in use.
	 */
	if (lock)
		local_bh_disable();
	est_fetch_counters(est, &b);
	if (lock)
		local_bh_enable();
	est->last_bytes = b.bytes;
	est->last_packets = b.packets;

	/*
	 * A non-NULL *rate_est means an older estimator exists: stop its
	 * timer and hand its computed averages to the new estimator.
	 */
	if (lock)
		spin_lock_bh(lock);
	old = rcu_dereference_protected(*rate_est, 1);
	if (old) {
		del_timer_sync(&old->timer);
		est->avbps = old->avbps;
		est->avpps = old->avpps;
	}

	/*
	 * The sampling period is (HZ/4) << intvl_log. Arm the timer for the
	 * first expiry (next_jiffies), then publish the estimator.
	 */
	est->next_jiffies = jiffies + ((HZ/4) << intvl_log);
	timer_setup(&est->timer, est_timer, 0);
	mod_timer(&est->timer, est->next_jiffies);

	rcu_assign_pointer(*rate_est, est);

	/* Release the lock taken above; without this the function leaks it. */
	if (lock)
		spin_unlock_bh(lock);

	/*
	 * Readers may still hold the old estimator under rcu_read_lock(), so
	 * free it only after a grace period.
	 * NOTE(review): assumes struct net_rate_estimator embeds its rcu_head
	 * as a member named "rcu" - confirm against the struct definition.
	 */
	if (old)
		kfree_rcu(old, rcu);
	return 0;
}
获取基础数据
先用memset将b清零,再由函数__gnet_stats_copy_basic向b中填入当前的字节数和报文数。
/*
 * Snapshot the current byte/packet totals of estimator @e into @b.
 *
 * If the estimator has a stats_lock it is held (plain spin_lock, no BH
 * handling here) while the counters are copied.
 */
static void est_fetch_counters(struct net_rate_estimator *e,
			       struct gnet_stats_basic_packed *b)
{
	/* Start from zero: the per-CPU copy path accumulates into b with +=. */
	memset(b, 0, sizeof(*b));

	if (e->stats_lock)
		spin_lock(e->stats_lock);

	/* b is the destination; cpu_bstats, when set, is used instead of bstats. */
	__gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats);

	if (e->stats_lock)
		spin_unlock(e->stats_lock);
}
如果估算器的cpu_bstats不为空,使用其中的数据。否则,使用bstats中的数据。
/*
 * Copy basic counters into @bstats.
 *
 * Note the naming: @bstats is the DESTINATION and @b is the plain
 * (non per-CPU) SOURCE.
 *
 * running: optional seqcount; when non-NULL the plain copy is retried
 *          until it was not interrupted by a writer
 * bstats:  destination of the byte/packet totals
 * cpu:     per-CPU source; when non-NULL it is summed into @bstats and
 *          @b is not read at all
 * b:       plain source, used only when @cpu is NULL
 */
void
__gnet_stats_copy_basic(const seqcount_t *running,
			struct gnet_stats_basic_packed *bstats,
			struct gnet_stats_basic_cpu __percpu *cpu,
			struct gnet_stats_basic_packed *b)
{
	unsigned int seq;

	if (cpu) {
		__gnet_stats_copy_basic_cpu(bstats, cpu);
		return;
	}

	/* seq is only read when running is non-NULL (short-circuit in the loop test). */
	do {
		if (running)
			seq = read_seqcount_begin(running);
		bstats->bytes = b->bytes;
		bstats->packets = b->packets;
	} while (running && read_seqcount_retry(running, seq));
}
遍历每个CPU,将每个CPU的统计数据进行相加操作。
/*
 * Add the byte/packet counters of every possible CPU to @bstats.
 *
 * The totals are accumulated with +=, so the caller must have zeroed
 * @bstats beforehand if it wants a pure sum.
 */
static void
__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
			    struct gnet_stats_basic_cpu __percpu *cpu)
{
	int i;

	for_each_possible_cpu(i) {
		struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
		unsigned int start;
		u64 bytes, packets;

		/* Re-read until both 64-bit values come from one consistent update. */
		do {
			start = u64_stats_fetch_begin_irq(&bcpu->syncp);
			bytes = bcpu->bstats.bytes;
			packets = bcpu->bstats.packets;
		} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));

		bstats->bytes += bytes;
		bstats->packets += packets;
	}
}
估算定时器
首先获取基础数据,其次对数据应用以下EWMA算法:
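新的平均值 = (av.old - (av.old >> ewma)) + (bytes.delta << (10 - ewma - intvl_log))
其中 (av.old - (av.old >> ewma)) 是旧平均值所占的份额,(bytes.delta << (10 - ewma - intvl_log)) 是本次新统计数据所占的份额。注意C语言中移位运算符的优先级低于加减,因此公式必须加括号。常数10的来源:采样周期为 2^intvl_log 个250ms,换算成每秒速率需要左移2位;平均值按 2^8 放大后保存(读取结果时再右移8位),两者合计左移10位。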
static void est_timer(struct timer_list *t)
{
struct net_rate_estimator *est = from_timer(est, t, timer);
struct gnet_stats_basic_packed b;
u64 rate, brate;
est_fetch_counters(est, &b);
brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log);
brate -= (est->avbps >> est->ewma_log);
rate = (b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log);
rate -= (est->avpps >> est->ewma_log);
write_seqcount_begin(&est->seq);
est->avbps += brate;
est->avpps += rate;
write_seqcount_end(&est->seq);
est->last_bytes = b.bytes;
est->last_packets = b.packets;
计算下一次超时时间,再次启动定时器。
est->next_jiffies += ((HZ/4) << est->intvl_log);
if (unlikely(time_after_eq(jiffies, est->next_jiffies))) {
/* Ouch... timer was delayed. */
est->next_jiffies = jiffies + 1;
}
mod_timer(&est->timer, est->next_jiffies);
读取估算器结果值
使用seqcount确保读取的数据的一致性。
/*
 * Read the current rate estimate into @sample.
 *
 * rate_est: RCU-managed estimator slot
 * sample:   receives bps/pps; the stored averages are shifted right by 8
 *           before being reported
 *
 * Returns false (leaving @sample untouched) when no estimator is
 * installed, true otherwise.
 */
bool gen_estimator_read(struct net_rate_estimator __rcu **rate_est,
			struct gnet_stats_rate_est64 *sample)
{
	struct net_rate_estimator *est;
	unsigned int seq;

	rcu_read_lock();
	est = rcu_dereference(*rate_est);
	if (!est) {
		rcu_read_unlock();
		return false;
	}

	/* Retry until bps and pps were read without a concurrent writer update. */
	do {
		seq = read_seqcount_begin(&est->seq);
		sample->bps = est->avbps >> 8;
		sample->pps = est->avpps >> 8;
	} while (read_seqcount_retry(&est->seq, seq));

	/* Leave the RCU read-side section on the success path as well. */
	rcu_read_unlock();
	return true;
}
内核版本 5.10