/* Call chain: schedule -> __schedule -> update_rq_clock -> update_rq_clock_task */
/*
 * update_rq_clock_task - advance the task-side clocks of a runqueue.
 * @rq:    runqueue whose clocks are updated
 * @delta: time elapsed on the rq clock since the previous update
 *
 * Removes from @delta the time spent in irq context (when
 * CONFIG_IRQ_TIME_ACCOUNTING is set) and the time reported by
 * paravirt_steal_clock() (when CONFIG_PARAVIRT_TIME_ACCOUNTING is set),
 * adds the remainder to rq->clock_task, and then forwards the same
 * remainder to update_rq_clock_pelt().
 */
static void update_rq_clock_task(struct rq *rq, s64 delta)
{
	s64 __maybe_unused steal = 0, irq_delta = 0;

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	/*
	 * irq time accumulated since the last update; irq_time_read() is
	 * described by the original notes as the per-CPU total irq time.
	 */
	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;

	/*
	 * Per the original notes: irq time is only updated from irq context,
	 * so when update_rq_clock() itself ran inside an irq handler the irq
	 * delta can exceed the rq clock delta. Clamping keeps the value added
	 * to rq->clock_task below from going negative, i.e. clock_task stays
	 * monotonic.
	 */
	if (irq_delta > delta)
		irq_delta = delta;

	/* Advance the irq baseline by exactly the amount accounted for here. */
	rq->prev_irq_time += irq_delta;

	/* What remains is the elapsed time excluding irq time. */
	delta -= irq_delta;
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
	/*
	 * Steal time is handled with the same pattern as irq time above:
	 * take the increase since the last update, clamp it to what is left
	 * of delta, advance the baseline and subtract it.
	 */
	if (static_key_false((&paravirt_steal_rq_enabled))) {
		steal = paravirt_steal_clock(cpu_of(rq));
		steal -= rq->prev_steal_time_rq;

		if (unlikely(steal > delta))
			steal = delta;

		rq->prev_steal_time_rq += steal;
		delta -= steal;
	}
#endif

	/* clock_task only advances by time not attributed to irq or steal. */
	rq->clock_task += delta;

#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
	/*
	 * If any irq or steal time was measured and the NONTASK_CAPACITY
	 * scheduler feature is enabled, account that time in the irq load
	 * average.
	 */
	if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
		update_irq_load_avg(rq, irq_delta + steal);
#endif
	/* Let the PELT clock follow the task-time delta. */
	update_rq_clock_pelt(rq, delta);
}
/*
 * update_irq_load_avg - account non-task (irq/steal) time in rq->avg_irq.
 * @rq:      runqueue whose avg_irq signal is updated
 * @running: amount of time to account as running since the last update
 *
 * Return: sum of the two ___update_load_sum() results; non-zero when at
 * least one of them reported an update, in which case the averages are
 * recomputed and the pelt irq tracepoint is emitted.
 */
int update_irq_load_avg(struct rq *rq, u64 running)
{
	int ret = 0;

	/*
	 * Scale the running time by the frequency and CPU capacity of this
	 * CPU.
	 * NOTE(review): the original annotation observed the value is
	 * basically unchanged by this scaling - presumably on a platform
	 * where both capacities are at full scale; confirm.
	 */
	running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq)));
	running = cap_scale(running, arch_scale_cpu_capacity(cpu_of(rq)));

	/*
	 * The update is done in two steps because only the amount of time is
	 * known, not when it happened. The span is placed at the very end of
	 * the interval:
	 *  1) advance the signal up to (rq->clock - running) with zero
	 *     load/runnable/running, so that stretch contributes nothing;
	 *  2) advance from there to rq->clock with load/runnable/running
	 *     all set to 1, so the last 'running' units count as busy.
	 */
	ret = ___update_load_sum(rq->clock - running, &rq->avg_irq, 0, 0, 0);
	ret += ___update_load_sum(rq->clock, &rq->avg_irq, 1, 1, 1);

	if (ret) {
		/* Turn the refreshed sums into averages for the irq signal. */
		___update_load_avg(&rq->avg_irq, 1, 1);
		trace_pelt_irq_tp(rq);
	}

	return ret;
}
/*
 * ___update_load_sum - advance a sched_avg's sums up to @now.
 * @now:      timestamp to advance to
 * @sa:       the sched_avg being updated
 * @load:     load weight to accumulate for the elapsed span
 * @runnable: runnable weight to accumulate for the elapsed span
 * @running:  non-zero if the span counts as running
 *
 * Return: 1 if accumulate_sum() reported a non-zero result, 0 otherwise
 * (time went backwards, less than one 1024-unit step elapsed, or
 * accumulate_sum() returned 0).
 */
static __always_inline int
___update_load_sum(u64 now, struct sched_avg *sa,
		   unsigned long load, unsigned long runnable, int running)
{
	u64 delta;

	/* Time elapsed since this signal was last updated. */
	delta = now - sa->last_update_time;

	/*
	 * @now lies before the last update (invalid input): resynchronise
	 * the timestamp and report that nothing was accumulated.
	 */
	if ((s64)delta < 0) {
		sa->last_update_time = now;
		return 0;
	}

	/*
	 * Work in units of 1024 of the clock's base unit; if less than one
	 * such unit has elapsed there is nothing to do yet.
	 */
	delta >>= 10;
	if (!delta)
		return 0;

	/*
	 * Advance the timestamp only by the whole units consumed, so the
	 * sub-1024 remainder is carried over to the next call.
	 */
	sa->last_update_time += delta << 10;

	/* With a zero load weight, runnable and running are meaningless. */
	if (!load)
		runnable = running = 0;

	/*
	 * Accumulate the sums. accumulate_sum() is not visible here; per the
	 * original annotation it returns 0 when delta plus
	 * sa->period_contrib does not complete a full period, in which case
	 * the averages need no refresh.
	 */
	if (!accumulate_sum(delta, sa, load, runnable, running))
		return 0;

	return 1;
}
/*
 * ___update_load_avg - derive the *_avg fields of @sa from its *_sum fields.
 * @sa:       the sched_avg whose averages are recomputed
 * @load:     weight multiplied into load_sum
 * @runnable: weight multiplied into runnable_load_sum
 */
static __always_inline void
___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
{
	/*
	 * NOTE(review): presumably the divider is the largest value the sums
	 * can currently reach: the current period is only partially elapsed
	 * (sa->period_contrib out of 1024), hence
	 * LOAD_AVG_MAX - 1024 + period_contrib. LOAD_AVG_MAX is defined
	 * elsewhere - confirm against the PELT documentation.
	 */
	u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;

	sa->load_avg = div_u64(load * sa->load_sum, divider);
	sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
	/* util_avg is stored through WRITE_ONCE(). */
	WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
}
/* ___update_load_avg() above is what refreshes the average of the irq load. */
/*
 * update_rq_clock_pelt - advance rq->clock_pelt by a capacity-scaled delta.
 * @rq:    runqueue whose PELT clock is updated
 * @delta: elapsed task time to account
 *
 * When the runqueue is running the idle task, clock_pelt is simply
 * resynchronised with rq_clock_task(); otherwise @delta is scaled by the
 * CPU and frequency capacity of this CPU before being added.
 */
static inline void update_rq_clock_pelt(struct rq *rq, s64 delta)
{
	if (unlikely(is_idle_task(rq->curr))) {
		/* Idle: snap the PELT clock to the rq's task clock. */
		rq->clock_pelt = rq_clock_task(rq);
		return;
	}

	/*
	 * Scale the delta by CPU capacity, then by frequency capacity.
	 * NOTE(review): the original annotation observed that neither
	 * scaling step appeared to change the value - presumably on a
	 * platform where both capacities are at full scale; confirm.
	 */
	delta = cap_scale(delta, arch_scale_cpu_capacity(cpu_of(rq)));
	delta = cap_scale(delta, arch_scale_freq_capacity(cpu_of(rq)));

	rq->clock_pelt += delta;
}