/******以下结论和代码分析都是基于最新Linux master分支(Linux5.0)******/
1. 负载结构体
每个调度实体都有一个负载结构,用来跟踪调度实体对系统的负载贡献
/* Simplified excerpt: only the load-tracking members are shown. */
struct sched_entity {
struct load_weight load; /* entity weight, used to scale load_sum into load_avg */
#ifdef CONFIG_SMP
struct sched_avg avg; /* PELT load-tracking state (SMP only) */
#endif
};
/*
 * PELT (Per-Entity Load Tracking) accumulator, embedded in both
 * struct sched_entity and struct cfs_rq.
 * NOTE(review): simplified excerpt of the v5.x kernel definition.
 */
struct sched_avg {
u64 last_update_time;            /* timestamp of the last load update */
u64 load_sum;                    /* decayed runnable+blocked contribution */
u64 runnable_load_sum;           /* decayed runnable-state contribution */
u32 util_sum;                    /* decayed running-state contribution */
u32 period_contrib;              /* leftover part of an incomplete period */
unsigned long load_avg;          /* average load */
unsigned long runnable_load_avg; /* average runnable-state load */
unsigned long util_avg;          /* average running-state load (utilization) */
};                               /* fix: the excerpt was missing this ';' */
调度实体sched_entity和cfs_rq都内嵌一个sched_avg。
调度实体sched_entity说明:
load_sum: /*累计runnable和blocked衰减负载 */
runnable_load_sum 与load_sum一样
util_sum:/* 累计running衰减时间总和*/
load_avg: 平均负载, (load_sum*load->weight)/最大衰减值
最大衰减累加时间:进程在CPU上运行无限长时,根据PELT算法计算出的衰减值.
当进程无限运行后,load_avg总是无限接近进程权重值(load.weight)
对调度实体来说load_sum=runnable_load_sum, load_avg=runnable_load_avg
对于CFS调度队列来说
load_sum = 整个队列负载 * 整个队列权重
2. 调度实体/CFS队列负载更新
每个调度实体负载更新的时机:
1. 从阻塞状态到runnable状态(入队列)
2. 从running状态到runnable状态(入队列)
3. 从runnable状态到running状态(出队列,CPU运行)
4. 从runnable状态到阻塞状态(出队列)
总结: 入队列和出队列更新调度实体负载/CFS负载
进程负载更新函数为update_load_avg,
/*
 * Entry point for updating the PELT load of a scheduling entity and of
 * the cfs_rq it belongs to; called on the enqueue/dequeue paths.
 * NOTE(review): simplified excerpt — the full kernel function also
 * acts on the 'decayed' result (load propagation, util change hooks).
 */
static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
u64 now = cfs_rq_clock_pelt(cfs_rq);
int decayed; /* fix: was assigned below without a declaration */

/* Age the entity's own load unless the caller requested SKIP_AGE_LOAD
 * or the entity was never attached (last_update_time == 0). */
if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
__update_load_avg_se(now, cfs_rq, se);

/* Update the CFS runqueue's aggregate load. */
decayed = update_cfs_rq_load_avg(now, cfs_rq);
(void)decayed; /* unused in this excerpt */
}
2.1 调度实体负载更新
/*
 * Update a scheduling entity's PELT sums and, when ___update_load_sum
 * reports that a full period elapsed, recompute its averages.
 * Returns 1 if the averages were recomputed, 0 otherwise.
 */
int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se)
{
/* Update load_sum and runnable_load_sum; for a task entity both the
 * load and runnable arguments are !!se->on_rq, i.e. 0 or 1. */
if (___update_load_sum(now, &se->avg, !!se->on_rq, !!se->on_rq,
cfs_rq->curr == se)) {
/* Recompute load_avg and runnable_load_avg (comment terminator fixed). */
___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
return 1;
}
return 0;
}
其中___update_load_sum根据PELT算法计算调度实体负载,从传入的参数也可以看出,
对于调度实体load=runnable = !!se->on_rq,非0即1
而平均负载的计算,传了se的weight和runnable weight,对于线程调度实体,weight==runnable weight
/*
 * Recompute the load averages from the accumulated (decayed) sums.
 * 'load' and 'runnable' are the weights used to scale the sums.
 */
static __always_inline void
___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
{
u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib; /* maximum attainable PELT sum, adjusted by the partial current period */
/* avg = weight * sum / max_sum; for an always-running entity the
 * average therefore converges toward its weight (see notes above). */
sa->load_avg = div_u64(load * sa->load_sum, divider);
sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
}
2.2 CFS 队列负载更新
/* 实际上跟调度实体调用函数一样,只是参数有差别*/
/*
 * Same mechanism as the per-entity update, only the arguments differ:
 * the cfs_rq passes its (scaled-down) total weight and runnable weight
 * to ___update_load_sum, and weight 1 to ___update_load_avg.
 */
int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
{
if (___update_load_sum(now, &cfs_rq->avg,
scale_load_down(cfs_rq->load.weight),
scale_load_down(cfs_rq->runnable_weight),
cfs_rq->curr != NULL)) {
___update_load_avg(&cfs_rq->avg, 1, 1);
trace_pelt_cfs_tp(cfs_rq); /* NOTE(review): this tracepoint landed after v5.0 — verify against the quoted kernel version */
return 1;
}
return 0;
}
3. 调度实体入队列/出队列对CFS队列负载的影响
3.1 调度实体加入CFS队列时
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
/* Update the running entity's vruntime and the queue's min_vruntime. */
update_curr(cfs_rq);
/* First refresh the entity's own PELT load. */
update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
/* Add the entity's runnable load into the cfs_rq's runnable totals. */
enqueue_runnable_load_avg(cfs_rq, se);
/* Add the entity's load.weight to the cfs_rq's total weight. */
account_entity_enqueue(cfs_rq, se);
/* NOTE(review): 'curr' (whether se is cfs_rq->curr) is computed in code
 * omitted from this excerpt — see the full kernel enqueue_entity(). */
if (!curr)
__enqueue_entity(cfs_rq, se); /* insert into the runqueue red-black tree */
se->on_rq = 1; /* mark the entity as being on the runnable queue */
}
3.2 调度实体出队列
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
/*
 * Update vruntime and min_vruntime before removing the entity.
 */
update_curr(cfs_rq);
/* Refresh the entity's and the cfs_rq's PELT load. */
update_load_avg(cfs_rq, se, UPDATE_TG);
/* Subtract the entity's runnable load from the cfs_rq totals. */
dequeue_runnable_load_avg(cfs_rq, se);
/* Mark the entity as no longer on the runqueue. */
se->on_rq = 0;
/* Subtract the entity's load.weight from the cfs_rq's total weight. */
account_entity_dequeue(cfs_rq, se);
}
4. 负载更新函数
/* 更新se的load_sum和load_avg*/
int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se)
/* 更新cfs的load_sum和load_avg*/
int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
/*更新load_sum */
int ___update_load_sum(u64 now, struct sched_avg *sa, unsigned long load, unsigned long runnable, int running)
/*更新load_avg */
___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)