【数据结构】【rq】【sched_entity】【util_est】

本文链接：https://blog.csdn.net/feifei_csdn/article/details/107106149

本文介绍了Linux调度器中struct rq、sched_entity和struct util_est的概念。struct rq负责维护CPU上所有进程的信息，包括实时进程和根CFS运行队列。sched_entity作为调度实体，可以代表单个进程或进程组。struct util_est用于估算FAIR任务的利用率，通过指数加权移动平均(EWMA)跟踪任务激活时的瞬时利用率，以减少对工作负载瞬时变化的敏感性。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

rq

每个cpu都有自己的struct rq结构，rq描述的是cpu上运行的所有进程，其中包括实时进程和一个根cfs运行队列。因为调度类的优先级为dl>rt>fair，所以调度器选择进程的先后顺序也为dl>rt>fair

/*
 * This is the main, per-CPU runqueue data structure.
 *
 * Locking rule: those places that want to lock multiple runqueues
 * (such as the load balancing or the thread migration code), lock
 * acquire operations must be ordered by ascending &runqueue.
 */
struct rq {
	/* runqueue lock: */
	raw_spinlock_t lock;
 
	/*
	 * nr_running and cpu_load should be in the same cacheline because
	 * remote CPUs use both these fields when doing load calculation.
	 */
/*这个rq里面存在多少个running task，包括RT，fair，DL sched class的task*/
	unsigned int nr_running;
#ifdef CONFIG_NUMA_BALANCING
	unsigned int nr_numa_running;
	unsigned int nr_preferred_running;
#endif
/*用以表示处理器的负载，在每个处理器的rq中
	都会有对应到该处理器的cpu_load参数配置，在每次
	处理器触发scheduler tick时，都会呼叫函数
	update_cpu_load_active,进行cpu_load的更新。在系统初始化的时候
	会呼叫函数sched_init把rq的cpu_load array初始化为0.
	了解它的更新方式最好的方式是通过函数update_cpu_load,公式如下：
	cpu_load[0]会直接等于rq中load.weight的值。
	cpu_load[1]=(cpu_load[1]*(2-1)+cpu_load[0])/2
	cpu_load[2]=(cpu_load[2]*(4-1)+cpu_load[0])/4
	cpu_load[3]=(cpu_load[3]*(8-1)+cpu_load[0])/8
	cpu_load[4]=(cpu_load[4]*(16-1)+cpu_load[0])/16
	呼叫函数this_cpu_load时，所返回的cpu load值是cpu_load[0]
	而在进行cpu balance或migration时，就会呼叫函数
	source_load target_load取得对该处理器cpu_load index值，
	来进行计算*/
	#define CPU_LOAD_IDX_MAX 5
	unsigned long cpu_load[CPU_LOAD_IDX_MAX];//表示该rq所在cpu的历史load，一般有5个
	unsigned long last_load_update_tick;
/*在选择下一个调度实体的时候，需要判断此task是否是misfit task，因为是否为misfit task
所做的决策是不相同的，比如会强制balance等等*/
	unsigned int misfit_task;
#ifdef CONFIG_NO_HZ_COMMON
	u64 nohz_stamp;
	unsigned long nohz_flags;
#endif
#ifdef CONFIG_NO_HZ_FULL
	unsigned long last_sched_tick;
#endif
 
#ifdef CONFIG_CPU_QUIET
	/* time-based average load */
	u64 nr_last_stamp;
	u64 nr_running_integral;
	seqcount_t ave_seqcnt;
#endif
 
	/* capture load from *all* tasks on this cpu: */
/*load->weight值，会是目前所执行的schedule entity的
	load->weight的总和，也就是说rq的load->weight越高，
	也表示所负责的排程单元load->weight总和越高
	表示处理器所负荷的执行单元也越重*/
 /*在rq里面的可运行的所有task的总的load，当nr_running数量发生变化时也会更新*/
	struct load_weight load;//表示当前cpu的load，这个load是它所有就绪进程的load之和（同样包括cfs,rq及正在运行的）
 
 
	/*在每次scheduler tick中呼叫update_cpu_load时，
	这个值就增加一，可以用来反馈目前cpu
	load更新的次数*/
/*在rq里面有多少个task的load需要更新*/
	unsigned long nr_load_updates;
 
/*用来累加处理器进行context switch的次数，会在
	函数schedule呼叫时进行累加，并可以通过函数
	nr_context_switches统计目前所有处理器总共的context switch
	次数，或是可以透过查看档案/proc/stat中的ctxt位得知目前
	整个系统触发context switch的次数*/
 
/*进程发生上下文切换的次数，只有proc 文件系统里面会导出这个统计数值*/
	u64 nr_switches;
 
	struct cfs_rq cfs;//该rq所包括的cfs_rq运行队列，这个是所有cfs_rq的root
	struct rt_rq rt;
	struct dl_rq dl;
 
/*用以支援可以group cfs tasks的机制*/
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
	/*在有设置fair group scheduling 的环境下，
	会基于原本cfs rq中包含有若干task的group
	所成的排程集合，也就是说当有一个group a
	就会有自己的cfs rq用来排程自己所属的tasks,
	而属于这group a的tasks所使用到的处理器时间
	就会以这group a总共所分得的时间为上限。
	基于cgroup的fair group scheduling 架构，可以创造出
	有阶层性的task组织，根据不同task的功能群组化
	再配置给该群组对应的处理器资源，让属于
	该群组下的task可以透过rq机制排程。使用属于
	该群组下的资源。
	这个变数主要是管理CFS RQ list，操作上可以透过函数
	list_add_leaf_cfs_rq把一个group cfs rq加入到list中，或透过
	函数list_del_leaf_cfs_rq把一个group cfs rq移除，并可以
	透过for_each_leaf_cfs_rq把一个rq上的所有leaf cfs_rq走一遍*/
 
	/* list of leaf cfs_rq on this cpu: */
	struct list_head leaf_cfs_rq_list;//如果使用的cgroup来创建嵌套的group，那么这个group的cfs_rq通过该变量组织，注：每个cgroup都有一个cfs_rq
	struct list_head *tmp_alone_branch;
#endif /* CONFIG_FAIR_GROUP_SCHED */
 
	/*
	 * This is part of a global counter where only the total sum
	 * over all CPUs matters. A task can increase this counter on
	 * one CPU and if it got migrated afterwards it may decrease
	 * it on another CPU. Always updated under the runqueue lock:
	 */
/*一般来说，linux kernel 的task状态可以为TASK_RUNNING
	 TASK_INTERRUPTIBLE(sleep),
	 TASK_UNINTERRUPTIBLE(Deactivate Task,此时Task会从rq中
	 移除)或TASK_STOPPED.
	 透过这个变数会统计目前rq中有多少task属于
	 TASK_UNINTERRUPTIBLE的状态。当呼叫函数
	 activate_task时，会把nr_uninterruptible值减一，并透过该函数
	enqueue_task把对应的task依据所在的scheduling class
	放在 对应的rq中，并把目前rq中nr_running值加一*/
 
	unsigned long nr_uninterruptible;
/*curr:指向目前处理器正在执行的task;
	idle:指向属于idle-task scheduling class 的idle task;
	stop:指向目前最高等级属于stop-task scheduling class
	的task;*/
	struct task_struct *curr, *idle, *stop;
/*基于处理器的jiffies值，用以记录下次进行处理器
	balancing 的时间点*/
	unsigned long next_balance;
 
	/*用以存储context-switch发生时，前一个task的memory management
	结构并可用在函数finish_task_switch中，透过函数mmdrop释放前一个