进程管理之CFS调度器

最新推荐文章于 2023-02-17 21:05:07 发布

SunnyBeiKe

最新推荐文章于 2023-02-17 21:05:07 发布

阅读量4.6k

点赞数

分类专栏：Linux内核之进程管理　文章标签：struct linux内核 statistics class each 算法

本文链接：https://blog.csdn.net/SunnyBeiKe/article/details/6948628

版权

本文详细解析Linux内核中的CFS调度器，包括enqueue_task_fair、dequeue_task_fair、yield_to_task_fair等关键方法，探讨delta_exec_weighted计算，以及在不同场景下的任务调度策略。同时，引用《深入Linux内核框架》和《独辟蹊径》等书籍进行深入解读。

摘要由CSDN通过智能技术生成

/*
 * All the scheduling class methods:
 */
static const struct sched_class fair_sched_class = {
	.next			= &idle_sched_class, //指向下一个调度器
	.enqueue_task		= enqueue_task_fair, 
	.dequeue_task		= dequeue_task_fair,
	.yield_task		= yield_task_fair,
	.yield_to_task		= yield_to_task_fair,

	.check_preempt_curr	= check_preempt_wakeup,

	.pick_next_task		= pick_next_task_fair,
	.put_prev_task		= put_prev_task_fair,

#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_fair,

	.rq_online		= rq_online_fair,
	.rq_offline		= rq_offline_fair,

	.task_waking		= task_waking_fair,
#endif

	.set_curr_task          = set_curr_task_fair,
	.task_tick		= task_tick_fair,
	.task_fork		= task_fork_fair,

	.prio_changed		= prio_changed_fair,
	.switched_from		= switched_from_fair,
	.switched_to		= switched_to_fair,

	.get_rr_interval	= get_rr_interval_fair,

#ifdef CONFIG_FAIR_GROUP_SCHED
	.task_move_group	= task_move_group_fair,
#endif
};

下面对调度器中一些重要的方法进行解释：

首先是enqueue_task_fair():

/*
 * enqueue_task hook of the fair class. It is called before nr_running
 * is increased: the fair scheduling stats are updated and the task is
 * then put into the rbtree.
 *
 * @rq:    runqueue handed on to hrtick_update() at the end
 * @p:     task whose embedded scheduling entity (p->se) is enqueued
 * @flags: enqueue flags for the first entity; every further level is
 *         enqueued with ENQUEUE_WAKEUP
 */
static void
enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_entity *se = &p->se;

	/* First walk: enqueue each entity until one is already queued. */
	for_each_sched_entity(se) {
		struct cfs_rq *target;

		/* Already on its runqueue: nothing left to enqueue. */
		if (se->on_rq)
			break;

		target = cfs_rq_of(se);
		enqueue_entity(target, se, flags);
		flags = ENQUEUE_WAKEUP;
	}

	/*
	 * Second walk: 'se' is not reset in between, so this starts from
	 * whatever entity the first walk stopped at and only refreshes
	 * the load and shares of the corresponding cfs_rq.
	 */
	for_each_sched_entity(se) {
		struct cfs_rq *owner = cfs_rq_of(se);

		update_cfs_load(owner, 0);
		update_cfs_shares(owner);
	}

	hrtick_update(rq);
}

在将给定进程加入rq队列的过程中，关键的操作是enqueue_entity()：

/*
 * Enqueue one scheduling entity on a cfs_rq.
 *
 * @cfs_rq: queue the entity is added to
 * @se:     entity being enqueued
 * @flags:  ENQUEUE_WAKEUP / ENQUEUE_WAKING bits select the vruntime
 *          handling below
 */
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
	const int is_wakeup = flags & ENQUEUE_WAKEUP;
	const int is_waking = flags & ENQUEUE_WAKING;

	/*
	 * Update the normalized vruntime before updating min_vruntime
	 * through calling update_curr(): the queue's min_vruntime is
	 * added to the entity's vruntime. (For background on
	 * min_vruntime the original notes point to the book
	 * "Professional Linux Kernel Architecture".)
	 */
	if (is_waking || !is_wakeup)
		se->vruntime += cfs_rq->min_vruntime;

	/* Update run-time statistics of the 'current' (real and virtual clocks). */
	update_curr(cfs_rq);
	/* Group-scheduling related. */
	update_cfs_load(cfs_rq, 0);
	/* Per the original notes: bumps nr_running, the queue's entity count. */
	account_entity_enqueue(cfs_rq, se);
	/* Group-scheduling related. */
	update_cfs_shares(cfs_rq);

	if (is_wakeup) {
		/* Settle on the proper virtual time for the woken entity. */
		place_entity(cfs_rq, se, 0);
		enqueue_sleeper(cfs_rq, se);
	}

	update_stats_enqueue(cfs_rq, se);
	check_spread(cfs_rq, se);

	/* The queue's current entity is not inserted into the red-black tree. */
	if (se != cfs_rq->curr)
		__enqueue_entity(cfs_rq, se);
	se->on_rq = 1;

	if (cfs_rq->nr_running == 1)
		list_add_leaf_cfs_rq(cfs_rq);
}

首先来看一下update_curr函数，它用来更新任务的物理时间和虚拟时间，还有min_vruntime。

/*
 * Charge the time the queue's current entity has run since its
 * exec_start stamp, then restart the stamp at the present clock value.
 *
 * Does nothing when the queue has no current entity or when no time
 * has elapsed. For entities that are tasks, the elapsed time is also
 * passed to the tracing, cpuacct and group-runtime accounting calls.
 */
static void update_curr(struct cfs_rq *cfs_rq)
{
	struct sched_entity *running = cfs_rq->curr;
	u64 stamp = rq_of(cfs_rq)->clock_task;	/* current time */
	unsigned long elapsed;
	struct task_struct *task;

	if (unlikely(!running))
		return;

	/*
	 * Get the amount of time the current task was running
	 * since the last time we changed load (this cannot
	 * overflow on 32 bits):
	 */
	elapsed = (unsigned long)(stamp - running->exec_start);
	if (elapsed == 0)
		return;		/* no time has passed: nothing to account */

	__update_curr(cfs_rq, running, elapsed);
	running->exec_start = stamp;	/* restart the measurement window */

	/* The remaining accounting only applies to task entities. */
	if (!entity_is_task(running))
		return;

	task = task_of(running);
	trace_sched_stat_runtime(task, elapsed, running->vruntime);
	cpuacct_charge(task, elapsed);
	account_group_exec_runtime(task, elapsed);
}

这个函数的核心操作当然是__update_curr()：

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static inline void
__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
	      unsigned long delta_exec)
{
	unsigned long delta_exec_weighted;

	schedstat_set(curr->statistics.exec_max,
		      max((u64)delta_exec, curr->statistics.exec_max));

	curr->sum_exec_runtime += delta_exec;
	schedstat_add(cfs_rq, exec_clock, delta_exec);
	delta_exec_weighted = calc_delta_fair(delta_exec, curr); 

	curr->vruntime += delta_exec_weighted;
	update_min_vruntime(cfs_rq);

#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
	cfs_rq->load_unacc_exec_time += d