- Overview
From the earlier post Kernel Scheduler学习之七:CFS调度器之task管理 we know that the CFS scheduler keeps its runnable tasks in a red-black tree ordered by virtual runtime (vruntime). This post tries to answer the following question:
1. How is the next task picked?
The function CFS uses to pick the next task to run is pick_next_task_fair. Its implementation is as follows:

```c
static struct task_struct *
pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
	struct cfs_rq *cfs_rq = &rq->cfs;
	struct sched_entity *se;
	struct task_struct *p;
	int new_tasks;

again:
	if (!sched_fair_runnable(rq))	/* no task in the cfs_rq: it is going idle, so try idle balance */
		goto idle;

	/* For simplicity, skip the FAIR_GROUP path for now. */
#ifdef CONFIG_FAIR_GROUP_SCHED
	if (!prev || prev->sched_class != &fair_sched_class)
		goto simple;

	/*
	 * Because of the set_next_buddy() in dequeue_task_fair() it is rather
	 * likely that a next task is from the same cgroup as the current.
	 *
	 * Therefore attempt to avoid putting and setting the entire cgroup
	 * hierarchy, only change the part that actually changes.
	 */
	do {
		struct sched_entity *curr = cfs_rq->curr;

		/*
		 * Since we got here without doing put_prev_entity() we also
		 * have to consider cfs_rq->curr. If it is still a runnable
		 * entity, update_curr() will update its vruntime, otherwise
		 * forget we've ever seen it.
		 */
		if (curr) {
			if (curr->on_rq)
				update_curr(cfs_rq);
			else
				curr = NULL;

			/*
			 * This call to check_cfs_rq_runtime() will do the
			 * throttle and dequeue its entity in the parent(s).
			 * Therefore the nr_running test will indeed
			 * be correct.
			 */
			if (unlikely(check_cfs_rq_runtime(cfs_rq))) {
				cfs_rq = &rq->cfs;

				if (!cfs_rq->nr_running)
					goto idle;

				goto simple;
			}
		}

		se = pick_next_entity(cfs_rq, curr);
		cfs_rq = group_cfs_rq(se);
	} while (cfs_rq);

	p = task_of(se);

	/*
	 * Since we haven't yet done put_prev_entity and if the selected task
	 * is a different task than we started out with, try and touch the
	 * least amount of cfs_rqs.
	 */
	if (prev != p) {
		struct sched_entity *pse = &prev->se;

		while (!(cfs_rq = is_same_group(se, pse))) {
			int se_depth = se->depth;
			int pse_depth = pse->depth;

			if (se_depth <= pse_depth) {
				put_prev_entity(cfs_rq_of(pse), pse);
				pse = parent_entity(pse);
			}
			if (se_depth >= pse_depth) {
				set_next_entity(cfs_rq_of(se), se);
				se = parent_entity(se);
			}
		}

		put_prev_entity(cfs_rq, pse);
		set_next_entity(cfs_rq, se);
	}

	goto done;
simple:
#endif
	if (prev)
		put_prev_task(rq, prev);

	do {
		se = pick_next_entity(cfs_rq, NULL);	/* pick the next entity */
		set_next_entity(cfs_rq, se);		/* record se as the cfs_rq's currently running entity */
		cfs_rq = group_cfs_rq(se);
	} while (cfs_rq);

	p = task_of(se);

done: __maybe_unused;
#ifdef CONFIG_SMP
	/*
	 * Move the next running task to the front of
	 * the list, so our cfs_tasks list becomes MRU
	 * one.
	 */
	list_move(&p->se.group_node, &rq->cfs_tasks);
#endif

	if (hrtick_enabled(rq))
		hrtick_start_fair(rq, p);

	/*
	 * Update whether this rq has a capacity misfit, i.e. the task needs
	 * more capacity than this CPU can provide.
	 */
	update_misfit_status(p, rq);

	return p;

idle:
	if (!rf)
		return NULL;

	/*
	 * No task was selected, so the cfs_rq is considered idle: do idle
	 * balance and try to pull tasks over from other CPUs.
	 */
	new_tasks = newidle_balance(rq, rf);

	/*
	 * Because newidle_balance() releases (and re-acquires) rq->lock, it is
	 * possible for any higher priority task to appear. In that case we
	 * must re-start the pick_next_entity() loop.
	 */
	if (new_tasks < 0)
		return RETRY_TASK;

	if (new_tasks > 0)	/* tasks were pulled over: pick again */
		goto again;

	/*
	 * rq is about to be idle, check if we need to update the
	 * lost_idle_time of clock_pelt
	 */
	update_idle_rq_clock_pelt(rq);

	return NULL;
}
```
So when picking a task, CFS first checks whether the cfs_rq has any runnable task at all. If not, this CPU is about to go idle, so it tries to pull some tasks over from other CPUs (newidle balance); if any tasks were pulled, the pick is restarted from the beginning.
If a task is selected, it is recorded as the cfs_rq's current, and then the scheduler checks whether the capacity this task needs matches the capacity the CPU can provide.
This raises the following questions:
a. How is the next task selected?
b. What does marking a task as current actually do?
c. How is it decided whether a task fits the CPU's capacity?
a. Picking the next task
```c
/*
 * Pick the next process, keeping these things in mind, in this order:
 * 1) keep things fair between processes/task groups
 * 2) pick the "next" process, since someone really wants that to run
 * 3) pick the "last" process, for cache locality
 * 4) do not run the "skip" process, if something else is available
 */
static struct sched_entity *
pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
	struct sched_entity *left = __pick_first_entity(cfs_rq);	/* leftmost entity in the rbtree */
	struct sched_entity *se;

	/*
	 * If curr is set we have to see if its left of the leftmost entity
	 * still in the tree, provided there was anything in the tree at all.
	 */
	/* If the tree is empty, or the running task still has the smallest
	 * vruntime, keep running the current task. */
	if (!left || (curr && entity_before(curr, left)))
		left = curr;

	se = left; /* ideally we run the leftmost entity */

	/* Normally the above is enough; the rest handles the buddy hints. */
	/*
	 * Avoid running the skip buddy, if running something else can
	 * be done without getting too unfair.
	 */
	if (cfs_rq->skip == se) {
		struct sched_entity *second;

		if (se == curr) {
			second = __pick_first_entity(cfs_rq);
		} else {
			second = __pick_next_entity(se);
			if (!second || (curr && entity_before(curr, second)))
				second = curr;
		}

		if (second && wakeup_preempt_entity(second, left) < 1)
			se = second;
	}

	/*
	 * Prefer last buddy, try to return the CPU to a preempted task.
	 */
	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
		se = cfs_rq->last;

	/*
	 * Someone really wants this to run. If it's not unfair, run it.
	 */
	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
		se = cfs_rq->next;

	clear_buddies(cfs_rq, se);

	return se;
}
```
So pick_next_entity selects the leftmost entity in the cfs_rq's red-black tree. One puzzle remains, though: __pick_first_entity does not remove the picked task from the tree, so wouldn't the next pick just return the same task again? set_next_entity, below, answers that.
b. Setting the currently running task
```c
static void
set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	/* 'current' is not kept within the tree. */
	if (se->on_rq) {	/* true here, since se was just picked out of the cfs_rq */
		/*
		 * Any task has to be enqueued before it get to execute on
		 * a CPU. So account for the time it spent waiting on the
		 * runqueue.
		 */
		update_stats_wait_end(cfs_rq, se);	/* update wait-time statistics */
		/*
		 * As __dequeue_entity() below shows, se is removed from the
		 * rbtree here, so the next pick cannot return it again.
		 */
		__dequeue_entity(cfs_rq, se);
		update_load_avg(cfs_rq, se, UPDATE_TG);	/* update load tracking */
	}

	update_stats_curr_start(cfs_rq, se);
	cfs_rq->curr = se;

	/*
	 * Track our maximum slice length, if the CPU's load is at
	 * least twice that of our own weight (i.e. dont track it
	 * when there are only lesser-weight tasks around):
	 */
	if (schedstat_enabled() &&
	    rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) {
		schedstat_set(se->statistics.slice_max,
			max((u64)schedstat_val(se->statistics.slice_max),
			    se->sum_exec_runtime - se->prev_sum_exec_runtime));
	}

	se->prev_sum_exec_runtime = se->sum_exec_runtime;
}

static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline);
}
```
So this function mainly does three things:
1. Remove the picked task from the red-black tree, so the next pick cannot return the same task again.
2. Record it as the cfs_rq's current task.
3. Update load tracking and statistics.
c. Deciding whether a task fits the CPU's capacity
```c
static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
{
	if (!static_branch_unlikely(&sched_asym_cpucapacity))
		return;

	if (!p) {
		rq->misfit_task_load = 0;
		return;
	}

	/*
	 * Note: the capacity passed in here is the CPU's original capacity
	 * minus whatever rt and dl tasks have already consumed.
	 */
	if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) {
		rq->misfit_task_load = 0;
		return;
	}

	rq->misfit_task_load = task_h_load(p);
}

static inline int task_fits_capacity(struct task_struct *p, long capacity)
{
	return fits_capacity(task_util_est(p), capacity);
}

/*
 * The fit test: does the task's estimated utilization stay below ~80% of
 * the capacity left after rt/dl usage? Above that, the task is a misfit.
 */
/*
 * The margin used when comparing utilization with CPU capacity.
 *
 * (default: ~20%)
 */
#define fits_capacity(cap, max)	((cap) * 1280 < (max) * 1024)
```
So the fit criterion is: the task's estimated utilization must stay below roughly 80% of the CPU's original capacity minus what rt/dl tasks consume; above that threshold, the task is considered a misfit.
- Summary
pick next task selects the leftmost task from the red-black tree managed by the rq's cfs_rq; leftmost means the smallest vruntime, and vruntime is a virtual runtime. So the "complete fairness" of CFS is fairness in virtual time.
While picking, the load and other statistics of the cfs_rq and the selected task are updated.
Finally, it also checks whether the capacity the task in the current rq needs matches the capacity the CPU can provide.
Kernel Scheduler学习之七:CFS调度器之pick_next_task