/*
* All the scheduling class methods:
*/
static const struct sched_class fair_sched_class = {
.next = &idle_sched_class, //指向下一个调度器
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
.yield_to_task = yield_to_task_fair,
.check_preempt_curr = check_preempt_wakeup,
.pick_next_task = pick_next_task_fair,
.put_prev_task = put_prev_task_fair,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_fair,
.rq_online = rq_online_fair,
.rq_offline = rq_offline_fair,
.task_waking = task_waking_fair,
#endif
.set_curr_task = set_curr_task_fair,
.task_tick = task_tick_fair,
.task_fork = task_fork_fair,
.prio_changed = prio_changed_fair,
.switched_from = switched_from_fair,
.switched_to = switched_to_fair,
.get_rr_interval = get_rr_interval_fair,
#ifdef CONFIG_FAIR_GROUP_SCHED
.task_move_group = task_move_group_fair,
#endif
};
下面对调度器中一些重要的方法进行解释:
首先是enqueue_task_fair():
将给定进程连入rq队列的函数中,关键的操作就是enqueue_entity():
/*
 * The enqueue_task method is called before nr_running is
 * increased. Here we update the fair scheduling stats and
 * then put the task into the rbtree:
 *
 * @rq:    run queue the task is being added to; only passed on to
 *         hrtick_update() here
 * @p:     task to enqueue; the walk starts at its embedded entity p->se
 * @flags: ENQUEUE_* flags handed to enqueue_entity() for the first
 *         entity; every further entity is enqueued with ENQUEUE_WAKEUP
 */
static void
enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
	struct cfs_rq *cfs_rq;
	struct sched_entity *se = &p->se;

	for_each_sched_entity(se) {
		/*
		 * If se is already on a run queue there is nothing left
		 * to enqueue, so stop here.
		 */
		if (se->on_rq)
			break;
		cfs_rq = cfs_rq_of(se);
		enqueue_entity(cfs_rq, se, flags);
		flags = ENQUEUE_WAKEUP;
	}

	/*
	 * NOTE(review): se is not reset between the two loops, so this
	 * walk presumably resumes at the entity where the loop above
	 * stopped -- confirm against the for_each_sched_entity()
	 * definition.
	 */
	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);

		update_cfs_load(cfs_rq, 0);
		update_cfs_shares(cfs_rq);
	}

	hrtick_update(rq);
}
首先来看一下update_curr函数,它用来更新任务的物理时间和虚拟时间,还有min_vruntime。
/*
 * Put a scheduling entity on a CFS run queue.
 *
 * @cfs_rq: queue the entity is added to
 * @se:     entity being enqueued; se->on_rq is set to 1 on return
 * @flags:  ENQUEUE_* flags; ENQUEUE_WAKEUP and ENQUEUE_WAKING are the
 *          only ones examined here
 */
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
	/*
	 * Update the normalized vruntime before updating min_vruntime
	 * through calling update_curr().
	 *
	 * Note that this adds the queue's min_vruntime to se->vruntime
	 * rather than overwriting it. For background on min_vruntime
	 * the article's author refers the reader to the book
	 * "Professional Linux Kernel Architecture".
	 */
	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
		se->vruntime += cfs_rq->min_vruntime;

	/*
	 * Update run-time statistics of the 'current'
	 * (its real and virtual clocks).
	 */
	update_curr(cfs_rq);
	update_cfs_load(cfs_rq, 0);		/* group scheduling related */
	/* bumps nr_running, i.e. the number of processes on the cfs queue */
	account_entity_enqueue(cfs_rq, se);
	update_cfs_shares(cfs_rq);		/* group scheduling related */

	if (flags & ENQUEUE_WAKEUP) {
		/* settle the correct virtual time for the entity */
		place_entity(cfs_rq, se, 0);
		enqueue_sleeper(cfs_rq, se);
	}

	update_stats_enqueue(cfs_rq, se);
	check_spread(cfs_rq, se);
	/*
	 * Insert the entity into the red-black tree, sorted with the
	 * kernel's standard rbtree helpers. The entity that is currently
	 * running (cfs_rq->curr) is not inserted.
	 */
	if (se != cfs_rq->curr)
		__enqueue_entity(cfs_rq, se);
	se->on_rq = 1;

	if (cfs_rq->nr_running == 1)
		list_add_leaf_cfs_rq(cfs_rq);
}
这个函数的核心操作当然是__update_curr():static void update_curr(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; u64 now = rq_of(cfs_rq)->clock_task; //获取当前时间 unsigned long delta_exec; if (unlikely(!curr)) return; /* * Get the amount of time the current task was running * since the last time we changed load (this cannot * overflow on 32 bits): */ delta_exec = (unsigned long)(now - curr->exec_start); //计算当前时间和上一次更新负荷统计量时的时间差 if (!delta_exec) //如果为0,则不做任何事情 return; __update_curr(cfs_rq, curr, delta_exec); curr->exec_start = now; //重新设置开始执行的时间 if (entity_is_task(curr)) { struct task_struct *curtask = task_of(curr); trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime); cpuacct_charge(curtask, delta_exec); account_group_exec_runtime(curtask, delta_exec); } }
/* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class. */ static inline void __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, unsigned long delta_exec) { unsigned long delta_exec_weighted; schedstat_set(curr->statistics.exec_max, max((u64)delta_exec, curr->statistics.exec_max)); curr->sum_exec_runtime += delta_exec; schedstat_add(cfs_rq, exec_clock, delta_exec); delta_exec_weighted = calc_delta_fair(delta_exec, curr); curr->vruntime += delta_exec_weighted; update_min_vruntime(cfs_rq); #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED cfs_rq->load_unacc_exec_time += d