dl队列结构体
/*
 * Per-runqueue state of the deadline scheduling class (abridged excerpt;
 * the trailing fields are omitted).
 */
struct dl_rq {
	/* runqueue is an rbtree, ordered by deadline */
	struct rb_root_cached root;

	unsigned long dl_nr_running;

#ifdef CONFIG_SMP
	struct {
		u64 curr;	/* deadline of the currently running task */
		u64 next;	/* deadline of the next task */
	} earliest_dl;

	/* NOTE(review): presumably the number of queued tasks that may migrate -- confirm */
	unsigned long dl_nr_migratory;
	int overloaded;

	/* rbtree holding the tasks that may be moved off this runqueue */
	struct rb_root_cached pushable_dl_tasks_root;
#else
	struct dl_bw dl_bw;
#endif
	/* ... remaining fields omitted ... */
};
其中,主要的组织和关键指针如下图所示。
这里关注下pushable_dl_tasks的红黑树,其主要作用是什么呢?
这里存放的任务是可以推送到其他CPU上去抢占执行的
选择下一个任务
deadline队列用红黑树组织调度实体,并按deadline排序;选择下一个任务就是选取红黑树最左侧的节点,即deadline最早的任务(这里只是选取,并不会把该节点从树中删除)。
/*
 * Return the deadline entity stored at the leftmost node of dl_rq's rbtree.
 * Part of the body is omitted in this excerpt.
 */
static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
struct dl_rq *dl_rq)
{
struct rb_node *left = rb_first_cached(&dl_rq->root); // leftmost node of the rbtree
// (code omitted)
return rb_entry(left, struct sched_dl_entity, rb_node);
}
/*
 * Pick the next deadline task of rq and set it up as the next task.
 * Part of the body (including the local declarations) is omitted in
 * this excerpt.
 */
static struct task_struct *pick_next_task_dl(struct rq *rq)
{
// (code omitted)
dl_se = pick_next_dl_entity(rq, dl_rq); // pick the next entity
p = dl_task_of(dl_se);
set_next_task_dl(rq, p, true); // set up p as the next task (passes first = true)
return p;
}
设置下一个任务
/*
 * Set up p as the next task on rq: stamp its execution start time and
 * take it off the pushable tree. Part of the body is omitted in this
 * excerpt.
 */
static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
{
p->se.exec_start = rq_clock_task(rq); // p is the next task; its exec_start is set to rq_clock_task(rq)
/* You can't push away the running task */
dequeue_pushable_dl_task(rq, p);
// (code omitted)
deadline_queue_push_tasks(rq); // NOTE(review): presumably arranges a later push of pushable tasks; definition not shown here
}
其中,dequeue_pushable_dl_task()主要作用是:
- 如果选中的next任务在pushable红黑树中,则需要将其从该树中移出:任务一旦开始运行(处于running状态),就不能再被push到其他CPU上去。
/*
 * Remove p from rq's pushable-tasks rbtree, if it is queued there.
 *
 * When p is the leftmost node and has a successor, the cached
 * earliest_dl.next value is refreshed from that successor before p is
 * erased.
 */
static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
	struct dl_rq *dl_rq = &rq->dl;
	struct rb_node *node = &p->pushable_dl_tasks;
	struct rb_node *succ;

	/* Nothing to do when p is not on the pushable tree. */
	if (RB_EMPTY_NODE(node))
		return;

	if (dl_rq->pushable_dl_tasks_root.rb_leftmost == node) {
		/* p is the leftmost node: its successor supplies the new value. */
		succ = rb_next(node);
		if (succ)
			dl_rq->earliest_dl.next =
				rb_entry(succ, struct task_struct,
					 pushable_dl_tasks)->dl.deadline;
	}

	rb_erase_cached(node, &dl_rq->pushable_dl_tasks_root);
	RB_CLEAR_NODE(node);
}
任务入队
/*
 * Enqueue p on rq's deadline runqueue and, when eligible, on the pushable
 * rbtree as well. Part of the body (including the declaration of pi_se)
 * is omitted in this excerpt.
 */
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
// (code omitted)
enqueue_dl_entity(&p->dl, pi_se, flags);
// A task that is not the one currently running on rq and whose
// nr_cpus_allowed is greater than 1 is also added to the pushable rbtree.
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
enqueue_pushable_dl_task(rq, p);
}
/*
 * Refresh dl_se's parameters according to flags, then insert it into the
 * deadline rbtree. pi_se is forwarded to the update/replenish helpers.
 */
static void enqueue_dl_entity(struct sched_dl_entity *dl_se,
struct sched_dl_entity *pi_se, int flags)
{
// (code omitted) The handling depends on why the entity is enqueued:
//   ENQUEUE_WAKEUP    - task_contending() followed by update_dl_entity()
//   ENQUEUE_REPLENISH - replenish_dl_entity()
//   ENQUEUE_RESTORE with a deadline already before the rq clock -
//                       setup_new_dl_entity() assigns fresh values
if (flags & ENQUEUE_WAKEUP) {
task_contending(dl_se, flags);
update_dl_entity(dl_se, pi_se);
} else if (flags & ENQUEUE_REPLENISH) {
replenish_dl_entity(dl_se, pi_se);
} else if ((flags & ENQUEUE_RESTORE) &&
dl_time_before(dl_se->deadline,
rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
setup_new_dl_entity(dl_se);
}
__enqueue_dl_entity(dl_se);
}
/*
 * Insert dl_se into the deadline rbtree, keyed by deadline. The
 * declarations of link, parent, entry, leftmost and dl_rq are omitted in
 * this excerpt.
 */
static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
{
// (code omitted)
while (*link) {
parent = *link;
entry = rb_entry(parent, struct sched_dl_entity, rb_node);
if (dl_time_before(dl_se->deadline, entry->deadline)) // compare deadlines: earlier goes left
link = &parent->rb_left;
else {
link = &parent->rb_right;
leftmost = 0; // descended right, so dl_se is not the leftmost node
}
}
rb_link_node(&dl_se->rb_node, parent, link);
rb_insert_color_cached(&dl_se->rb_node, &dl_rq->root, leftmost);
inc_dl_tasks(dl_se, dl_rq);
}
计算deadline值
当一个新的任务加入deadline队列时,需要对其deadline和runtime进行赋值:
- 任务绝对deadline = 当前时间点 + 相对deadline;
- 任务剩余运行时间runtime = 最大运行时间值;
/*
 * Give dl_se a fresh absolute deadline and a full runtime budget.
 * Part of the body (including how rq is obtained) is omitted in this
 * excerpt.
 */
static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
{
// (code omitted)
dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline; // absolute deadline = rq clock + relative deadline
dl_se->runtime = dl_se->dl_runtime; // remaining runtime = configured runtime
}
那么,dl_deadline和dl_runtime是如何确定的?
/*
 * Copy the deadline parameters from attr into p's deadline entity and
 * derive the bandwidth and density ratios from them.
 */
void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
{
	struct sched_dl_entity *dl_se = &p->dl;

	dl_se->flags = attr->sched_flags;
	dl_se->dl_runtime = attr->sched_runtime;
	dl_se->dl_deadline = attr->sched_deadline;

	/* A zero sched_period means "use the relative deadline as the period". */
	if (attr->sched_period)
		dl_se->dl_period = attr->sched_period;
	else
		dl_se->dl_period = dl_se->dl_deadline;

	/* bandwidth is taken over the period, density over the deadline */
	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
}
原来,dl_deadline和dl_runtime的初始值并不是内核自动计算出来的,而是由用户通过sched_attr(sched_deadline、sched_runtime)预先指定的。
任务出队
出队操作相对来说简单,除了将任务移出dl的红黑树,还需要将其移出pushable红黑树
/*
 * Remove p from rq's deadline runqueue and adjust the bandwidth
 * accounting according to why it is being dequeued.
 */
static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_dl_entity *dl_se = &p->dl;
	struct dl_rq *dl_rq = &rq->dl;

	update_curr_dl(rq);
	__dequeue_task_dl(rq, p, flags);

	/* Migrating, or a save/restore dequeue: drop both bandwidth sums. */
	if (p->on_rq == TASK_ON_RQ_MIGRATING || (flags & DEQUEUE_SAVE)) {
		sub_running_bw(dl_se, dl_rq);
		sub_rq_bw(dl_se, dl_rq);
	}

	if (!(flags & DEQUEUE_SLEEP))
		return;

	/*
	 * The task is blocking or terminating (p->state == TASK_DEAD).
	 * Both cases are handled the same way, because from GRUB's point of
	 * view the task moves from "active contending" to "active non
	 * contending" or "inactive": the inactive timer is started, or the
	 * active utilization is decreased immediately if needed.
	 */
	task_non_contending(p);
}
/*
 * Remove p from both deadline rbtrees of rq: first its entity via
 * dequeue_dl_entity(), then its pushable-tree node via
 * dequeue_pushable_dl_task(). The flags argument is not used in the body.
 */
static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
dequeue_dl_entity(&p->dl);
dequeue_pushable_dl_task(rq, p);
}
更新时间
deadline与rt共享bandwidth,都属于实时调度策略。
这里面有个dl_se->runtime -= scaled_delta_exec,即在更新deadline任务的剩余运行时间;当runtime小于等于0时,说明本周期的运行时间已经耗尽,任务会被throttle(限流)并让出CPU。
/*
 * Charge the time the current task has run since its exec_start and
 * subtract the scaled amount from its remaining deadline runtime.
 * The declarations and the code after the throttle label are omitted in
 * this excerpt.
 */
static void update_curr_dl(struct rq *rq)
{
// (part of this function is omitted)
now = rq_clock_task(rq); // take the current timestamp
delta_exec = now - curr->se.exec_start; // time elapsed since exec_start
curr->se.sum_exec_runtime += delta_exec; // run time is accumulated in se.sum_exec_runtime
curr->se.exec_start = now; // restart the window for the next update
if (dl_entity_is_special(dl_se))
return;
// Compute scaled_delta_exec: through grub_reclaim() when SCHED_FLAG_RECLAIM
// is set, otherwise by scaling delta_exec with scale_freq and scale_cpu.
if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
scaled_delta_exec = grub_reclaim(delta_exec,
rq,
&curr->dl);
} else {
unsigned long scale_freq = arch_scale_freq_capacity(cpu);
unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
scaled_delta_exec = cap_scale(delta_exec, scale_freq);
scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
}
dl_se->runtime -= scaled_delta_exec; // consume the remaining runtime budget
throttle:
// (code omitted)
}
如何保证latency
任务在运行过程中,deadline和runtime的值会不断更新,并在tick中判断runtime是否已经用完。有以下一些点可以用来保证latency:
- dl_rq除了维护自身红黑树外,还维护一个pushable_dl_tasks红黑树,当deadline任务来不及执行或者其他cpu空闲balance的时候,可以将pushable红黑树中的任务推送到其他CPU或由其他CPU来pull拉取任务执行。
- update_curr_dl会更新任务的deadline和runtime值,分别在任务出队、周期性tick处理、让出cpu、put任务至队列等四个地方更新时间戳。
- tick周期性时钟处理函数会不断检查runtime是否耗尽。