等待队列
sleep相关函数将进程的状态设置为非运行态,这样在下一次调度到来时,schedule函数会将本进程从运行队列中移除。sleep函数先将进程加入等待队列,然后调用schedule函数选择另一个进程并开始执行。当调用wake_up类函数将进程唤醒时,wake_up类函数将进程重新加入运行队列;该进程再次被调度时,从sleep函数中尚未执行的下一条指令处继续执行。
sleep类函数都调用sleep_on_common函数实现,只是传入的参数有别。
/*
 * sleep_on_common - queue the current task on @q and sleep.
 * @q:       wait queue head the task is queued on
 * @state:   task state stored with __set_current_state() before sleeping
 * @timeout: value handed to schedule_timeout()
 *
 * Returns the value returned by schedule_timeout().
 *
 * The wait-queue entry lives on this function's stack and is removed from
 * @q again before returning.
 */
static long __sched
sleep_on_common(wait_queue_head_t *q, int state, long timeout)
{
unsigned long flags;
wait_queue_t wait;
/* Initialise the wait-queue entry for the current task. */
init_waitqueue_entry(&wait, current);
/* Set the current task's state before it is queued. */
__set_current_state(state);
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue(q, &wait);/* add the entry to the wait queue */
/* Only the lock is dropped here; the saved flags are restored by the
   spin_unlock_irqrestore() at the end of the function. */
spin_unlock(&q->lock);
/* Sleep until timeout; other tasks get to run while this one sleeps. */
timeout = schedule_timeout(timeout);
spin_lock_irq(&q->lock);
/* Execution resumes here once this task has been woken up:
   take the entry off the wait queue again. */
__remove_wait_queue(q, &wait);
spin_unlock_irqrestore(&q->lock, flags);
return timeout;
}
/*
 * init_waitqueue_entry - prepare wait-queue entry @q on behalf of task @p.
 * @q: entry to initialise
 * @p: task recorded in the entry
 *
 * Clears the entry's flags, records @p in the private slot and installs
 * default_wake_function as the wake callback.
 */
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
	/* Wake-ups for this entry go through the default wake function. */
	q->func = default_wake_function;

	/* Keep the owning task in the entry's private field. */
	q->private = p;

	/* Start with no flag bits set. */
	q->flags = 0;
}
我们看唤醒函数,default_wake_function最终调用函数try_to_wake_up
/***
* try_to_wake_up - wake up a thread
* @p: the to-be-woken-up thread
* @state: the mask of task states that can be woken
* @sync: do a synchronous wakeup?
*
* Put it on the run-queue if it's not already there. The "current"
* thread is always on the run-queue (except when the actual
* re-schedule is in progress), and as such you're allowed to do
* the simpler "current->state = TASK_RUNNING" to mark yourself
* runnable without the overhead of this.
*
* returns failure only if the task is already active.
*/
static int try_to_wake_up(struct task_struct *p, unsigned int state,
int wake_flags)
{
int cpu, orig_cpu, this_cpu, success = 0;
unsigned long flags;
struct rq *rq, *orig_rq;
if (!sched_feat(SYNC_WAKEUPS))
wake_flags &= ~WF_SYNC;/* waker not goes to sleep after wakup */
this_cpu = get_cpu();/*cpu id*/
smp_wmb();
rq = orig_rq = task_rq_lock(p, &flags);/*获得进程的rq*/
update_rq_clock(rq);/*更新rq的时钟*/
if (!(p->state & state))
goto out;
if (p->se.on_rq)/*如果进程已经在运行队列中*/
goto out_running;
cpu = task_cpu(p);/*返回进程对应的cpu*/
orig_cpu = cpu;
#ifdef CONFIG_SMP
if (unlikely(task_running(rq, p)))/*如果当前进程时p,也就是waker*/
goto out_activate;
/*
* In order to handle concurrent wakeups and release the rq->lock
* we put the task in TASK_WAKING state.
*
* First fix up the nr_uninterruptible count:
*/
if (task_contributes_to_load(p))
rq->nr_uninterruptible--;
p->state = TASK_WAKING;
task_rq_unlock(rq, &flags);
/*通常用在執行一個新的程序,或是WakeUp
一個Task時,會根據目前SMP下每個處理器的
負荷,決定Task是否要切換到另一個處理器
的RunQueue去執行,執行時會返回最後目標
處理器的值.*/
cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
if (cpu != orig_cpu)
set_task_cpu(p, cpu);/*设置task在制定的cpu上运行*/
rq = task_rq_lock(p, &flags);/*task对应的rq*/
if (rq != orig_rq)
update_rq_clock(rq);/*更新clock*/
WARN_ON(p->state != TASK_WAKING);
cpu = task_cpu(p);
#ifdef CONFIG_SCHEDSTATS/*yes*/
schedstat_inc(rq, ttwu_count);/*Wake Up Task的次數加一.*/
if (cpu == this_cpu)
/*Wake Up 同一個處理器Task的次數加一.*/
schedstat_inc(rq, ttwu_local);
else {
struct sched_domain *sd;
for_each_domain(this_cpu, sd) {
if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
schedstat_inc(sd, ttwu_wake_remote);
break;
}
}
}
#endif /* CONFIG_SCHEDSTATS */
out_activate:
#endif /* CONFIG_SMP */
/*下面为设置相关计数变量*/
schedstat_inc(rq, field)(p, se.nr_wakeups);
if (wake_flags & WF_SYNC)
schedstat_inc(p, se.nr_wakeups_sync);
if (orig_cpu != cpu)
schedstat_inc(p, se.nr_wakeups_migrate);
if (cpu == this_cpu)
schedstat_inc(p, se.nr_wakeups_local);
else
schedstat_inc(p, se.nr_wakeups_remote);
/*将进程移动到对应调度类的运行队列*/
activate_task(rq, p, 1);
success = 1;
/*
* Only attribute actual wakeups done by this task.
*/
if (!in_interrupt()) {/*下面为对se中变量last_wakeup和
avg_wakeup的更新*/
struct sched_entity *se = ¤t->se;
u64 sample = se->sum_exec_runtime;
if (se->last_wakeup)
sample -= se->last_wakeup;
else
sample -= se->start_runtime;
update_avg(&se->avg_wakeup, sample);
se->last_wakeup = se->sum_exec_runtime;
}
out_running:
trace_sched_wakeup(rq, p, success);
/*用以決定一個Task是否可以中斷目前正在
運作的Task,取得執行權.*/
check_preempt_curr(rq, p, wake_flags);
p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
p->sched_class->task_wake_up(rq, p);
if (unlikely(rq->idle_stamp)) {/*该值可用以表示這個
處理器是何時進入到Idle的
狀態,在这里得到更新*/
u64 delta = rq->clock - rq->idle_stamp;
u64 max = 2*sysctl_sched_migration_cost;
if (delta > max)
rq->avg_idle = max;
else/*avg_idle可反應目前處理器進入Idle狀態的時間長短*/
update_avg(&rq->avg_idle, delta);
rq->idle_stamp = 0;
}
#endif
out:
task_rq_unlock(rq, &flags);
put_cpu();
return success;
}
所有的wake_up类函数都最终调用__wake_up_common函数实现
/*
 * __wake_up_common - walk wait queue @q and invoke every entry's wake callback.
 * @q:            wait queue head whose task_list is traversed
 * @mode:         passed through unchanged to each entry's callback
 * @nr_exclusive: decremented for every exclusive entry whose callback
 *                reported success; the walk stops when it reaches zero
 * @wake_flags:   passed through unchanged to each entry's callback
 * @key:          passed through unchanged to each entry's callback
 *
 * For entries set up by init_waitqueue_entry() the callback is
 * default_wake_function.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key)
{
	wait_queue_t *entry, *lookahead;

	list_for_each_entry_safe(entry, lookahead, &q->task_list, task_list) {
		/* Snapshot the flags before the callback gets to run. */
		unsigned entry_flags = entry->flags;

		/* Callback returned zero: move on to the next entry. */
		if (!entry->func(entry, mode, wake_flags, key))
			continue;

		/* Non-exclusive entries never end the walk. */
		if (!(entry_flags & WQ_FLAG_EXCLUSIVE))
			continue;

		/* Stop once the exclusive budget has been used up. */
		if (--nr_exclusive == 0)
			break;
	}
}
wait_event方式
sleep_on类函数在下面这种情况下不能使用:必须先测试某个条件,并且在条件尚未满足时紧接着让进程去睡眠(如果唤醒恰好发生在测试条件与进入睡眠之间,这次唤醒就会丢失)。为实现这样的功能,内核采用wait_event的方式实现。
/*
 * __wait_event - sleep on wait queue @wq until @condition evaluates true.
 *
 * Each loop iteration calls prepare_to_wait() with TASK_UNINTERRUPTIBLE
 * BEFORE testing @condition, and only calls schedule() when the test fails.
 * @condition is evaluated once per iteration, so it may run several times.
 * finish_wait() is called once the loop has been left.
 */
#define __wait_event(wq, condition) \
do { \
DEFINE_WAIT(__wait); \
\
for (;;) { /* join the wait queue and set the task state */ \
prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
if (condition) \
break; \
schedule();/* let other tasks run */ \
}/* reached via break, i.e. once condition is true */ \
finish_wait(&wq, &__wait); \
} while (0)
当下一次调度到来时,调度程序把设置为非运行态的当前进程从运行队列里面删除;而进程被wake_up类函数唤醒时,wake_up类函数将其重新加入运行队列,进程从schedule函数返回后继续执行上面没有执行完成的wait_event循环:重新检查条件,条件满足时跳出循环并执行finish_wait函数,finish_wait函数将其从等待队列中删除。