一.schedule()函数的尝试分析
/*
 * schedule() - the main scheduler entry point.
 *
 * Overview of what the body below does:
 *   1. Disables preemption and looks up this CPU's run queue and the task
 *      currently running on it (prev).
 *   2. With local interrupts off and rq->lock held, clears prev's
 *      need-resched flag and, if prev is leaving the runnable state,
 *      either keeps it runnable (a signal is pending for its state) or
 *      deactivates it.
 *   3. Lets prev's scheduling class put prev back, then picks the next task.
 *   4. If next differs from prev, updates the switch statistics and calls
 *      context_switch(), which also releases rq->lock; otherwise just
 *      unlocks.
 *   5. Reacquires the kernel lock, re-enables preemption, and starts over
 *      if another reschedule was requested in the meantime.
 */
asmlinkage void __sched schedule(void)
{
	struct task_struct *prev, *next;
	unsigned long *switch_count;
	struct rq *rq;
	int cpu;

need_resched:
	preempt_disable();		/* temporarily turn off kernel preemption */
	cpu = smp_processor_id();	/* CPU we are running on */
	rq = cpu_rq(cpu);		/* run queue of that CPU */
	rcu_qsctr_inc(cpu);
	prev = rq->curr;		/* task currently executing on this run queue */
	/*
	 * Default to prev's nivcsw counter; the block below redirects this to
	 * nvcsw when prev gives up the CPU because it is no longer runnable.
	 */
	switch_count = &prev->nivcsw;

	/* Dropped here, taken back via reacquire_kernel_lock() near the end. */
	release_kernel_lock(prev);
need_resched_nonpreemptible:

	schedule_debug(prev);

	hrtick_clear(rq);

	/*
	 * Do the rq-clock update outside the rq lock:
	 */
	local_irq_disable();
	update_rq_clock(rq);
	spin_lock(&rq->lock);
	clear_tsk_need_resched(prev);

	/*
	 * prev (the task about to be replaced) has a non-zero state and we are
	 * not in a PREEMPT_ACTIVE section: check whether it still has a signal
	 * pending for that state. If so, keep it runnable by resetting its
	 * state to TASK_RUNNING; otherwise deactivate it on this run queue.
	 */
	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
		if (unlikely(signal_pending_state(prev->state, prev)))
			prev->state = TASK_RUNNING;
		else
			deactivate_task(rq, prev, 1);
		switch_count = &prev->nvcsw;
	}

#ifdef CONFIG_SMP
	/* Optional per-class hook, invoked only when the class provides one. */
	if (prev->sched_class->pre_schedule)
		prev->sched_class->pre_schedule(rq, prev);
#endif

	/* Nothing runnable on this queue: call idle_balance() before picking. */
	if (unlikely(!rq->nr_running))
		idle_balance(cpu, rq);

	/* Hand prev back to its scheduling class on this run queue. */
	prev->sched_class->put_prev_task(rq, prev);
	/* Select the task that will run next from the run queue. */
	next = pick_next_task(rq, prev);

	if (likely(prev != next)) {
		/* A real switch is going to happen. */
		sched_info_switch(prev, next);

		/* Bump the switch counters and make next the queue's current task. */
		rq->nr_switches++;
		rq->curr = next;
		++*switch_count;

		/* The actual context switch - the part to focus on. */
		context_switch(rq, prev, next); /* unlocks the rq */
		/*
		 * the context switch might have flipped the stack from under
		 * us, hence refresh the local variables.
		 */
		cpu = smp_processor_id();
		rq = cpu_rq(cpu);
	} else
		spin_unlock_irq(&rq->lock);

	hrtick_set(rq);

	/* A negative return from reacquire_kernel_lock() sends us back around. */
	if (unlikely(reacquire_kernel_lock(current) < 0))
		goto need_resched_nonpreemptible;

	/* Re-enable kernel preemption without triggering a reschedule here. */
	preempt_enable_no_resched();
	/* Another reschedule was requested while we were busy: start over. */
	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
		goto need_resched;
}
二.context_switch()函数分析
context_switch()本身是一个普通的C函数,它最终调用switch_to()来完成寄存器和内核堆栈的切换;switch_to()实际上是个预处理宏,定义如下:
/*
 * switch_to(prev, next, last) - switch the CPU from task prev to task next.
 *
 * The asm saves EFLAGS and EBP on prev's stack, stores ESP into
 * prev->thread.sp and the address of local label 1 into prev->thread.ip,
 * loads ESP from next->thread.sp, pushes next->thread.ip and jumps to
 * __switch_to() with prev in EAX and next in EDX. Whatever is in EAX when
 * execution continues at label 1 is stored in last.
 */
#define switch_to(prev, next, last)					\
do {									\
	/*								\
	 * Context-switching clobbers all registers, so we clobber	\
	 * them explicitly, via unused output variables.		\
	 * (EAX and EBP is not listed because EBP is saved/restored	\
	 * explicitly for wchan access and EAX is the return value of	\
	 * __switch_to())						\
	 */								\
	unsigned long ebx, ecx, edx, esi, edi;				\
									\
	/* The inline assembly below is the core of the context switch. */ \
	asm volatile("pushfl\n\t"		/* save flags */	\
		     "pushl %%ebp\n\t"		/* save EBP */		\
		     "movl %%esp,%[prev_sp]\n\t"	/* save ESP */	\
		     "movl %[next_sp],%%esp\n\t"	/* restore ESP */ \
		     "movl $1f,%[prev_ip]\n\t"	/* save EIP */		\
		     "pushl %[next_ip]\n\t"	/* restore EIP */	\
		     "jmp __switch_to\n"	/* regparm call */	\
		     "1:\t"						\
		     "popl %%ebp\n\t"		/* restore EBP */	\
		     "popfl\n"			/* restore flags */	\
									\
		     /* output parameters */				\
		     : [prev_sp] "=m" (prev->thread.sp),		\
		       [prev_ip] "=m" (prev->thread.ip),		\
		       "=a" (last),					\
									\
		       /* clobbered output registers: */		\
		       "=b" (ebx), "=c" (ecx), "=d" (edx),		\
		       "=S" (esi), "=D" (edi)				\
									\
		       /* input parameters: */				\
		     : [next_sp] "m" (next->thread.sp),			\
		       [next_ip] "m" (next->thread.ip),			\
									\
		       /* regparm parameters for __switch_to(): */	\
		       [prev] "a" (prev),				\
		       [next] "d" (next));				\
} while (0)
1.内联汇编参数分析
这里的C语言内联汇编代码比较晦涩,先简要分析一下参数,稍候我们将其还原为汇编代码来分析。
/* output parameters */ \
: [prev_sp] "=m" (prev->thread.sp), \
[prev_ip] "=m" (prev->thread.ip), \
"=a" (last), \
这里有三个输出参数:prev_sp、prev_ip和约束为“a”(代表EAX寄存器)的匿名操作数,按位置也可以分别用%0、%1和%2来引用(代码中前两个使用的是具名形式%[prev_sp]和%[prev_ip]),代表这段汇编代码执行完毕后被改动的变量,分别对应于prev->thread.sp、prev->thread.ip和last。后面还有其他被改动的寄存器(ebx、ecx、edx、esi、edi),不过不是重点。
/* input parameters: */ \
: [next_sp] "m" (next->thread.sp), \
[next_ip] "m" (next->thread.ip), \
/* regparm parameters for __switch_to(): */ \
[prev] "a" (prev), \
[next] "d" (next)); \
和上面的分析类似,前两个输入参数next_sp和next_ip分别代表将被调度运行的进程(next)保存的堆栈指针和指令指针;后两个参数是将被调度出去的进程prev和将被调度运行的进程next,它们分别被读入eax和edx寄存器中,作为__switch_to()的寄存器传参(regparm)。
2.汇编代码分析
我们将上面的汇编代码改写一下:
1) pushfl /* save flags */
2) pushl %ebp /* save EBP */
3) movl %esp, %prev->thread.sp /* save ESP */
4) movl %next->thread.sp, %esp /* restore ESP */
5) movl $1f, %prev->thread.ip /* save EIP */
6) pushl %next->thread.ip /* restore EIP */
7) jmp __switch_to /* regparm call */
8) 1:
9) popl %ebp /* restore EBP */
10) popfl
这段代码虽然小,但是暗藏玄机,进程切换的精髓就在其中。第1、2行将标志寄存器和ebp寄存器压入堆栈,第3行将当前进程(prev)的系统空间堆栈指针存入prev->thread.sp。第4行就是进程切换的关键,在这一行中next进程的系统空间堆栈指针被拷贝给了esp寄存器,那么,从这一行开始就是在使用新调度进来的next进程的堆栈了,也就是说此时“当前进程”这个概念就是指next,而不是prev。随后,第5行将标号1(f代表forward,即向前查找最近的标号1)的地址,也就是第9行代码的地址,存入prev->thread.ip,作为prev进程下一次被调度而切入运行的“返回地址”;第6行将next->thread.ip的值压入堆栈中,这个值就是上一次next进程切出时在第5行存入的返回地址(对于刚创建、还从未运行过的进程,它是创建进程时设置的入口地址,如ret_from_fork)。第7行跳转到__switch_to函数,它主要处理TSS,继续切换其余的上下文信息,如fs和gs等寄存器,其中关键是将TSS中的内核空间堆栈指针(sp0,旧版内核中称esp0)恢复成next进程的值。由于这里使用的是jmp而不是call,没有新的返回地址被压栈,所以__switch_to函数通过ret指令返回时,刚才第6行压入堆栈的“返回地址”就被弹到eip寄存器中,于是next进程执行第9和10行,恢复ebp和标志寄存器,next进程也就正式开始执行了。