schedule(): switching the mm_struct and the CPU registers

/******************/
The process-switch function schedule() lives in kernel/sched/core.c:
asmlinkage void __sched schedule(void)
{
    struct task_struct *tsk = current;
    
    sched_submit_work(tsk);/* not examined here */
    __schedule();
}

/*
 * __schedule() is the main scheduler function.
 */
static void __sched __schedule(void)
{
    struct task_struct *prev, *next;
    unsigned long *switch_count;
    struct rq *rq;
    int cpu;

    cpu = smp_processor_id();
    rq = cpu_rq(cpu);
    prev = rq->curr;

    switch_count = &prev->nivcsw;

    pre_schedule(rq, prev);
/*
 * idle_balance is called by schedule() if this_cpu is about to become
 * idle. Attempts to pull tasks from other CPUs.
 */
    if (unlikely(!rq->nr_running))
        idle_balance(cpu, rq);

    put_prev_task(rq, prev);
    next = pick_next_task(rq);

    if (likely(prev != next)) {
        rq->nr_switches++;
        rq->curr = next;
        ++*switch_count;
        /* execution switches away here; this call returns only when this task is scheduled in again (see the comment below) */
        context_switch(rq, prev, next); /* unlocks the rq */
        /*
         * The context switch has flipped the stack from under us
         * and restored the local variables which were saved when
         * this task called schedule() in the past. prev == current
         * is still correct, but it can be moved to another cpu/rq.
         */
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
    }
}

Get the CPU id, then the runqueue (struct rq) for that CPU, then the task_struct that runqueue is currently running:

#define smp_processor_id() raw_smp_processor_id()
#define raw_smp_processor_id() (current_thread_info()->cpu)

#define cpu_rq(cpu)        (&per_cpu(runqueues, (cpu)))
 /* use the per-CPU variable runqueues plus the CPU ID to reach the current task_struct */
    crash> runqueues
    PER-CPU DATA TYPE:
      struct rq runqueues;
    PER-CPU ADDRESSES:
      [1]: c0e54e00

    crash> struct rq.cpu c0e54e00
      cpu = 1
    crash> struct rq.nr_running c0e54e00
      nr_running = 1
    crash> struct rq.curr c0e54e00
      curr = 0xee268120
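Putting those pieces together, a minimal sketch (curr_task_on is a made-up helper name, not a kernel API; cpu_rq and struct rq come from the kernel/sched internal headers shown above):

/* sketch: look up the task currently running on a given CPU by
 * composing the per-CPU runqueue lookup shown above */
static struct task_struct *curr_task_on(int cpu)
{
    struct rq *rq = cpu_rq(cpu);    /* &per_cpu(runqueues, cpu) */
    return rq->curr;                /* e.g. 0xee268120 in the crash session above */
}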

About the sched_class member of task_struct:
as the code below shows, a task_struct points to a sched_class, and a sched_class is a table of scheduling operations.
static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
{
    if (prev->sched_class->pre_schedule)
        prev->sched_class->pre_schedule(rq, prev);
}
crash> task_struct.sched_class 0xee268120
  sched_class = 0xc0454e24 <fair_sched_class>
crash> sched_class 0xc0454e24
struct sched_class {
  next = 0xc0454dc8 <idle_sched_class>,
  enqueue_task = 0xc0051a14 <enqueue_task_fair>,
  dequeue_task = 0xc0050f10 <dequeue_task_fair>,
  yield_task = 0xc0050cb8 <yield_task_fair>,
  yield_to_task = 0xc0050d58 <yield_to_task_fair>,
  check_preempt_curr = 0xc0050b30 <check_preempt_wakeup>,
  pick_next_task = 0xc0050e4c <pick_next_task_fair>,
  put_prev_task = 0xc00519a0 <put_prev_task_fair>,
  select_task_rq = 0xc00511f8 <select_task_rq_fair>,
  pre_schedule = 0x0,
  post_schedule = 0x0,
  task_waking = 0xc00501a0 <task_waking_fair>,
  task_woken = 0x0,
  set_cpus_allowed = 0x0,
  rq_online = 0xc005118c <rq_online_fair>,
  rq_offline = 0xc0051178 <rq_offline_fair>,
  set_curr_task = 0xc0050e08 <set_curr_task_fair>,
  task_tick = 0xc0050a58 <task_tick_fair>,
  task_fork = 0xc0050938 <task_fork_fair>,
  switched_from = 0xc0050528 <switched_from_fair>,
  switched_to = 0xc005081c <switched_to_fair>,
  prio_changed = 0xc00507d4 <prio_changed_fair>,
  get_rr_interval = 0xc0050438 <get_rr_interval_fair>
}

pick_next_task(rq) then picks another task_struct from the runqueue, and context_switch(rq, prev, next) switches to it.
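pick_next_task() itself dispatches through exactly those sched_class function pointers. A simplified sketch of the idea (the real 3.x function also has a fast path for the case where only CFS tasks are runnable):

/* sketch: ask each scheduler class, from highest priority down,
 * to pick a task; the idle class at the end of the list always
 * returns one, so the loop cannot fall through */
static struct task_struct *pick_next_task_sketch(struct rq *rq)
{
    const struct sched_class *class;
    struct task_struct *p;

    for (class = sched_class_highest; class; class = class->next) {
        p = class->pick_next_task(rq);
        if (p)
            return p;
    }
    BUG();  /* not reached: the idle class always has a task */
}
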
/*
 * context_switch - switch to the new MM and the new thread's register state.
 */
static inline void
context_switch(struct rq *rq, struct task_struct *prev,
           struct task_struct *next)
{
    struct mm_struct *mm, *oldmm;

    mm = next->mm;
    oldmm = prev->active_mm;

    if (!mm) {
        next->active_mm = oldmm;
        atomic_inc(&oldmm->mm_count);
        enter_lazy_tlb(oldmm, next);/* a no-op on ARM */
    } else
        switch_mm(oldmm, mm, next);

    if (!prev->mm) {
        prev->active_mm = NULL;
        rq->prev_mm = oldmm;
    }


    /* Here we just switch the register state and the stack. */
    switch_to(prev, next, prev);

    barrier();

}

context_switch splits into two parts: switch_mm(oldmm, mm, next) and switch_to(prev, next, prev).

switch_mm

switch_mm(struct mm_struct *prev, struct mm_struct *next,
      struct task_struct *tsk)
{

    unsigned int cpu = smp_processor_id();

    /* check for possible thread migration */
    if (!cpumask_empty(mm_cpumask(next)) &&
        !cpumask_test_cpu(cpu, mm_cpumask(next)))
        __flush_icache_all();
    if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) {
        /* another per-CPU variable, current_mm, is updated here */
        struct mm_struct **crt_mm = &per_cpu(current_mm, cpu);
        *crt_mm = next;
        check_context(next);
        cpu_switch_mm(next->pgd, next);
        if (cache_is_vivt())
            cpumask_clear_cpu(cpu, mm_cpumask(prev));
    }
}

switch_mm -> cpu_switch_mm:
/* this macro implicitly converts the mm_struct's pgd (a virtual address) to a physical address */
#define cpu_switch_mm(pgd, mm) cpu_do_switch_mm(virt_to_phys(pgd), mm)
extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);

Whatever else cpu_do_switch_mm does, it must program the TTB register:

#define cpu_do_switch_mm        __glue(CPU_NAME,_switch_mm)
    ENTRY(cpu_v7_switch_mm)
    #ifdef CONFIG_MMU
        mov    r2, #0
        ldr    r1, [r1, #MM_CONTEXT_ID]    @ get mm->context.id
        ALT_SMP(orr    r0, r0, #TTB_FLAGS_SMP)
        ALT_UP(orr    r0, r0, #TTB_FLAGS_UP)

        mrc    p15, 0, r2, c2, c0, 1        @ load TTB 1
        mcr    p15, 0, r2, c2, c0, 0        @ into TTB 0
        isb

        mcr    p15, 0, r1, c13, c0, 1        @ set context ID
        isb
        mcr    p15, 0, r0, c2, c0, 0        @ set TTB 0
        isb
    #endif
        mov    pc, lr
    ENDPROC(cpu_v7_switch_mm)
crash> dis -l cpu_v7_switch_mm
0xc0019820 <cpu_v7_switch_mm>:  mov     r2, #0
0xc0019824 <cpu_v7_switch_mm+4>:        ldr     r1, [r1, #348]  ; 0x15c
0xc0019828 <cpu_v7_switch_mm+8>:        orr     r0, r0, #106    ; 0x6a
0xc001982c <cpu_v7_switch_mm+12>:       mrc     15, 0, r2, cr2, cr0, {1}
0xc0019830 <cpu_v7_switch_mm+16>:       mcr     15, 0, r2, cr2, cr0, {0}
0xc0019834 <cpu_v7_switch_mm+20>:       isb     sy
0xc0019838 <cpu_v7_switch_mm+24>:       mcr     15, 0, r1, cr13, cr0, {1}
0xc001983c <cpu_v7_switch_mm+28>:       isb     sy
0xc0019840 <cpu_v7_switch_mm+32>:       mcr     15, 0, r0, cr2, cr0, {0}
0xc0019844 <cpu_v7_switch_mm+36>:       isb     sy
0xc0019848 <cpu_v7_switch_mm+40>:       mov     pc, lr

An oops dump prints: Table: adaec04a

That value is read from CP15 register c2 (the TTB) with:
    asm("mrc p15, 0, %0, c2, c0\n\t"
         : "=r" (transbase));
So Table: adaec04a is the content of the TTB register: adaec04a.
The translation table base lives in bits [31:14] of TTB, i.e. adaec000:
31:14 Pointer to first-level translation table base. Read/write.
crash> eval (adaec04a >> 14)
hexadecimal: 2b6bb  
crash> eval (2b6bb << 14)
hexadecimal: adaec000  (2845616KB)
So the base address is PA adaec000.

crash> current_mm

PER-CPU DATA TYPE:
  struct mm_struct *current_mm;
PER-CPU ADDRESSES:
  [1]: c0e541a0
What crash shows here is the address of the per-CPU pointer variable itself; to reach the mm_struct, read the pointer and then examine it:
crash> rd c0e541a0
c0e541a0:  edb82be0                              .+..
crash> mm_struct.pgd edb82be0
  pgd = 0xedaec000

The pgd obtained here is a virtual address; converting it to a physical address:
virtual to physical:
0xedaec000 - 0xc0000000 + 0x80000000 = adaec000
This matches the value read from the TTB register.
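The same consistency check as throwaway user-space C (the 0xc0000000/0x80000000 offsets are this board's PAGE_OFFSET/PHYS_OFFSET, and the mask keeps TTB bits [31:14]):

#include <assert.h>
#include <stdio.h>

int main(void)
{
    unsigned long pgd_va   = 0xedaec000UL;                         /* mm_struct.pgd from crash */
    unsigned long ttb      = 0xadaec04aUL;                         /* "Table:" value in the oops */
    unsigned long pgd_pa   = pgd_va - 0xc0000000UL + 0x80000000UL; /* virt_to_phys by hand */
    unsigned long ttb_base = ttb & ~((1UL << 14) - 1);             /* drop bits [13:0] */

    printf("pgd PA = %08lx, TTB base = %08lx\n", pgd_pa, ttb_base);
    assert(pgd_pa == ttb_base);                                    /* both adaec000 */
    return 0;
}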

How does the MMU map a VA to a PA? Via the translation table walk: the whole process is done by the MMU hardware; software only has to set up the TTB.

The 32-bit VA is split into three fields: [31:20] and [19:12] index the two levels of page tables, and [11:0] is the offset within the page (see the sketch after this list).
1. CP15 register c2 (the TTB) holds the base address of the first-level page table; this is a physical address, i.e. the table sits in physical memory at exactly that address.
2. With TTB as the base and VA[31:20] as the index, the MMU reads the first-level descriptor, which contains the physical base address of the second-level page table.
3. With that second-level table base (PA) as the base and VA[19:12] as the index, it reads the second-level descriptor, which contains the physical base address of the page frame.
4. VA[11:0] is then the offset within that page frame, giving the final physical address.
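
For illustration only (the MMU does all of this in hardware), the same two-level walk replayed in software, assuming the classic ARM short-descriptor format with coarse second-level tables and 4KB small pages; phys_read32() is a made-up helper that reads a 32-bit word at a physical address:

extern unsigned long phys_read32(unsigned long pa);   /* hypothetical physical-memory read */

/* sketch: software replay of the ARMv7 short-descriptor 2-level walk */
unsigned long va_to_pa(unsigned long ttb, unsigned long va)
{
    unsigned long l1_base = ttb & ~0x3fffUL;                              /* TTB bits [31:14]   */
    unsigned long l1_desc = phys_read32(l1_base + ((va >> 20) & 0xfff) * 4); /* index VA[31:20] */
    unsigned long l2_base = l1_desc & ~0x3ffUL;                           /* coarse table base  */
    unsigned long l2_desc = phys_read32(l2_base + ((va >> 12) & 0xff) * 4);  /* index VA[19:12] */
    unsigned long page    = l2_desc & ~0xfffUL;                           /* small-page base    */
    return page | (va & 0xfff);                                           /* offset VA[11:0]    */
}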

 Here we just switch the register state and the stack.

switch_to(prev, next, prev);

extern struct task_struct *__switch_to(struct task_struct *, struct thread_info *, struct thread_info *);

#define switch_to(prev,next,last)                    \
do {                                    \
    last = __switch_to(prev,task_thread_info(prev), task_thread_info(next));    \
} while (0)

/*
 * Register switch for ARMv3 and ARMv4 processors
 * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
 * previous and next are guaranteed not to be the same.
 */
ENTRY(__switch_to)
 UNWIND(.fnstart    )
 UNWIND(.cantunwind    )
    add    ip, r1, #TI_CPU_SAVE
    ldr    r3, [r2, #TI_TP_VALUE]
 ARM(    stmia    ip!, {r4 - sl, fp, sp, lr} )    @ Store most regs on stack
    set_tls    r3, r4, r5
    mov    r5, r0
    add    r4, r2, #TI_CPU_SAVE
    ldr    r0, =thread_notify_head
    mov    r1, #THREAD_NOTIFY_SWITCH
    bl    atomic_notifier_call_chain

    mov    r0, r5
 ARM(    ldmia    r4, {r4 - sl, fp, sp, pc}  )    @ Load all regs saved previously
 UNWIND(.fnend        )
ENDPROC(__switch_to)

Annotated disassembly of __switch_to (arch/arm/kernel/entry-armv.S); recall r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info.

struct thread_info {
   [28] struct cpu_context_save cpu_context;
}
add     r12, r1, #28 // r12 now points to cpu_context inside the previous thread_info
struct cpu_context_save {
    __u32 r4;
    __u32 r5;
    __u32 r6;
    __u32 r7;
    __u32 r8;
    __u32 r9;
    __u32 sl;
    __u32 fp;
    __u32 sp;
    __u32 pc;
    __u32 extra[2];
}
SIZE: 48
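The #28 immediate is simply the offset of cpu_context inside struct thread_info; in the kernel it is generated at build time as the TI_CPU_SAVE constant by arch/arm/kernel/asm-offsets.c, and TI_TP_VALUE (96, used just below) comes from the same file (quoted from memory, so treat the exact lines as approximate):

  DEFINE(TI_TP_VALUE,  offsetof(struct thread_info, tp_value));    /* 96 in this build */
  DEFINE(TI_CPU_SAVE,  offsetof(struct thread_info, cpu_context)); /* 28 in this build */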

0xc000dea8 <__switch_to>:       add     r12, r1, #28
struct thread_info {
   [96] unsigned long tp_value;// the TLS value for this task (see below)
}
0xc000deac <__switch_to+4>:     ldr     r3, [r2, #96]   ; 0x60
Save registers {r4, r5, r6, r7, r8, r9, r10, r11, sp, lr} into the previous thread_info's cpu_context_save:
0xc000deb0 <__switch_to+8>:     stmia   r12!, {r4, r5, r6, r7, r8, r9, r10, r11, sp, lr}

Load the next task's tp_value into the ARM TLS registers (the set_tls macro; cr13/cr0 {3} is TPIDRURO, {2} is TPIDRURW):
0xc000deb4 <__switch_to+12>:    mcr     15, 0, r3, cr13, cr0, {3}
0xc000deb8 <__switch_to+16>:    mov     r4, #0
0xc000debc <__switch_to+20>:    mcr     15, 0, r4, cr13, cr0, {2}
0xc000dec0 <__switch_to+24>:    mov     r5, r0

/* r4 now points at the next thread_info's cpu_context_save; it is loaded back into the registers by the ldm at +48 */
0xc000dec4 <__switch_to+28>:    add     r4, r2, #28
0xc000dec8 <__switch_to+32>:    ldr     r0, [pc, #12]   ; 0xc000dedc
0xc000decc <__switch_to+36>:    mov     r1, #2
0xc000ded0 <__switch_to+40>:    bl      0xc0047dd4 <atomic_notifier_call_chain>

0xc000ded4 <__switch_to+44>:    mov     r0, r5
0xc000ded8 <__switch_to+48>:    ldm     r4, {r4, r5, r6, r7, r8, r9, r10, r11, sp, pc}
Note: per the ATPCS, r0-r2 carry function arguments, while r4-r11 are callee-saved (unlike x86, where caller and callee each save part of the register set). So r4 keeps its value across the call to atomic_notifier_call_chain, while r0 has to be stashed in r5 and restored afterwards.
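
For reference, the two mcr instructions at +12 and +20 program the user TLS registers (CP15 c13/c0: opc2 3 = TPIDRURO, opc2 2 = TPIDRURW); reading them back looks like this (a sketch, GCC inline asm):

static inline unsigned long read_tpidruro(void)
{
    unsigned long v;
    asm volatile("mrc p15, 0, %0, c13, c0, 3" : "=r" (v));  /* user read-only TLS */
    return v;
}

static inline unsigned long read_tpidrurw(void)
{
    unsigned long v;
    asm volatile("mrc p15, 0, %0, c13, c0, 2" : "=r" (v));  /* user read/write TLS */
    return v;
}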