schedule(): switching the mm_struct and the CPU registers

/******************/
The process-switch function schedule() lives in kernel/sched/core.c:
asmlinkage void __sched schedule(void)
{
    struct task_struct *tsk = current;
    
    sched_submit_work(tsk);/* not examined here */
    __schedule();
}

/*
 * __schedule() is the main scheduler function.
 */
static void __sched __schedule(void)
{
    struct task_struct *prev, *next;
    unsigned long *switch_count;
    struct rq *rq;
    int cpu;

    cpu = smp_processor_id();
    rq = cpu_rq(cpu);
    prev = rq->curr;

    switch_count = &prev->nivcsw;

    pre_schedule(rq, prev);
/*
 * idle_balance is called by schedule() if this_cpu is about to become
 * idle. Attempts to pull tasks from other CPUs.
 */
    if (unlikely(!rq->nr_running))
        idle_balance(cpu, rq);

    put_prev_task(rq, prev);
    next = pick_next_task(rq);

    if (likely(prev != next)) {
        rq->nr_switches++;
        rq->curr = next;
        ++*switch_count;
        /* execution switches away here; this call returns only when this task is scheduled in again (see the comment below) */
        context_switch(rq, prev, next); /* unlocks the rq */
        /*
         * The context switch has flipped the stack from under us
         * and restored the local variables which were saved when
         * this task called schedule() in the past. prev == current
         * is still correct, but it can be moved to another cpu/rq.
         */
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
    }
}

Get the CPU id, then the runqueue (struct rq) for that CPU, then the task_struct that runqueue is currently running:

#define smp_processor_id() raw_smp_processor_id()
#define raw_smp_processor_id() (current_thread_info()->cpu)

#define cpu_rq(cpu)        (&per_cpu(runqueues, (cpu)))
 /* use the per-CPU variable runqueues plus the CPU ID to reach the current task_struct */
    crash> runqueues
    PER-CPU DATA TYPE:
      struct rq runqueues;
    PER-CPU ADDRESSES:
      [1]: c0e54e00

    crash> struct rq.cpu c0e54e00
      cpu = 1
    crash> struct rq.nr_running c0e54e00
      nr_running = 1
    crash> struct rq.curr c0e54e00
      curr = 0xee268120
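Putting those pieces together, a minimal sketch (curr_task_on is a made-up helper name, not a kernel API; cpu_rq and struct rq come from the kernel/sched internal headers shown above):

/* sketch: look up the task currently running on a given CPU by
 * composing the per-CPU runqueue lookup shown above */
static struct task_struct *curr_task_on(int cpu)
{
    struct rq *rq = cpu_rq(cpu);    /* &per_cpu(runqueues, cpu) */
    return rq->curr;                /* e.g. 0xee268120 in the crash session above */
}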

About the sched_class member of task_struct:
as the code below shows, a task_struct points to a sched_class, and a sched_class is a table of scheduling operations.
static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
{
    if (prev->sched_class->pre_schedule)
        prev->sched_class->pre_schedule(rq, prev);
}
crash> task_struct.sched_class 0xee268120
  sched_class = 0xc0454e24 <fair_sched_class>
crash> sched_class 0xc0454e24
struct sched_class {
  next = 0xc0454dc8 <idle_sched_class>,
  enqueue_task = 0xc0051a14 <enqueue_task_fair>,
  dequeue_task = 0xc0050f10 <dequeue_task_fair>,
  yield_task = 0xc0050cb8 <yield_task_fair>,
  yield_to_task = 0xc0050d58 <yield_to_task_fair>,
  check_preempt_curr = 0xc0050b30 <check_preempt_wakeup>,
  pick_next_task = 0xc0050e4c <pick_next_task_fair>,
  put_prev_task = 0xc00519a0 <put_prev_task_fair>,
  select_task_rq = 0xc00511f8 <select_task_rq_fair>,
  pre_schedule = 0x0,
  post_schedule = 0x0,
  task_waking = 0xc00501a0 <task_waking_fair>,
  task_woken = 0x0,
  set_cpus_allowed = 0x0,
  rq_online = 0xc005118c <rq_online_fair>,
  rq_offline = 0xc0051178 <rq_offline_fair>,
  set_curr_task = 0xc0050e08 <set_curr_task_fair>,
  task_tick = 0xc0050a58 <task_tick_fair>,
  task_fork = 0xc0050938 <task_fork_fair>,
  switched_from = 0xc0050528 <switched_from_fair>,
  switched_to = 0xc005081c <switched_to_fair>,
  prio_changed = 0xc00507d4 <prio_changed_fair>,
  get_rr_interval = 0xc0050438 <get_rr_interval_fair>
}

pick_next_task(rq) then picks another task_struct from the runqueue, and context_switch(rq, prev, next) switches to it.
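pick_next_task() itself dispatches through exactly those sched_class function pointers. A simplified sketch of the idea (the real 3.x function also has a fast path for the case where only CFS tasks are runnable):

/* sketch: ask each scheduler class, from highest priority down,
 * to pick a task; the idle class at the end of the list always
 * returns one, so the loop cannot fall through */
static struct task_struct *pick_next_task_sketch(struct rq *rq)
{
    const struct sched_class *class;
    struct task_struct *p;

    for (class = sched_class_highest; class; class = class->next) {
        p = class->pick_next_task(rq);
        if (p)
            return p;
    }
    BUG();  /* not reached: the idle class always has a task */
}
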
/*
 * context_switch - switch to the new MM and the new thread's register state.
 */
static inline void
context_switch(struct rq *rq, struct task_struct *prev,
           struct task_struct *next)
{
    struct mm_struct *mm, *oldmm;

    mm = next->mm;
    oldmm = prev->active_mm;

    if (!mm) {
        next->active_mm = oldmm;
        atomic_inc(&oldmm->mm_count);
        enter_lazy_tlb(oldmm, next);/* a no-op on ARM */
    } else
        switch_mm(oldmm, mm, next);

    if (!prev->mm) {
        prev->active_mm = NULL;
        rq->prev_mm = oldmm;
    }


    /* Here we just switch the register state and the stack. */
    switch_to(prev, next, prev);

    barrier();

}

context_switch splits into two parts: switch_mm(oldmm, mm, next) and switch_to(prev, next, prev).

switch_mm

switch_mm(struct mm_struct *prev, struct mm_struct *next,
      struct task_struct *tsk)
{

    unsigned int cpu = smp_processor_id();

    /* check for possible thread migration */
    if (!cpumask_empty(mm_cpumask(next)) &&
        !cpumask_test_cpu(cpu, mm_cpumask(next)))
        __flush_icache_all();
    if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) {
        /* another per-CPU variable, current_mm, is updated here */
        struct mm_struct **crt_mm = &per_cpu(current_mm, cpu);
        *crt_mm = next;
        check_context(next);
        cpu_switch_mm(next->pgd, next);
        if (cache_is_vivt())
            cpumask_clear_cpu(cpu, mm_cpumask(prev));
    }
}

switch_mm -> cpu_switch_mm:
/* this macro implicitly converts the mm_struct's pgd (a virtual address) to a physical address */
#define cpu_switch_mm(pgd, mm) cpu_do_switch_mm(virt_to_phys(pgd), mm)
extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);

Whatever else cpu_do_switch_mm does, it must program the TTB register:

#define cpu_do_switch_mm        __glue(CPU_NAME,_switch_mm)
    ENTRY(cpu_v7_switch_mm)
    #ifdef CONFIG_MMU
        mov    r2, #0
        ldr    r1, [r1, #MM_CONTEXT_ID]    @ get mm->context.id
        ALT_SMP(orr    r0, r0, #TTB_FLAGS_SMP)
        ALT_UP(orr    r0, r0, #TTB_FLAGS_UP)

        mrc    p15, 0, r2, c2, c0, 1        @ load TTB 1
        mcr    p15, 0, r2, c2, c0, 0        @ into TTB 0
        isb

        mcr    p15, 0, r1, c13, c0, 1        @ set context ID
        isb
        mcr    p15, 0, r0, c2, c0, 0        @ set TTB 0
        isb
    #endif
        mov    pc, lr
    ENDPROC(cpu_v7_switch_mm)
crash> dis -l cpu_v7_switch_mm
0xc0019820 <cpu_v7_switch_mm>:  mov     r2, #0
0xc0019824 <cpu_v7_switch_mm+4>:        ldr     r1, [r1, #348]  ; 0x15c
0xc0019828 <cpu_v7_switch_mm+8>:        orr     r0, r0, #106    ; 0x6a
0xc001982c <cpu_v7_switch_mm+12>:       mrc     15, 0, r2, cr2, cr0, {1}
0xc0019830 <cpu_v7_switch_mm+16>:       mcr     15, 0, r2, cr2, cr0, {0}
0xc0019834 <cpu_v7_switch_mm+20>:       isb     sy
0xc0019838 <cpu_v7_switch_mm+24>:       mcr     15, 0, r1, cr13, cr0, {1}
0xc001983c <cpu_v7_switch_mm+28>:       isb     sy
0xc0019840 <cpu_v7_switch_mm+32>:       mcr     15, 0, r0, cr2, cr0, {0}
0xc0019844 <cpu_v7_switch_mm+36>:       isb     sy
0xc0019848 <cpu_v7_switch_mm+40>:       mov     pc, lr

An oops dump prints: Table: adaec04a

That value is read from CP15 register c2 (the TTB) with:
    asm("mrc p15, 0, %0, c2, c0\n\t"
         : "=r" (transbase));
So Table: adaec04a is the content of the TTB register: adaec04a.
The translation table base lives in bits [31:14] of TTB, i.e. adaec000:
31:14 Pointer to first-level translation table base. Read/write.
crash> eval (adaec04a >> 14)
hexadecimal: 2b6bb  
crash> eval (2b6bb << 14)
hexadecimal: adaec000  (2845616KB)
So the base address is PA adaec000.

crash> current_mm

PER-CPU DATA TYPE:
  struct mm_struct *current_mm;
PER-CPU ADDRESSES:
  [1]: c0e541a0
What crash shows here is the address of the per-CPU pointer variable itself; to reach the mm_struct, read the pointer and then examine it:
crash> rd c0e541a0
c0e541a0:  edb82be0                              .+..
crash> mm_struct.pgd edb82be0
  pgd = 0xedaec000

The pgd obtained here is a virtual address; converting it to a physical address:
virtual to physical:
0xedaec000 - 0xc0000000 + 0x80000000 = adaec000
This matches the value read from the TTB register.
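The same consistency check as throwaway user-space C (the 0xc0000000/0x80000000 offsets are this board's PAGE_OFFSET/PHYS_OFFSET, and the mask keeps TTB bits [31:14]):

#include <assert.h>
#include <stdio.h>

int main(void)
{
    unsigned long pgd_va   = 0xedaec000UL;                         /* mm_struct.pgd from crash */
    unsigned long ttb      = 0xadaec04aUL;                         /* "Table:" value in the oops */
    unsigned long pgd_pa   = pgd_va - 0xc0000000UL + 0x80000000UL; /* virt_to_phys by hand */
    unsigned long ttb_base = ttb & ~((1UL << 14) - 1);             /* drop bits [13:0] */

    printf("pgd PA = %08lx, TTB base = %08lx\n", pgd_pa, ttb_base);
    assert(pgd_pa == ttb_base);                                    /* both adaec000 */
    return 0;
}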

How does the MMU map a VA to a PA? Via the translation table walk: the whole process is done by the MMU hardware; software only has to set up the TTB.

The 32-bit VA is split into three fields: [31:20] and [19:12] index the two levels of page tables, and [11:0] is the offset within the page (see the sketch after this list).
1. CP15 register c2 (the TTB) holds the base address of the first-level page table; this is a physical address, i.e. the table sits in physical memory at exactly that address.
2. With TTB as the base and VA[31:20] as the index, the MMU reads the first-level descriptor, which contains the physical base address of the second-level page table.
3. With that second-level table base (PA) as the base and VA[19:12] as the index, it reads the second-level descriptor, which contains the physical base address of the page frame.
4. VA[11:0] is then the offset within that page frame, giving the final physical address.
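
For illustration only (the MMU does all of this in hardware), the same two-level walk replayed in software, assuming the classic ARM short-descriptor format with coarse second-level tables and 4KB small pages; phys_read32() is a made-up helper that reads a 32-bit word at a physical address:

extern unsigned long phys_read32(unsigned long pa);   /* hypothetical physical-memory read */

/* sketch: software replay of the ARMv7 short-descriptor 2-level walk */
unsigned long va_to_pa(unsigned long ttb, unsigned long va)
{
    unsigned long l1_base = ttb & ~0x3fffUL;                              /* TTB bits [31:14]   */
    unsigned long l1_desc = phys_read32(l1_base + ((va >> 20) & 0xfff) * 4); /* index VA[31:20] */
    unsigned long l2_base = l1_desc & ~0x3ffUL;                           /* coarse table base  */
    unsigned long l2_desc = phys_read32(l2_base + ((va >> 12) & 0xff) * 4);  /* index VA[19:12] */
    unsigned long page    = l2_desc & ~0xfffUL;                           /* small-page base    */
    return page | (va & 0xfff);                                           /* offset VA[11:0]    */
}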

 Here we just switch the register state and the stack.

switch_to(prev, next, prev);

extern struct task_struct *__switch_to(struct task_struct *, struct thread_info *, struct thread_info *);

#define switch_to(prev,next,last)                    \
do {                                    \
    last = __switch_to(prev,task_thread_info(prev), task_thread_info(next));    \
} while (0)

/*
 * Register switch for ARMv3 and ARMv4 processors
 * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
 * previous and next are guaranteed not to be the same.
 */
ENTRY(__switch_to)
 UNWIND(.fnstart    )
 UNWIND(.cantunwind    )
    add    ip, r1, #TI_CPU_SAVE
    ldr    r3, [r2, #TI_TP_VALUE]
 ARM(    stmia    ip!, {r4 - sl, fp, sp, lr} )    @ Store most regs on stack
    set_tls    r3, r4, r5
    mov    r5, r0
    add    r4, r2, #TI_CPU_SAVE
    ldr    r0, =thread_notify_head
    mov    r1, #THREAD_NOTIFY_SWITCH
    bl    atomic_notifier_call_chain

    mov    r0, r5
 ARM(    ldmia    r4, {r4 - sl, fp, sp, pc}  )    @ Load all regs saved previously
 UNWIND(.fnend        )
ENDPROC(__switch_to)

Annotated disassembly of __switch_to (arch/arm/kernel/entry-armv.S); recall r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info.

struct thread_info {
   [28] struct cpu_context_save cpu_context;
}
add     r12, r1, #28 // r12 now points to cpu_context inside the previous thread_info
struct cpu_context_save {
    __u32 r4;
    __u32 r5;
    __u32 r6;
    __u32 r7;
    __u32 r8;
    __u32 r9;
    __u32 sl;
    __u32 fp;
    __u32 sp;
    __u32 pc;
    __u32 extra[2];
}
SIZE: 48
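The #28 immediate is simply the offset of cpu_context inside struct thread_info; in the kernel it is generated at build time as the TI_CPU_SAVE constant by arch/arm/kernel/asm-offsets.c, and TI_TP_VALUE (96, used just below) comes from the same file (quoted from memory, so treat the exact lines as approximate):

  DEFINE(TI_TP_VALUE,  offsetof(struct thread_info, tp_value));    /* 96 in this build */
  DEFINE(TI_CPU_SAVE,  offsetof(struct thread_info, cpu_context)); /* 28 in this build */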

0xc000dea8 <__switch_to>:       add     r12, r1, #28
struct thread_info {
   [96] unsigned long tp_value;// the TLS value for this task (see below)
}
0xc000deac <__switch_to+4>:     ldr     r3, [r2, #96]   ; 0x60
Save registers {r4, r5, r6, r7, r8, r9, r10, r11, sp, lr} into the previous thread_info's cpu_context_save:
0xc000deb0 <__switch_to+8>:     stmia   r12!, {r4, r5, r6, r7, r8, r9, r10, r11, sp, lr}

Load the next task's tp_value into the ARM TLS registers (the set_tls macro; cr13/cr0 {3} is TPIDRURO, {2} is TPIDRURW):
0xc000deb4 <__switch_to+12>:    mcr     15, 0, r3, cr13, cr0, {3}
0xc000deb8 <__switch_to+16>:    mov     r4, #0
0xc000debc <__switch_to+20>:    mcr     15, 0, r4, cr13, cr0, {2}
0xc000dec0 <__switch_to+24>:    mov     r5, r0

/* r4 now points at the next thread_info's cpu_context_save; it is loaded back into the registers by the ldm at +48 */
0xc000dec4 <__switch_to+28>:    add     r4, r2, #28
0xc000dec8 <__switch_to+32>:    ldr     r0, [pc, #12]   ; 0xc000dedc
0xc000decc <__switch_to+36>:    mov     r1, #2
0xc000ded0 <__switch_to+40>:    bl      0xc0047dd4 <atomic_notifier_call_chain>

0xc000ded4 <__switch_to+44>:    mov     r0, r5
0xc000ded8 <__switch_to+48>:    ldm     r4, {r4, r5, r6, r7, r8, r9, r10, r11, sp, pc}
Note: per the ATPCS, r0-r2 carry function arguments, while r4-r11 are callee-saved (unlike x86, where caller and callee each save part of the register set). So r4 keeps its value across the call to atomic_notifier_call_chain, while r0 has to be stashed in r5 and restored afterwards.
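
For reference, the two mcr instructions at +12 and +20 program the user TLS registers (CP15 c13/c0: opc2 3 = TPIDRURO, opc2 2 = TPIDRURW); reading them back looks like this (a sketch, GCC inline asm):

static inline unsigned long read_tpidruro(void)
{
    unsigned long v;
    asm volatile("mrc p15, 0, %0, c13, c0, 3" : "=r" (v));  /* user read-only TLS */
    return v;
}

static inline unsigned long read_tpidrurw(void)
{
    unsigned long v;
    asm volatile("mrc p15, 0, %0, c13, c0, 2" : "=r" (v));  /* user read/write TLS */
    return v;
}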