Linux 上下文切换的实现

上下文切换的具体步骤

场景:进程A下CPU,进程B上CPU
1. 保存进程A的上下文环境(程序计数器,程序状态字,其他寄存器…)
2. 用新状态和其他相关信息更新进程A的PCB
3. 把进程A移至合适的队列(就绪,阻塞…)
4. 将进程B的状态设置为运行态
5. 从进程B的PCB中恢复上下文(程序计数器,程序状态字,其他寄存器…)

Ref: 陈奶奶2018.3.29的课件

上下文切换的代码实现

schedule 调度函数

// Ref: linux-2.6.0\kernel\sched.c
/*
 * schedule() is the main scheduler function.
 *
 * Picks the next task to run from this CPU's runqueue (rq->active, or
 * rq->idle when nothing is runnable), updates the accounting of the
 * outgoing task (prev) and, if the chosen task differs from prev,
 * performs the context switch via context_switch().
 *
 * Takes no arguments and returns nothing; it operates on `current`.
 * The whole body is re-run from need_resched if TIF_NEED_RESCHED is
 * set again by the time the function is about to return.
 */
asmlinkage void schedule(void)
{
    task_t *prev, *next;
    runqueue_t *rq;
    prio_array_t *array;
    struct list_head *queue;
    unsigned long long now;
    unsigned long run_time;
    int idx;

    /*
     * Test if we are atomic.  Since do_exit() needs to call into
     * schedule() atomically, we ignore that path for now.
     * Otherwise, whine if we are scheduling when we should not be.
     */
    if (likely(!(current->state & (TASK_DEAD | TASK_ZOMBIE)))) {
        if (unlikely(in_atomic())) {
            printk(KERN_ERR "bad: scheduling while atomic!\n");
            dump_stack();
        }
    }

need_resched:
    preempt_disable();
    prev = current;
    rq = this_rq();

    release_kernel_lock(prev);
    now = sched_clock();
    /* Time since prev's last timestamp, clamped to NS_MAX_SLEEP_AVG. */
    if (likely(now - prev->timestamp < NS_MAX_SLEEP_AVG))
        run_time = now - prev->timestamp;
    else
        run_time = NS_MAX_SLEEP_AVG;

    /*
     * Tasks with interactive credits get charged less run_time
     * at high sleep_avg to delay them losing their interactive
     * status
     */
    if (HIGH_CREDIT(prev))
        /* GNU "?:" extension: divide by the bonus, or by 1 if it is 0. */
        run_time /= (CURRENT_BONUS(prev) ? : 1);

    spin_lock_irq(&rq->lock);

    /*
     * if entering off of a kernel preemption go straight
     * to picking the next task.
     */
    if (unlikely(preempt_count() & PREEMPT_ACTIVE))
        goto pick_next_task;

    /*
     * Decide what happens to prev based on its state.  Note the label
     * order: `default` sits between the two cases on purpose.
     */
    switch (prev->state) {
    case TASK_INTERRUPTIBLE:
        /* A pending signal cancels the sleep: prev stays runnable. */
        if (unlikely(signal_pending(prev))) {
            prev->state = TASK_RUNNING;
            break;
        }
        /* fallthrough: no signal pending, treat like any non-running state */
    default:
        deactivate_task(prev, rq);
        prev->nvcsw++;  /* presumably the voluntary context-switch counter */
        break;
    case TASK_RUNNING:
        prev->nivcsw++; /* presumably the involuntary context-switch counter */
    }
pick_next_task:
    if (unlikely(!rq->nr_running)) {
        /* Nothing runnable on this runqueue. */
#ifdef CONFIG_SMP
        /* Try load_balance() first; retry the pick if it made a task runnable here. */
        load_balance(rq, 1, cpu_to_node_mask(smp_processor_id()));
        if (rq->nr_running)
            goto pick_next_task;
#endif
        next = rq->idle;
        rq->expired_timestamp = 0;
        goto switch_tasks;
    }

    array = rq->active;
    if (unlikely(!array->nr_active)) {
        /*
         * Switch the active and expired arrays.
         */
        rq->active = rq->expired;
        rq->expired = array;
        array = rq->active;
        rq->expired_timestamp = 0;
    }

    /*
     * Take the first task on the list selected by the first set bit
     * of the array's bitmap.
     */
    idx = sched_find_first_bit(array->bitmap);
    queue = array->queue + idx;
    next = list_entry(queue->next, task_t, run_list);

    if (next->activated > 0) {
        /* Time elapsed since next's last timestamp. */
        unsigned long long delta = now - next->timestamp;

        /* activated == 1: scale delta by ON_RUNQUEUE_WEIGHT percent. */
        if (next->activated == 1)
            delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;

        array = next->array;

        /* Re-queue next around the priority recalculation. */
        dequeue_task(next, array);
        recalc_task_prio(next, next->timestamp + delta);
        enqueue_task(next, array);
    }
    next->activated = 0;
switch_tasks:
    prefetch(next);
    clear_tsk_need_resched(prev);
    RCU_qsctr(task_cpu(prev))++;

    /* Charge run_time against prev's sleep_avg, flooring it at 0. */
    prev->sleep_avg -= run_time;
    if ((long)prev->sleep_avg <= 0){
        prev->sleep_avg = 0;
        if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev)))
            prev->interactive_credit--;
    }
    prev->timestamp = now;

    if (likely(prev != next)) {
        // Check whether prev and next are the same process; only switch if they differ.
        next->timestamp = now;
        rq->nr_switches++;
        rq->curr = next;

        prepare_arch_switch(rq, next);
        // Context Switching!
        prev = context_switch(rq, prev, next);
        barrier();

        /*
         * NOTE(review): rq->lock is not released explicitly on this
         * path; presumably the arch-switch helpers or
         * finish_task_switch() do it -- confirm.
         */
        finish_task_switch(prev);
    } else
        spin_unlock_irq(&rq->lock);

    reacquire_kernel_lock(current);
    preempt_enable_no_resched();
    /* A reschedule was requested in the meantime: run the scheduler again. */
    if (test_thread_flag(TIF_NEED_RESCHED))
        goto need_resched;
}

context_switch() 进程上下文切换

调用switch_mm(),把虚拟内存从一个进程映射切换到新进程中。主要包括加载页表, 刷出地址转换后备缓冲器(部分或者全部), 向内存管理单元(MMU)提供新的信息
调用switch_to(),从上一个进程的处理器状态切换到新进程的处理器状态。包括保存、恢复栈信息和寄存器信息

// Ref: linux-2.6.0\kernel\sched.c
/*
 * context_switch - switch to the new MM and the new
 * thread's register state.
 *
 * @rq:   runqueue on which the switch takes place
 * @prev: task being switched out
 * @next: task being switched in
 *
 * Returns prev as it stands after switch_to(); switch_to() writes its
 * third argument, which is prev here.
 */
static inline task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
{
    struct mm_struct *incoming_mm;
    struct mm_struct *outgoing_mm;

    incoming_mm = next->mm;
    outgoing_mm = prev->active_mm;

    if (unlikely(!incoming_mm)) {
        /*
         * next is a kernel thread: it has no virtual address space of
         * its own (mm == NULL), so it borrows the previous task's
         * active mm.
         */
        next->active_mm = outgoing_mm;
        /* Take a reference on the borrowed mm. */
        atomic_inc(&outgoing_mm->mm_count);
        enter_lazy_tlb(outgoing_mm, next);
    } else {
        /* Not a kernel thread: the virtual address space must be switched. */
        switch_mm(outgoing_mm, incoming_mm, next);
    }

    if (unlikely(!prev->mm)) {
        /*
         * prev had no mm of its own: detach the mm it was using and
         * park it in rq->prev_mm, which must be empty at this point.
         */
        prev->active_mm = NULL;
        WARN_ON(rq->prev_mm);
        rq->prev_mm = outgoing_mm;
    }

    /* Here we just switch the register state and the stack. */
    switch_to(prev, next, prev);

    return prev;
}

switch_to() 宏

switch_mm() 和 switch_to() 是体系结构相关的代码,这里以 x86_64 为例来展示代码。
内核在switch_to中执行如下操作
1. 进程切换, 即rsp的切换, 从rsp可以找到进程的描述符
2. 硬件上下文切换,通过 call 指令调用 __switch_to 函数

// Ref: linux-2.6.0\include\asm-x86_64\system.h

/*
 * switch_to(prev, next, last) - switch kernel stack and register state
 * from task `prev` to task `next`.
 *
 * The asm template does the following, in order:
 *   - SAVE_CONTEXT;
 *   - stores %rsp into prev's thread.rsp and loads %rsp from next's
 *     thread.rsp, which switches the kernel stack;
 *   - calls __switch_to;
 *   - at the global label thread_return: loads the current task pointer
 *     from the PDA (via %gs) into %rsi and its thread_info into %r8;
 *   - "btr" tests and clears bit TIF_FORK in thread_info->flags; if the
 *     bit was set (carry), jumps to ret_from_fork with %rax copied to %rdi;
 *   - RESTORE_CONTEXT.
 *
 * `last` is the output operand and receives %rax ("=a").
 *
 * NOTE: the operand names [prev] and [next] in the constraint list are
 * ordinary tokens and therefore undergo macro-parameter substitution,
 * while "%[prev]" / "%[next]" inside the string literals do not.  The
 * first two arguments must consequently be spelled exactly `prev` and
 * `next` at the call site.
 *
 * Every line of the definition must end in a backslash: no "//" comment
 * lines and no blank lines may appear inside it, or the #define is cut
 * short and the remainder becomes stray file-scope tokens.
 */
#define switch_to(prev,next,last) \
    asm volatile(SAVE_CONTEXT                           \
             /* save the outgoing task's RSP */ \
             "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */   \
             /* load the incoming task's RSP: switches the kernel stack */ \
             "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */    \
             "call __switch_to\n\t"                   \
             ".globl thread_return\n"                   \
             "thread_return:\n\t"                       \
             "movq %%gs:%P[pda_pcurrent],%%rsi\n\t"           \
             "movq %P[thread_info](%%rsi),%%r8\n\t"           \
             "btr  %[tif_fork],%P[ti_flags](%%r8)\n\t"            \
             "movq %%rax,%%rdi\n\t"                       \
             "jc   ret_from_fork\n\t"                     \
             RESTORE_CONTEXT                            \
             : "=a" (last)                        \
             : [next] "S" (next), [prev] "D" (prev),              \
               [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \
               [ti_flags] "i" (offsetof(struct thread_info, flags)),\
               [tif_fork] "i" (TIF_FORK),             \
               [thread_info] "i" (offsetof(struct task_struct, thread_info)), \
               [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent))   \
             : "memory", "cc" __EXTRA_CLOBBER)

/*
 * Declared here, defined elsewhere; not referenced by the code shown in
 * this excerpt.  NOTE(review): presumably loads the %gs selector given
 * as the argument -- confirm against the definition.
 */
extern void load_gs_index(unsigned);
// Ref: linux-2.6.0\include\asm-x86_64\thread_info.h
/*
 * Per-task low-level state.
 *
 * switch_to() reaches this structure through task_struct's thread_info
 * member and addresses `flags` by offsetof() from assembly, so the
 * field layout must not be changed casually.
 */
struct thread_info {
    struct task_struct  *task;      /* main task structure */
    struct exec_domain  *exec_domain;   /* execution domain */
    __u32           flags;      /* low level flags; switch_to() tests and clears bit TIF_FORK here */
    __u32           status;     /* thread synchronous flags */
    __u32           cpu;        /* current CPU */
    int             preempt_count;  /* NOTE(review): presumably the value read by preempt_count() -- confirm */

    mm_segment_t        addr_limit;
    struct restart_block    restart_block;
};
// Ref: linux-2.6.0\include\asm-x86_64\system.h
/*
 * __RESTORE(reg, slot) - asm text for a single "movq" that reloads
 * register `reg` from the stack at displacement (14 - slot) * 8 from
 * %rsp.  Both arguments are stringified into the template.
 */
#define __RESTORE(reg,slot) "movq (14-" #slot ")*8(%%rsp),%%" #reg "\n\t"

/*
 * RESTORE_CONTEXT - asm template fragment used at the end of
 * switch_to().  Reloads fourteen registers from their stack slots,
 * then releases the 14 * 8 bytes of slot space and pops the flags
 * register.
 */
#define RESTORE_CONTEXT \
    __RESTORE(rbx, 12) \
    __RESTORE(rdi,  1) \
    __RESTORE(rdx,  6) \
    __RESTORE(rcx,  7) \
    __RESTORE(r12,  2) \
    __RESTORE(r13,  3) \
    __RESTORE(r14,  4) \
    __RESTORE(r15,  5) \
    __RESTORE(r10, 10) \
    __RESTORE(r11, 11) \
    __RESTORE(r8,   8) \
    __RESTORE(r9,   9) \
    __RESTORE(rbp, 13) \
    __RESTORE(rsi,  0) \
    "addq $14*8,%%rsp\n\t" \
    "popfq\n\t"

Ref: https://blog.csdn.net/gatieme/article/details/51872659

  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值