SMP多核启动以及CPU热插拔驱动

转自:http://blog.csdn.net/21cnbao/article/details/8545088


4.   SMP多核启动以及CPU热插拔驱动

在Linux系统中,对于多核的ARM芯片而言,在Bootrom代码中,CPU0会率先起来,引导Bootloader和Linux内核执行,而其他的核在上电时一般会被Bootrom置于WFI或者WFE状态,并等待CPU0给其发送CPU核间中断(IPI)或事件(一般透过SEV指令)将其唤醒。一个典型的启动过程如下图:


被CPU0唤醒的CPUn可以在运行过程中进行热插拔。譬如运行如下命令即可卸载CPU1并且将CPU1上的任务全部迁移到其他CPU:

# echo 0 >/sys/devices/system/cpu/cpu1/online

同样地,运行如下命令可以再次启动CPU1:

# echo 1 >/sys/devices/system/cpu/cpu1/online

之后CPU1会主动参与系统中各个CPU之间要运行任务的负载均衡工作。

CPU0唤醒其他CPU的动作在内核中被封装为一个smp_operations的结构体,该结构体的成员如下:

[cpp]  view plain  copy
  1.  83struct smp_operations {  
  2.   
  3.  84#ifdef CONFIG_SMP  
  4.   
  5.  85       /* 
  6.  
  7.  86        * Setup the set of possible CPUs (via set_cpu_possible) 
  8.  
  9.  87        */  
  10.   
  11.  88       void (*smp_init_cpus)(void);  
  12.   
  13.  89       /* 
  14.  
  15.  90        * Initialize cpu_possible map, and enable coherency 
  16.  
  17.  91        */  
  18.   
  19.  92       void (*smp_prepare_cpus)(unsigned int max_cpus);  
  20.   
  21.  93  
  22.   
  23.  94       /* 
  24.  
  25.  95        * Perform platform specific initialisation of the specified CPU. 
  26.  
  27.  96        */  
  28.   
  29.  97       void (*smp_secondary_init)(unsigned int cpu);  
  30.   
  31.  98       /* 
  32.  
  33.  99         * Boot a secondary CPU, and assign it the specified idle task. 
  34.  
  35. 100        * This also gives us the initial stack to use for this CPU. 
  36.  
  37. 101        */  
  38.   
  39. 102       int (*smp_boot_secondary)(unsigned int cpu, struct task_struct *idle);  
  40.   
  41. 103#ifdef CONFIG_HOTPLUG_CPU  
  42.   
  43. 104       int  (*cpu_kill)(unsigned int cpu);  
  44.   
  45. 105       void (*cpu_die)(unsigned int cpu);  
  46.   
  47. 106       int  (*cpu_disable)(unsigned int cpu);  
  48.   
  49. 107#endif  
  50.   
  51. 108#endif  
  52.   
  53. 109};  
我们从arch/arm/mach-vexpress/v2m.c看到VEXPRESS电路板用到的smp_ops为vexpress_smp_ops:

666DT_MACHINE_START(VEXPRESS_DT,"ARM-Versatile Express")

 667       .dt_compat      = v2m_dt_match,

 668       .smp            =smp_ops(vexpress_smp_ops),

 669       .map_io         = v2m_dt_map_io,

 670       .init_early     =v2m_dt_init_early,

 671       .init_irq       = v2m_dt_init_irq,

 672       .timer          =&v2m_dt_timer,

 673       .init_machine   = v2m_dt_init,

 674       .handle_irq     = gic_handle_irq,

 675       .restart        = v2m_restart,

 676MACHINE_END

透过arch/arm/mach-vexpress/platsmp.c的实现代码可以看出,smp_operations的成员函数smp_init_cpus() 即vexpress_smp_init_cpus()会探测SoC内CPU核的个数,并设置了核间通信的方式为gic_raise_softirq()。可见于vexpress_smp_init_cpus()中调用的vexpress_dt_smp_init_cpus():

103static void __init vexpress_dt_smp_init_cpus(void)

 104{

 

128        for (i = 0; i < ncores; ++i)

 129                set_cpu_possible(i, true);

 130

 131       set_smp_cross_call(gic_raise_softirq);

 132}

而smp_operations的成员函数smp_prepare_cpus()即vexpress_smp_prepare_cpus()则会透过v2m_flags_set(virt_to_phys(versatile_secondary_startup))设置其他CPU的启动地址为versatile_secondary_startup:

179static void __init vexpress_smp_prepare_cpus(unsigned int max_cpus)

 180{

 181       …

 189

 190       /*

 191        * Write the address of secondary startup into the

 192        * system-wide flags register. The boot monitor waits

 193        * until it receives a soft interrupt, and then the

 194        * secondary CPU branches to this address.

 195        */

 196       v2m_flags_set(virt_to_phys(versatile_secondary_startup));

 197}

注意这部分的具体实现方法是SoC相关的,由芯片的设计以及芯片内部的Bootrom决定。对于VEXPRESS来讲,设置方法如下:

139void __init v2m_flags_set(u32 data)

 140{

 141       writel(~0, v2m_sysreg_base + V2M_SYS_FLAGSCLR);

 142       writel(data, v2m_sysreg_base + V2M_SYS_FLAGSSET);

 143}

即向v2m_sysreg_base + V2M_SYS_FLAGSCLR地址写入0xFFFFFFFF,再将其他CPU初始启动执行的指令地址写入v2m_sysreg_base + V2M_SYS_FLAGSSET。这2个地址是芯片实现时设定的。填入的CPUn的起始地址都透过virt_to_phys()转化为物理地址,因为此时CPUn的MMU尚未开启。

比较关键的是smp_operations的成员函数smp_boot_secondary(),它完成最终的CPUn的唤醒工作:

  27static void __cpuinit write_pen_release(int val)

  28{

  29       pen_release = val;

  30       smp_wmb();

  31       __cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));

  32       outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));

  33}

 

  59int __cpuinit versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)

  60{

  61       unsigned long timeout;

  62

  63       /*

  64        * Set synchronisation state between this boot processor

  65        * and the secondary one

  66        */

  67       spin_lock(&boot_lock);

  68

  69       /*

  70        * This is really belt and braces; we hold unintended secondary

  71        * CPUs in the holding pen until we're ready for them.  However,

  72        * since we haven't sent them a soft interrupt, they shouldn't

  73        * be there.

  74        */

  75       write_pen_release(cpu_logical_map(cpu));

  76

  77       /*

  78        * Send the secondary CPU a soft interrupt, thereby causing

  79        * the boot monitor to read the system wide flags register,

  80        * and branch to the address found there.

  81        */

  82       gic_raise_softirq(cpumask_of(cpu), 0);

  83

  84       timeout = jiffies + (1 * HZ);

  85       while (time_before(jiffies, timeout)) {

  86                smp_rmb();

  87                if (pen_release == -1)

  88                        break;

  89

  90                udelay(10);

  91       }

  92

  93       /*

  94        * now the secondary core is starting up let it run its

  95        * calibrations, then wait for it to finish

  96        */

  97       spin_unlock(&boot_lock);

  98

  99       return pen_release != -1 ? -ENOSYS : 0;

 100}

上述代码中高亮的部分(即第75行和第82行)首先会将pen_release变量设置为要唤醒的CPU核的CPU号cpu_logical_map(cpu),而后透过gic_raise_softirq(cpumask_of(cpu), 0)给编号为cpu的CPU核发起0号IPI,这个时候,该CPU核会从前面smp_operations中的smp_prepare_cpus()成员函数即vexpress_smp_prepare_cpus()透过v2m_flags_set()设置的其他CPU核的起始地址versatile_secondary_startup开始执行,如果顺利的话,该CPU会将原先为正数的pen_release写为-1,以便CPU0从等待pen_release成为-1的循环中跳出。

versatile_secondary_startup实现于arch/arm/plat-versatile/headsmp.S,是一段汇编:

  21ENTRY(versatile_secondary_startup)

  22       mrc     p15, 0, r0, c0, c0, 5

  23       and     r0, r0, #15

  24       adr     r4, 1f

  25       ldmia   r4, {r5, r6}

  26       sub     r4, r4, r5

  27       add     r6, r6, r4

  28pen:   ldr     r7, [r6]

  29       cmp     r7, r0

  30       bne     pen

  31

  32       /*

  33        * we've been released from the holding pen: secondary_stack

  34        * should now contain the SVC stack for this core

  35        */

  36       b       secondary_startup

  37

  38       .align

  391:     .long   .

  40       .long   pen_release

  41ENDPROC(versatile_secondary_startup)

第1段高亮的部分(第28~30行的pen循环)实际上是等待pen_release成为CPU0设置的cpu_logical_map(cpu),一般直接就成立了。第2段高亮的部分(第36行)则跳转到内核通用的secondary_startup()函数,经过一系列的初始化如MMU等,最终新的被唤醒的CPU将调用到smp_operations的smp_secondary_init()成员函数,对于本例为versatile_secondary_init():

  37void __cpuinit versatile_secondary_init(unsigned int cpu)

  38{

  39       /*

  40        * if any interrupts are already enabled for the primary

  41        * core (e.g. timer irq), then they will not have been enabled

  42        * for us: do so

  43        */

  44       gic_secondary_init(0);

  45

  46       /*

  47        * let the primary processor know we're out of the

  48        * pen, then head off into the C entry point

  49        */

  50       write_pen_release(-1);

  51

  52       /*

  53        * Synchronise with the boot thread.

  54        */

  55       spin_lock(&boot_lock);

  56       spin_unlock(&boot_lock);

  57}

上述代码中高亮的那1行(第50行的write_pen_release(-1))会将pen_release写为-1,于是CPU0还在执行的versatile_boot_secondary()函数中的如下循环就退出了:

  85       while (time_before(jiffies, timeout)) {

  86                smp_rmb();

  87                if (pen_release == -1)

  88                        break;

  89

  90                udelay(10);

  91       }

此后CPU0和新唤醒的其他CPU各自狂奔。整个系统在运行过程中会进行实时进程和正常进程的动态负载均衡。

CPU hotplug的实现也是芯片相关的,对于VEXPRESS而言,实现了smp_operations的cpu_die()成员函数即vexpress_cpu_die()。它会在进行CPUn的拔除操作时将CPUn投入低功耗的WFI状态,相关代码位于arch/arm/mach-vexpress/hotplug.c:

  90void __ref vexpress_cpu_die(unsigned int cpu)

  91{

  92       int spurious = 0;

  93

  94       /*

  95        * we're ready for shutdown now, so do it

  96        */

  97       cpu_enter_lowpower();

  98       platform_do_lowpower(cpu, &spurious);

  99

 100       /*

 101        * bring this CPU back into the world of cache

 102         * coherency, and then restore interrupts

 103        */

 104       cpu_leave_lowpower();

 105

 106       if (spurious)

 107                pr_warn("CPU%u: %u spurious wakeup calls\n", cpu, spurious);

 108}

  57static inline void platform_do_lowpower(unsigned int cpu, int *spurious)

  58{

  59       /*

  60        * there is no power-control hardware on this platform, so all

  61        * we can do is put the core into WFI; this is safe as the calling

  62        * code will have already disabled interrupts

  63        */

  64       for (;;) {

  65                wfi();

  66

  67                if (pen_release == cpu_logical_map(cpu)) {

  68                        /*

  69                         * OK, proper wakeup, we're done

  70                         */

  71                        break;

  72                }

  73

  74                /*

  75                 * Getting here, means that we have come out of WFI without

  76                 * having been woken up - this shouldn't happen

  77                 *

  78                 * Just note it happening - when we're woken, we can report

  79                 * its occurrence.

  80                 */

  81                (*spurious)++;

  82       }

  83}

CPUn睡眠于wfi(),之后再次online的时候,又会因为CPU0给它发出的IPI而从wfi()函数返回继续执行,醒来时CPUn还会判断pen_release == cpu_logical_map(cpu)是否成立,以确定该次醒来确确实实是由CPU0唤醒的一次正常醒来。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值