这个on_each_cpu_cond函数的主要作用是在每个online cpu上执行cond_func,在返回为true的cpu上继续执行func,
这两个函数执行的时候抢占是被禁止的.
我们直接看其源码的实现。
672行指示执行这个函数的时候可能sleep.
674行申请一个cpumask_var_t 结构,注意有加likely说明大部分情况下走这个case,只有在内存紧张时才会走
else的case
676~678行在online的cpu上执行cond_func,如果这个函数返回true,则在678行设定对应的cpu位.
680行使能抢占
681行释放内存
665 void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
666 smp_call_func_t func, void *info, bool wait,
667 gfp_t gfp_flags)
668 {
669 cpumask_var_t cpus;
670 int cpu, ret;
671
672 might_sleep_if(gfpflags_allow_blocking(gfp_flags));
673
674 if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
675 preempt_disable();
676 for_each_online_cpu(cpu)
677 if (cond_func(cpu, info))
678 cpumask_set_cpu(cpu, cpus);
679 on_each_cpu_mask(cpus, func, info, wait);
680 preempt_enable();
681 free_cpumask_var(cpus);
682 } else {
683 /*
684 * No free cpumask, bother. No matter, we'll
685 * just have to IPI them one by one.
686 */
687 preempt_disable();
688 for_each_online_cpu(cpu)
689 if (cond_func(cpu, info)) {
690 ret = smp_call_function_single(cpu, func,
691 info, wait);
692 WARN_ON_ONCE(ret);
693 }
694 preempt_enable();
695 }
696 }
on_each_cpu_cond 函数的679行再调用on_each_cpu_mask 函数以cpus为参数调用func。我们看看on_each_cpu_mask的实现
622 void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
623 void *info, bool wait)
624 {
625 int cpu = get_cpu();
626
627 smp_call_function_many(mask, func, info, wait);
628 if (cpumask_test_cpu(cpu, mask)) {
629 unsigned long flags;
630 local_irq_save(flags);
631 func(info);
632 local_irq_restore(flags);
633 }
634 put_cpu();
635 }
在625行得到当前的cpu,code如下。注意调用者on_each_cpu_cond已经禁止抢占了,个人感觉没必要再调用preempt_disable,直接调用smp_processor_id 就可以得到当前cpu id.
185 #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
627行调用smp_call_function_many 在非当前cpu上调用func 函数,这个函数后面分析
628行通过cpumask_test_cpu 检测当前cpu是否可以执行func,如果可以的话,就在当前cpu上运行func
再来看看on_each_cpu_cond 中682行else的条件
执行else 说明zalloc_cpumask_var 申请memory 失败了
cond_func的执行情况和likely的情况一样,我们重点看cond_func 返回true后在其他cpu上执行func的情况.
int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
271 int wait)
272 {
299 err = generic_exec_single(cpu, csd, func, info);
306 return err;
307 }
继续调用generic_exec_single
143 static int generic_exec_single(int cpu, struct call_single_data *csd,
144 smp_call_func_t func, void *info)
145 {
146 if (cpu == smp_processor_id()) {
147 unsigned long flags;
148
149 /*
150 * We can unlock early even for the synchronous on-stack case,
151 * since we're doing this from the same CPU..
152 */
153 csd_unlock(csd);
154 local_irq_save(flags);
155 func(info);
156 local_irq_restore(flags);
157 return 0;
158 }
159
160
161 if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
162 csd_unlock(csd);
163 return -ENXIO;
164 }
165
166 csd->func = func;
167 csd->info = info;
168
169 /*
170 * The list addition should be visible before sending the IPI
171 * handler locks the list to pull the entry off it because of
172 * normal cache coherency rules implied by spinlocks.
173 *
174 * If IPIs can go out of order to the cache coherency protocol
175 * in an architecture, sufficient synchronisation should be added
176 * to arch code to make it appear to obey cache coherency WRT
177 * locking and barrier primitives. Generic code isn't really
178 * equipped to do the right thing...
179 */
180 if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
181 arch_send_call_function_single_ipi(cpu);
182
183 return 0;
184 }
可以看到如果146行判断是当前cpu,则直接执行这个func。
如果不是当前cpu 则通过arch_send_call_function_single_ipi 来发ipi中断让其他cpu执行
770 void arch_send_call_function_single_ipi(int cpu)
771 {
772 smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
773 }
接着看smp_cross_call的实现
487 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
488 {
489 trace_ipi_raise_rcuidle(target, ipi_types[ipinr]);
490 __smp_cross_call(target, ipinr);
491 }
而__smp_cross_call 是通过set_smp_cross_call 赋值的
470 void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int))
471 {
472 if (!__smp_cross_call)
473 __smp_cross_call = fn;
474 }
而这个set_smp_cross_call 函数是在gic_smp_init 中设定的
http://lxr.free-electrons.com/source/drivers/irqchip/irq-gic-v3.c#L636
634 static void gic_smp_init(void)
635 {
636 set_smp_cross_call(gic_raise_softirq);
637 register_cpu_notifier(&gic_cpu_notifier);
638 }
所以__smp_cross_call == gic_raise_softirq
所以调用arch_send_call_function_single_ipi就是调用gic_raise_softirq
609 static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
610 {
611 int cpu;
612
613 if (WARN_ON(irq >= 16))
614 return;
615
616 /*
617 * Ensure that stores to Normal memory are visible to the
618 * other CPUs before issuing the IPI.
619 */
620 smp_wmb();
621
622 for_each_cpu(cpu, mask) {
623 unsigned long cluster_id = cpu_logical_map(cpu) & ~0xffUL;
624 u16 tlist;
625
626 tlist = gic_compute_target_list(&cpu, mask, cluster_id);
627 gic_send_sgi(cluster_id, tlist, irq);
628 }
629
630 /* Force the above writes to ICC_SGI1R_EL1 to be executed */
631 isb();
632 }
分别向mask中的cpu发送sgi中断,而中断号就是IPI_CALL_FUNC,而这个中断是在handle_IPI中处理的(之前的博文有讲过,不明白的同学可以再过去看看)
594 void handle_IPI(int ipinr, struct pt_regs *regs)
595 {
596 unsigned int cpu = smp_processor_id();
597 struct pt_regs *old_regs = set_irq_regs(regs);
598
599 if ((unsigned)ipinr < NR_IPI) {
600 trace_ipi_entry_rcuidle(ipi_types[ipinr]);
601 __inc_irq_stat(cpu, ipi_irqs[ipinr]);
602 }
603
604 switch (ipinr) {
605 case IPI_WAKEUP:
606 break;
607
619
620 case IPI_CALL_FUNC:
621 irq_enter();
622 generic_smp_call_function_interrupt();
623 irq_exit();
624 break;
看622行中断号为IPI_CALL_FUNC的处理函数generic_smp_call_function_interrupt
http://lxr.free-electrons.com/source/include/linux/smp.h#L110
110 #define generic_smp_call_function_interrupt \
111 generic_smp_call_function_single_interrupt
原来是个宏
192 void generic_smp_call_function_single_interrupt(void)
193 {
194 flush_smp_call_function_queue(true);
195 }
继续看
210 */
211 static void flush_smp_call_function_queue(bool warn_cpu_offline)
212 {
213 struct llist_head *head;
214 struct llist_node *entry;
215 struct call_single_data *csd, *csd_next;
216 static bool warned;
217
218 WARN_ON(!irqs_disabled());
219
220 head = this_cpu_ptr(&call_single_queue);
221 entry = llist_del_all(head);
222 entry = llist_reverse_order(entry);
223
224 /* There shouldn't be any pending callbacks on an offline CPU. */
225 if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
226 !warned && !llist_empty(head))) {
227 warned = true;
228 WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
229
230 /*
231 * We don't have to use the _safe() variant here
232 * because we are not invoking the IPI handlers yet.
233 */
234 llist_for_each_entry(csd, entry, llist)
235 pr_warn("IPI callback %pS sent to offline CPU\n",
236 csd->func);
237 }
238
239 llist_for_each_entry_safe(csd, csd_next, entry, llist) {
240 smp_call_func_t func = csd->func;
241 void *info = csd->info;
242
243 /* Do we wait until *after* callback? */
244 if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
245 func(info);
246 csd_unlock(csd);
247 } else {
248 csd_unlock(csd);
249 func(info);
250 }
251 }
252
253 /*
254 * Handle irq works queued remotely by irq_work_queue_on().
255 * Smp functions above are typically synchronous so they
256 * better run first since some other CPUs may be busy waiting
257 * for them.
258 */
259 irq_work_run();
260 }
可以看到执行func的时候还分同步还是异步,具体在244~250之间。可以看到func终于被执行到了。
需要注意的是func可能被除当前cpu外的其他多个cpu同时执行.
这两个函数执行的时候抢占是被禁止的.
我们直接看其源码的实现。
672行指示执行这个函数的时候可能sleep.
674行申请一个cpumask_var_t 结构,注意有加likely说明大部分情况下走这个case,只有在内存紧张时才会走
else的case
676~678行在online的cpu上执行cond_func,如果这个函数返回true,则在678行设定对应的cpu位.
680行使能抢占
681行释放内存
665 void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
666 smp_call_func_t func, void *info, bool wait,
667 gfp_t gfp_flags)
668 {
669 cpumask_var_t cpus;
670 int cpu, ret;
671
672 might_sleep_if(gfpflags_allow_blocking(gfp_flags));
673
674 if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
675 preempt_disable();
676 for_each_online_cpu(cpu)
677 if (cond_func(cpu, info))
678 cpumask_set_cpu(cpu, cpus);
679 on_each_cpu_mask(cpus, func, info, wait);
680 preempt_enable();
681 free_cpumask_var(cpus);
682 } else {
683 /*
684 * No free cpumask, bother. No matter, we'll
685 * just have to IPI them one by one.
686 */
687 preempt_disable();
688 for_each_online_cpu(cpu)
689 if (cond_func(cpu, info)) {
690 ret = smp_call_function_single(cpu, func,
691 info, wait);
692 WARN_ON_ONCE(ret);
693 }
694 preempt_enable();
695 }
696 }
on_each_cpu_cond 函数的679行再调用on_each_cpu_mask 函数以cpus为参数调用func。我们看看on_each_cpu_mask的实现
622 void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
623 void *info, bool wait)
624 {
625 int cpu = get_cpu();
626
627 smp_call_function_many(mask, func, info, wait);
628 if (cpumask_test_cpu(cpu, mask)) {
629 unsigned long flags;
630 local_irq_save(flags);
631 func(info);
632 local_irq_restore(flags);
633 }
634 put_cpu();
635 }
在625行得到当前的cpu,code如下。注意调用者on_each_cpu_cond已经禁止抢占了,个人感觉没必要再调用preempt_disable,直接调用smp_processor_id 就可以得到当前cpu id.
185 #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
627行调用smp_call_function_many 在非当前cpu上调用func 函数,这个函数后面分析
628行通过cpumask_test_cpu 检测当前cpu是否可以执行func,如果可以的话,就在当前cpu上运行func
再来看看on_each_cpu_cond 中682行else的条件
执行else 说明zalloc_cpumask_var 申请memory 失败了
cond_func的执行情况和likely的情况一样,我们重点看cond_func 返回true后在其他cpu上执行func的情况.
int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
271 int wait)
272 {
299 err = generic_exec_single(cpu, csd, func, info);
306 return err;
307 }
继续调用generic_exec_single
143 static int generic_exec_single(int cpu, struct call_single_data *csd,
144 smp_call_func_t func, void *info)
145 {
146 if (cpu == smp_processor_id()) {
147 unsigned long flags;
148
149 /*
150 * We can unlock early even for the synchronous on-stack case,
151 * since we're doing this from the same CPU..
152 */
153 csd_unlock(csd);
154 local_irq_save(flags);
155 func(info);
156 local_irq_restore(flags);
157 return 0;
158 }
159
160
161 if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
162 csd_unlock(csd);
163 return -ENXIO;
164 }
165
166 csd->func = func;
167 csd->info = info;
168
169 /*
170 * The list addition should be visible before sending the IPI
171 * handler locks the list to pull the entry off it because of
172 * normal cache coherency rules implied by spinlocks.
173 *
174 * If IPIs can go out of order to the cache coherency protocol
175 * in an architecture, sufficient synchronisation should be added
176 * to arch code to make it appear to obey cache coherency WRT
177 * locking and barrier primitives. Generic code isn't really
178 * equipped to do the right thing...
179 */
180 if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
181 arch_send_call_function_single_ipi(cpu);
182
183 return 0;
184 }
可以看到如果146行判断是当前cpu,则直接执行这个func。
如果不是当前cpu 则通过arch_send_call_function_single_ipi 来发ipi中断让其他cpu执行
770 void arch_send_call_function_single_ipi(int cpu)
771 {
772 smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
773 }
接着看smp_cross_call的实现
487 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
488 {
489 trace_ipi_raise_rcuidle(target, ipi_types[ipinr]);
490 __smp_cross_call(target, ipinr);
491 }
而__smp_cross_call 是通过set_smp_cross_call 赋值的
470 void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int))
471 {
472 if (!__smp_cross_call)
473 __smp_cross_call = fn;
474 }
而这个set_smp_cross_call 函数是在gic_smp_init 中设定的
http://lxr.free-electrons.com/source/drivers/irqchip/irq-gic-v3.c#L636
634 static void gic_smp_init(void)
635 {
636 set_smp_cross_call(gic_raise_softirq);
637 register_cpu_notifier(&gic_cpu_notifier);
638 }
所以__smp_cross_call == gic_raise_softirq
所以调用arch_send_call_function_single_ipi就是调用gic_raise_softirq
609 static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
610 {
611 int cpu;
612
613 if (WARN_ON(irq >= 16))
614 return;
615
616 /*
617 * Ensure that stores to Normal memory are visible to the
618 * other CPUs before issuing the IPI.
619 */
620 smp_wmb();
621
622 for_each_cpu(cpu, mask) {
623 unsigned long cluster_id = cpu_logical_map(cpu) & ~0xffUL;
624 u16 tlist;
625
626 tlist = gic_compute_target_list(&cpu, mask, cluster_id);
627 gic_send_sgi(cluster_id, tlist, irq);
628 }
629
630 /* Force the above writes to ICC_SGI1R_EL1 to be executed */
631 isb();
632 }
分别向mask中的cpu发送sgi中断,而中断号就是IPI_CALL_FUNC,而这个中断是在handle_IPI中处理的(之前的博文有讲过,不明白的同学可以再过去看看)
594 void handle_IPI(int ipinr, struct pt_regs *regs)
595 {
596 unsigned int cpu = smp_processor_id();
597 struct pt_regs *old_regs = set_irq_regs(regs);
598
599 if ((unsigned)ipinr < NR_IPI) {
600 trace_ipi_entry_rcuidle(ipi_types[ipinr]);
601 __inc_irq_stat(cpu, ipi_irqs[ipinr]);
602 }
603
604 switch (ipinr) {
605 case IPI_WAKEUP:
606 break;
607
619
620 case IPI_CALL_FUNC:
621 irq_enter();
622 generic_smp_call_function_interrupt();
623 irq_exit();
624 break;
看622行中断号为IPI_CALL_FUNC的处理函数generic_smp_call_function_interrupt
http://lxr.free-electrons.com/source/include/linux/smp.h#L110
110 #define generic_smp_call_function_interrupt \
111 generic_smp_call_function_single_interrupt
原来是个宏
192 void generic_smp_call_function_single_interrupt(void)
193 {
194 flush_smp_call_function_queue(true);
195 }
继续看
210 */
211 static void flush_smp_call_function_queue(bool warn_cpu_offline)
212 {
213 struct llist_head *head;
214 struct llist_node *entry;
215 struct call_single_data *csd, *csd_next;
216 static bool warned;
217
218 WARN_ON(!irqs_disabled());
219
220 head = this_cpu_ptr(&call_single_queue);
221 entry = llist_del_all(head);
222 entry = llist_reverse_order(entry);
223
224 /* There shouldn't be any pending callbacks on an offline CPU. */
225 if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
226 !warned && !llist_empty(head))) {
227 warned = true;
228 WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
229
230 /*
231 * We don't have to use the _safe() variant here
232 * because we are not invoking the IPI handlers yet.
233 */
234 llist_for_each_entry(csd, entry, llist)
235 pr_warn("IPI callback %pS sent to offline CPU\n",
236 csd->func);
237 }
238
239 llist_for_each_entry_safe(csd, csd_next, entry, llist) {
240 smp_call_func_t func = csd->func;
241 void *info = csd->info;
242
243 /* Do we wait until *after* callback? */
244 if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
245 func(info);
246 csd_unlock(csd);
247 } else {
248 csd_unlock(csd);
249 func(info);
250 }
251 }
252
253 /*
254 * Handle irq works queued remotely by irq_work_queue_on().
255 * Smp functions above are typically synchronous so they
256 * better run first since some other CPUs may be busy waiting
257 * for them.
258 */
259 irq_work_run();
260 }
可以看到执行func的时候还分同步还是异步,具体在244~250之间。可以看到func终于被执行到了。
需要注意的是func可能被除当前cpu外的其他多个cpu同时执行.