The symbol call_function_single_interrupt is defined in entry_64.S, and it's an assembly routine. It will save a partial stack frame and prepare stack for the IPI irq context and call smp_call_function_single_interrupt to handle the IPI for call function single. So we can confirm that the back trace is on IRQ stack. But the reported stack overflow is on exception stack - it's reported while servicing the page fault caused by the corrupted thread_info.
I will continue to examine this irq stack a bit more to make sure there is nothing wrong with it.
Another thing noticed,
do_IRQ will also lead to a similar stack overflow check as the code does in process context.
do_IRQ -> handle_irq -> stack_overflow_check
However, the do_IRQ code path is not executed when handling SMP IPI interrupts, so the irq stack is never actually checked on that path. Why???
#check for stack overrun in process context
stackend = end_of_stack(tsk);
if (tsk != &init_task && *stackend != STACK_END_MAGIC)
printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
#check only if CONFIG_DEBUG_STACKOVERFLOW turned on
/*
 * stack_overflow_check() - sanity-check the stack pointer saved in @regs.
 * @regs: register snapshot whose ->sp is validated.
 *
 * The whole body is compiled out unless CONFIG_DEBUG_STACKOVERFLOW is
 * defined, in which case the function does nothing.
 *
 * When enabled, regs->sp is accepted if it falls inside any one of three
 * address ranges, tested in this order: the current task's stack, this
 * CPU's irq stack, and the range spanned by this CPU's exception stacks.
 * If none matches, a one-time warning is printed and, when
 * sysctl_panic_on_stackoverflow is non-zero, the kernel panics.
 *
 * Naming: in every range below "top" is the lower bound and "bottom" the
 * upper bound of the accepted addresses (see the >= top / <= bottom tests).
 */
static inline void stack_overflow_check(struct pt_regs *regs)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Headroom, in bytes, added to the low end of each accepted range. */
#define STACK_TOP_MARGIN 128
struct orig_ist *oist;
u64 irq_stack_top, irq_stack_bottom;
u64 estack_top, estack_bottom;
u64 curbase = (u64)task_stack_page(current);
/* NOTE(review): presumably skips interrupts taken from user mode - confirm. */
if (user_mode_vm(regs))
return;
/*
 * Range 1: the current task's stack. The lower bound leaves room for
 * thread_info, one pt_regs and the margin above the stack page base.
 */
if (regs->sp >= curbase + sizeof(struct thread_info) +
sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
regs->sp <= curbase + THREAD_SIZE)
return;
/*
 * Range 2: this CPU's irq stack, from the start of the irq_stack array
 * (plus margin) up to the value stored in the per-cpu irq_stack_ptr.
 */
irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) +
STACK_TOP_MARGIN;
irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr);
if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
return;
/*
 * Range 3: the exception stacks, treated as a single range running from
 * EXCEPTION_STKSZ below ist[0] (plus margin) up to the last ist[] entry.
 * NOTE(review): this assumes the stacks are contiguous and that each
 * ist[] entry holds the high end of its stack - confirm.
 */
oist = &__get_cpu_var(orig_ist);
estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
return;
/* sp matched none of the known stacks: report it, once. */
WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
current->comm, curbase, regs->sp,
irq_stack_top, irq_stack_bottom,
estack_top, estack_bottom);
if (sysctl_panic_on_stackoverflow)
panic("low stack detected by irq handler - check messages\n");
#endif
}
#prepare stack
cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
#the list of pointers to irq stacks
DECLARE_PER_CPU(char *, irq_stack_ptr);
/*
 * Storage for one irq stack, overlaid with the slot that holds the
 * stack canary. Both views share the same starting address: the
 * irq_stack array of IRQ_STACK_SIZE bytes, and an anonymous struct whose
 * stack_canary member follows 40 bytes of padding (gs_base).
 */
union irq_stack_union {
char irq_stack[IRQ_STACK_SIZE];
/*
 * GCC hardcodes the stack canary as %gs:40. Since the
 * irq_stack is the object at %gs:0, we reserve the bottom
 * 48 bytes of the irq stack for the canary.
 */
struct {
/* Padding that places stack_canary at byte offset 40. */
char gs_base[40];
unsigned long stack_canary;
};
};
#allocate per-cpu irq stack
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
IRQ_STACK_SIZE - 64;
crash> p irq_stack_ptr
PER-CPU DATA TYPE:
char *irq_stack_ptr;
PER-CPU ADDRESSES:
[0]: ffff88103fa0b7e8
[1]: ffff88103fa2b7e8
[2]: ffff88103fa4b7e8
[3]: ffff88103fa6b7e8
[4]: ffff88103fa8b7e8
[5]: ffff88103faab7e8
[6]: ffff88103facb7e8
[7]: ffff88103faeb7e8
[8]: ffff88103fb0b7e8
[9]: ffff88103fb2b7e8
[10]: ffff88103fb4b7e8
[11]: ffff88103fb6b7e8
[12]: ffff88203f20b7e8
[13]: ffff88203f22b7e8
[14]: ffff88203f24b7e8
[15]: ffff88203f26b7e8
[16]: ffff88203f28b7e8
[17]: ffff88203f2ab7e8
[18]: ffff88203f2cb7e8
[19]: ffff88203f2eb7e8
[20]: ffff88203f30b7e8
[21]: ffff88203f32b7e8
[22]: ffff88203f34b7e8
[23]: ffff88203f36b7e8
[24]: ffff88103fb8b7e8
[25]: ffff88103fbab7e8
[26]: ffff88103fbcb7e8
[27]: ffff88103fbeb7e8
[28]: ffff88103fc0b7e8
[29]: ffff88103fc2b7e8
[30]: ffff88103fc4b7e8
[31]: ffff88103fc6b7e8
[32]: ffff88103fc8b7e8
[33]: ffff88103fcab7e8
[34]: ffff88103fccb7e8
[35]: ffff88103fceb7e8
[36]: ffff88203f38b7e8
[37]: ffff88203f3ab7e8
[38]: ffff88203f3cb7e8
[39]: ffff88203f3eb7e8
[40]: ffff88203f40b7e8
[41]: ffff88203f42b7e8
[42]: ffff88203f44b7e8
[43]: ffff88203f46b7e8
[44]: ffff88203f48b7e8
[45]: ffff88203f4ab7e8
[46]: ffff88203f4cb7e8
[47]: ffff88203f4eb7e8
irq_stack_ptr [24]: ffff88103fb8b7e8
Plan: compare the stack base address with each frame address in the back trace. But the first frame looks too large... why??? Nested too deeply?? Looking at __preempt_count, which I think should be incremented for each nested irq.
crash> rd ffff88103fb8b7e8
ffff88103fb8b7e8: ffff88103fb83fc0 .?.?....
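The irq stack is fine after all. I forgot to dereference the pointer :) (the address listed for irq_stack_ptr is where the per-cpu variable lives; the stack pointer is the value stored there).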
crash> p __preempt_count
PER-CPU DATA TYPE:
int __preempt_count;
PER-CPU ADDRESSES:
[0]: ffff88103fa0b7e0
[1]: ffff88103fa2b7e0
[2]: ffff88103fa4b7e0
[3]: ffff88103fa6b7e0
[4]: ffff88103fa8b7e0
[5]: ffff88103faab7e0
[6]: ffff88103facb7e0
[7]: ffff88103faeb7e0
[8]: ffff88103fb0b7e0
[9]: ffff88103fb2b7e0
[10]: ffff88103fb4b7e0
[11]: ffff88103fb6b7e0
[12]: ffff88203f20b7e0
[13]: ffff88203f22b7e0
[14]: ffff88203f24b7e0
[15]: ffff88203f26b7e0
[16]: ffff88203f28b7e0
[17]: ffff88203f2ab7e0
[18]: ffff88203f2cb7e0
[19]: ffff88203f2eb7e0
[20]: ffff88203f30b7e0
[21]: ffff88203f32b7e0
[22]: ffff88203f34b7e0
[23]: ffff88203f36b7e0
[24]: ffff88103fb8b7e0
[25]: ffff88103fbab7e0
[26]: ffff88103fbcb7e0
[27]: ffff88103fbeb7e0
[28]: ffff88103fc0b7e0
[29]: ffff88103fc2b7e0
[30]: ffff88103fc4b7e0
[31]: ffff88103fc6b7e0
[32]: ffff88103fc8b7e0
[33]: ffff88103fcab7e0
[34]: ffff88103fccb7e0
[35]: ffff88103fceb7e0
[36]: ffff88203f38b7e0
[37]: ffff88203f3ab7e0
[38]: ffff88203f3cb7e0
[39]: ffff88203f3eb7e0
[40]: ffff88203f40b7e0
[41]: ffff88203f42b7e0
[42]: ffff88203f44b7e0
[43]: ffff88203f46b7e0
[44]: ffff88203f48b7e0
[45]: ffff88203f4ab7e0
[46]: ffff88203f4cb7e0
[47]: ffff88203f4eb7e0
Nested 8 times, so has the IRQ stack already overflowed? Those would all have to be SMP IPI interrupts, since the other irq handling paths would have found the overflow if CONFIG_DEBUG_STACKOVERFLOW were turned on. Need to check this config.
crash> rd ffff88103fb8b7e0
ffff88103fb8b7e0: 0000000080000100 ........
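Nested eight times; seems all right. BTW, CONFIG_DEBUG_STACKOVERFLOW is off in the Ubuntu kernel. (TODO: double-check the "eight" reading. The value 0x80000100 has bit 8 set, and in the usual preempt_count layout bits 8-15 are the softirq count while the hardirq count sits in bits 16-19, so this may mean "in softirq" rather than a hardirq nesting depth of 8.)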
I will continue to examine this irq stack a bit more to make sure there is nothing wrong with it.
Another thing noticed,
do_IRQ will also lead to a similar stack overflow check as the code does in process context.
do_IRQ -> handle_irq -> stack_overflow_check
However, the do_IRQ code path is not executed when handling SMP IPI interrupts, so the irq stack is never actually checked on that path. Why???
#check for stack overrun in process context
stackend = end_of_stack(tsk);
if (tsk != &init_task && *stackend != STACK_END_MAGIC)
printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
#check only if CONFIG_DEBUG_STACKOVERFLOW turned on
/*
 * stack_overflow_check() - sanity-check the stack pointer saved in @regs.
 * @regs: register snapshot whose ->sp is validated.
 *
 * The whole body is compiled out unless CONFIG_DEBUG_STACKOVERFLOW is
 * defined, in which case the function does nothing.
 *
 * When enabled, regs->sp is accepted if it falls inside any one of three
 * address ranges, tested in this order: the current task's stack, this
 * CPU's irq stack, and the range spanned by this CPU's exception stacks.
 * If none matches, a one-time warning is printed and, when
 * sysctl_panic_on_stackoverflow is non-zero, the kernel panics.
 *
 * Naming: in every range below "top" is the lower bound and "bottom" the
 * upper bound of the accepted addresses (see the >= top / <= bottom tests).
 */
static inline void stack_overflow_check(struct pt_regs *regs)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Headroom, in bytes, added to the low end of each accepted range. */
#define STACK_TOP_MARGIN 128
struct orig_ist *oist;
u64 irq_stack_top, irq_stack_bottom;
u64 estack_top, estack_bottom;
u64 curbase = (u64)task_stack_page(current);
/* NOTE(review): presumably skips interrupts taken from user mode - confirm. */
if (user_mode_vm(regs))
return;
/*
 * Range 1: the current task's stack. The lower bound leaves room for
 * thread_info, one pt_regs and the margin above the stack page base.
 */
if (regs->sp >= curbase + sizeof(struct thread_info) +
sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
regs->sp <= curbase + THREAD_SIZE)
return;
/*
 * Range 2: this CPU's irq stack, from the start of the irq_stack array
 * (plus margin) up to the value stored in the per-cpu irq_stack_ptr.
 */
irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) +
STACK_TOP_MARGIN;
irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr);
if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
return;
/*
 * Range 3: the exception stacks, treated as a single range running from
 * EXCEPTION_STKSZ below ist[0] (plus margin) up to the last ist[] entry.
 * NOTE(review): this assumes the stacks are contiguous and that each
 * ist[] entry holds the high end of its stack - confirm.
 */
oist = &__get_cpu_var(orig_ist);
estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
return;
/* sp matched none of the known stacks: report it, once. */
WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
current->comm, curbase, regs->sp,
irq_stack_top, irq_stack_bottom,
estack_top, estack_bottom);
if (sysctl_panic_on_stackoverflow)
panic("low stack detected by irq handler - check messages\n");
#endif
}
#prepare stack
cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
#the list of pointers to irq stacks
DECLARE_PER_CPU(char *, irq_stack_ptr);
/*
 * Storage for one irq stack, overlaid with the slot that holds the
 * stack canary. Both views share the same starting address: the
 * irq_stack array of IRQ_STACK_SIZE bytes, and an anonymous struct whose
 * stack_canary member follows 40 bytes of padding (gs_base).
 */
union irq_stack_union {
char irq_stack[IRQ_STACK_SIZE];
/*
 * GCC hardcodes the stack canary as %gs:40. Since the
 * irq_stack is the object at %gs:0, we reserve the bottom
 * 48 bytes of the irq stack for the canary.
 */
struct {
/* Padding that places stack_canary at byte offset 40. */
char gs_base[40];
unsigned long stack_canary;
};
};
#allocate per-cpu irq stack
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
IRQ_STACK_SIZE - 64;
crash> p irq_stack_ptr
PER-CPU DATA TYPE:
char *irq_stack_ptr;
PER-CPU ADDRESSES:
[0]: ffff88103fa0b7e8
[1]: ffff88103fa2b7e8
[2]: ffff88103fa4b7e8
[3]: ffff88103fa6b7e8
[4]: ffff88103fa8b7e8
[5]: ffff88103faab7e8
[6]: ffff88103facb7e8
[7]: ffff88103faeb7e8
[8]: ffff88103fb0b7e8
[9]: ffff88103fb2b7e8
[10]: ffff88103fb4b7e8
[11]: ffff88103fb6b7e8
[12]: ffff88203f20b7e8
[13]: ffff88203f22b7e8
[14]: ffff88203f24b7e8
[15]: ffff88203f26b7e8
[16]: ffff88203f28b7e8
[17]: ffff88203f2ab7e8
[18]: ffff88203f2cb7e8
[19]: ffff88203f2eb7e8
[20]: ffff88203f30b7e8
[21]: ffff88203f32b7e8
[22]: ffff88203f34b7e8
[23]: ffff88203f36b7e8
[24]: ffff88103fb8b7e8
[25]: ffff88103fbab7e8
[26]: ffff88103fbcb7e8
[27]: ffff88103fbeb7e8
[28]: ffff88103fc0b7e8
[29]: ffff88103fc2b7e8
[30]: ffff88103fc4b7e8
[31]: ffff88103fc6b7e8
[32]: ffff88103fc8b7e8
[33]: ffff88103fcab7e8
[34]: ffff88103fccb7e8
[35]: ffff88103fceb7e8
[36]: ffff88203f38b7e8
[37]: ffff88203f3ab7e8
[38]: ffff88203f3cb7e8
[39]: ffff88203f3eb7e8
[40]: ffff88203f40b7e8
[41]: ffff88203f42b7e8
[42]: ffff88203f44b7e8
[43]: ffff88203f46b7e8
[44]: ffff88203f48b7e8
[45]: ffff88203f4ab7e8
[46]: ffff88203f4cb7e8
[47]: ffff88203f4eb7e8
irq_stack_ptr [24]: ffff88103fb8b7e8
Plan: compare the stack base address with each frame address in the back trace. But the first frame looks too large... why??? Nested too deeply?? Looking at __preempt_count, which I think should be incremented for each nested irq.
crash> rd ffff88103fb8b7e8
ffff88103fb8b7e8: ffff88103fb83fc0 .?.?....
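The irq stack is fine after all. I forgot to dereference the pointer :) (the address listed for irq_stack_ptr is where the per-cpu variable lives; the stack pointer is the value stored there).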
crash> p __preempt_count
PER-CPU DATA TYPE:
int __preempt_count;
PER-CPU ADDRESSES:
[0]: ffff88103fa0b7e0
[1]: ffff88103fa2b7e0
[2]: ffff88103fa4b7e0
[3]: ffff88103fa6b7e0
[4]: ffff88103fa8b7e0
[5]: ffff88103faab7e0
[6]: ffff88103facb7e0
[7]: ffff88103faeb7e0
[8]: ffff88103fb0b7e0
[9]: ffff88103fb2b7e0
[10]: ffff88103fb4b7e0
[11]: ffff88103fb6b7e0
[12]: ffff88203f20b7e0
[13]: ffff88203f22b7e0
[14]: ffff88203f24b7e0
[15]: ffff88203f26b7e0
[16]: ffff88203f28b7e0
[17]: ffff88203f2ab7e0
[18]: ffff88203f2cb7e0
[19]: ffff88203f2eb7e0
[20]: ffff88203f30b7e0
[21]: ffff88203f32b7e0
[22]: ffff88203f34b7e0
[23]: ffff88203f36b7e0
[24]: ffff88103fb8b7e0
[25]: ffff88103fbab7e0
[26]: ffff88103fbcb7e0
[27]: ffff88103fbeb7e0
[28]: ffff88103fc0b7e0
[29]: ffff88103fc2b7e0
[30]: ffff88103fc4b7e0
[31]: ffff88103fc6b7e0
[32]: ffff88103fc8b7e0
[33]: ffff88103fcab7e0
[34]: ffff88103fccb7e0
[35]: ffff88103fceb7e0
[36]: ffff88203f38b7e0
[37]: ffff88203f3ab7e0
[38]: ffff88203f3cb7e0
[39]: ffff88203f3eb7e0
[40]: ffff88203f40b7e0
[41]: ffff88203f42b7e0
[42]: ffff88203f44b7e0
[43]: ffff88203f46b7e0
[44]: ffff88203f48b7e0
[45]: ffff88203f4ab7e0
[46]: ffff88203f4cb7e0
[47]: ffff88203f4eb7e0
Nested 8 times, so has the IRQ stack already overflowed? Those would all have to be SMP IPI interrupts, since the other irq handling paths would have found the overflow if CONFIG_DEBUG_STACKOVERFLOW were turned on. Need to check this config.
crash> rd ffff88103fb8b7e0
ffff88103fb8b7e0: 0000000080000100 ........
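Nested eight times; seems all right. BTW, CONFIG_DEBUG_STACKOVERFLOW is off in the Ubuntu kernel. (TODO: double-check the "eight" reading. The value 0x80000100 has bit 8 set, and in the usual preempt_count layout bits 8-15 are the softirq count while the hardirq count sits in bits 16-19, so this may mean "in softirq" rather than a hardirq nesting depth of 8.)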