[Kernel_exception6] BUG: scheduling while atomic

一、实际问题:

1、开机出现原子上下文调度的bug:

[    6.290494] <1>.(1)[285:init]BUG: scheduling while atomic: init/285/0x00000000
[    6.290528] <1>-(1)[285:init][<c010e61c>] (dump_backtrace) from [<c010e954>] (show_stack+0x18/0x1c)
[    6.290533] <1>-(1)[285:init] r6:600b0013 r5:c14556d0 r4:00000000 r3:00040975
[    6.290543] <1>-(1)[285:init][<c010e93c>] (show_stack) from [<c04b5e88>] (dump_stack+0x94/0xa8)
[    6.290551] <1>-(1)[285:init][<c04b5df4>] (dump_stack) from [<c0159a14>] (__schedule_bug+0x58/0x6c)
[    6.290555] <1>-(1)[285:init] r6:c1404a54 r5:c13a4b00 r4:00000000 r3:600b0013
[    6.290561] <1>-(1)[285:init][<c01599bc>] (__schedule_bug) from [<c0ddf568>] (__schedule+0x660/0xa68)
[    6.290564] <1>-(1)[285:init] r4:df68bb00 r3:c13a4b00
[    6.290568] <1>-(1)[285:init][<c0ddef08>] (__schedule) from [<c0ddf9c4>] (schedule+0x54/0xb8)
[    6.290572] <1>-(1)[285:init] r10:00000000 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:00000000
[    6.290574] <1>-(1)[285:init] r4:dd7ca000
[    6.290579] <1>-(1)[285:init][<c0ddf970>] (schedule) from [<c0de3668>] (schedule_hrtimeout_range_clock+0x1ac/0x1b8)
[    6.290582] <1>-(1)[285:init] r4:c1404548 r3:00040975
[    6.290587] <1>-(1)[285:init][<c0de34bc>] (schedule_hrtimeout_range_clock) from [<c0de3694>] (schedule_hrtimeout_range+0x20/0x28)
[    6.290647] <1>-(1)[285:init]Kernel BUG at c0159a24 [verbose debug info unavailable]
[    6.290649] <1>-(1)[285:init]Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM

[    7.290612] <1>-(1)[285:init]PC is at __schedule_bug+0x68/0x6c
[    7.290617] <1>-(1)[285:init]LR is at add_taint+0x28/0x6c

2、使用GDB 解析对应的堆栈信息如下:

(gdb) bt
#0  0xc0159a24 in __schedule_bug (prev=<optimized out>)
    at /home/rlk-buildsrv-tf29/jenkins/workspace/BAK_PY_UNIFIED_VERSION_BUILD/code/kernel-4.9/kernel/sched/core.c:3633
#1  0xc0ddf568 in schedule_debug (prev=<optimized out>)
    at /home/rlk-buildsrv-tf29/jenkins/workspace/BAK_PY_UNIFIED_VERSION_BUILD/code/kernel-4.9/kernel/sched/core.c:3647
#2  __schedule (preempt=<optimized out>)
    at /home/rlk-buildsrv-tf29/jenkins/workspace/BAK_PY_UNIFIED_VERSION_BUILD/code/kernel-4.9/kernel/sched/core.c:3748
#3  0xc0ddf9c4 in schedule ()
    at /home/rlk-buildsrv-tf29/jenkins/workspace/BAK_PY_UNIFIED_VERSION_BUILD/code/kernel-4.9/kernel/sched/core.c:3876
#4  0xc0de3668 in schedule_hrtimeout_range_clock (expires=0x0, delta=0, mode=HRTIMER_MODE_ABS, clock=1)
    at /home/rlk-buildsrv-tf29/jenkins/workspace/BAK_PY_UNIFIED_VERSION_BUILD/code/kernel-4.9/kernel/time/hrtimer.c:1847
#5  0xc0de3694 in schedule_hrtimeout_range (expires=<optimized out>, delta=<optimized out>, 
    mode=<optimized out>)
    at /home/rlk-buildsrv-tf29/jenkins/workspace/BAK_PY_UNIFIED_VERSION_BUILD/code/kernel-4.9/kernel/time/hrtimer.c:1900
#6  0xc02ccbd8 in poll_schedule_timeout (pwq=0xdd7cbcb0, state=<optimized out>, expires=<optimized out>, 
    slack=0)
    at /home/rlk-buildsrv-tf29/jenkins/workspace/BAK_PY_UNIFIED_VERSION_BUILD/code/kernel-4.9/fs/select.c:246
#7  0xc02ce8a0 in do_poll (end_time=<optimized out>, wait=<optimized out>, list=0x0)
(gdb) p {struct thread_info}0xdd7ca000
$2 = {flags = 0, preempt_count = 0, addr_limit = 3204448256, task = 0xdda93000, cpu = 1, cpu_domain = 0, 
  cpu_context = {r4 = 3748182784, r5 = 3736350720, r6 = 0, r7 = 3717657024, r8 = 3718852608, r9 = 3718854016, 

这里我们重点关注preempt_count = 0;

二、代码解析:

1、可以看到看到是如下地方报的BUG:

/*
 * Print scheduling while atomic bug:
 */
static noinline void __schedule_bug(struct task_struct *prev)
{
	printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n",
		prev->comm, prev->pid, preempt_count());
        //......

	dump_stack();
	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
	BUG_ON(1);
}

2、下面可以看到如下进入的__schedule_bug:

(1)查看schedule()函数:

asmlinkage __visible void __sched schedule(void)
{
	struct task_struct *tsk = current;

	sched_submit_work(tsk);
	do {
		preempt_disable();//发生__schedule 前关闭抢占,preempt_count + 1
		__schedule(false);
		sched_preempt_enable_no_resched();
	} while (need_resched());
}

#define __preempt_count_inc() __preempt_count_add(1)
#define __preempt_count_dec() __preempt_count_sub(1)

#define preempt_count_inc() preempt_count_add(1)
#define preempt_count_dec() preempt_count_sub(1)

(2)__schedule会调用schedule_debug:

static void __sched notrace __schedule(bool preempt)
{
	cpu = smp_processor_id();
	rq = cpu_rq(cpu);
	prev = rq->curr;

	schedule_debug(prev);
......
}

(3)我们重点看in_atomic_preempt_off:


static inline void schedule_debug(struct task_struct *prev)
{

	if (unlikely(in_atomic_preempt_off())) {//对原子上下文进行判断
		__schedule_bug(prev);
		preempt_count_set(PREEMPT_DISABLED);
	}
	rcu_sleep_check();

	profile_hit(SCHED_PROFILING, __builtin_return_address(0));

	schedstat_inc(this_rq()->sched_count);
}

这里in_atomic_preempt_off():

/*
 * Check whether we were atomic before we did preempt_disable():
 * (used by the scheduler)
 */
#define PREEMPT_SHIFT	0

#define PREEMPT_OFFSET	(1UL << PREEMPT_SHIFT)

# define PREEMPT_DISABLE_OFFSET	PREEMPT_OFFSET

#define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET)

可知这里判断preempt_count 是否等于1,不等于1时返回true,也就是判断在调用schedule前的环境:

A、处于可调度状态preempt_count = 0,在执行preempt_disable时会进行+1操作,in_atomic_preempt_off返回false;

B、处于不可调度状态preempt_count != 0,这里preempt_count的值又存在两种情况:

  • preempt_count = 1,比如调用在spin_lock中调用了sleep,发生原子调度的情况;
  • preempt_count = 0xffffffff,这种可能是调用了两次spin_unlock之后的preempt_count的值;

       

3、分析到这里可以知道,我们的这个BUG的出现是因为在调用schdule前,preempt_count = 0xffffffff,

在schedule()时,preempt_count + 1 = 0,所以这里in_atomic_preempt_off()返回了true出现的问题,

这个bug我们并没有找到问题的根源,可以打开spin_lock debug的宏去复现问题,才可以找到问题根源。

 

作者:frank_zyp
您的支持是对博主最大的鼓励,感谢您的认真阅读。
本文无所谓版权,欢迎转载。

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值