第21章　Linux设备驱动的调试之BUG_ON（）和WARN_ON（）

最新推荐文章于 2024-05-07 13:42:11 发布

静能生悟

最新推荐文章于 2024-05-07 13:42:11 发布

阅读量5.6k

点赞数 2

分类专栏： Linux驱动开发

本文链接：https://blog.csdn.net/xiezhi123456/article/details/80665059

版权

Linux驱动开发专栏收录该内容

239 篇文章 112 订阅

订阅专栏

21.7　BUG_ON（）和WARN_ON（）

内核中有许多地方调用类似BUG（）的语句，它非常像一个内核运行时的断言，意味着本来不该执行到BUG（）这条语句，一旦执行即抛出Oops。BUG（）的定义为：

include/asm-generic/bug.h

#define BUG() do { \
printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
panic("BUG!"); \
} while (0)

其中panic（）定义在kernel/panic.c中，会导致内核崩溃，并打印Oops。

/**
* panic - halt the system
* @fmt: The text string to print
*
* Display a message, then perform cleanups.
*
* This function never returns.
*/
void panic(const char *fmt, ...)
{
static char buf[1024];
va_list args;
long i, i_next = 0;
int state = 0;
int old_cpu, this_cpu;
bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;

/*
* Disable local interrupts. This will prevent panic_smp_self_stop
* from deadlocking the first cpu that invokes the panic, since
* there is nothing to prevent an interrupt handler (that runs
* after setting panic_cpu) from invoking panic() again.
*/
local_irq_disable();

/*
* It's possible to come here directly from a panic-assertion and
* not have preempt disabled. Some functions called from here want
* preempt to be disabled. No point enabling it later though...
*
* Only one CPU is allowed to execute the panic code from here. For
* multiple parallel invocations of panic, all other CPUs either
* stop themself or will wait until they are stopped by the 1st CPU
* with smp_send_stop().
*
* `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which
* comes here, so go ahead.
* `old_cpu == this_cpu' means we came from nmi_panic() which sets
* panic_cpu to this CPU. In this case, this is also the 1st CPU.
*/
this_cpu = raw_smp_processor_id();
old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);

if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
panic_smp_self_stop();

console_verbose();
bust_spinlocks(1);
va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
pr_emerg("Kernel panic - not syncing: %s\n", buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
/*
* Avoid nested stack-dumping if a panic occurs during oops processing
*/
if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
dump_stack();
#endif

/*
* If we have crashed and we have a crash kernel loaded let it handle
* everything else.
* If we want to run this after calling panic_notifiers, pass
* the "crash_kexec_post_notifiers" option to the kernel.
*
* Bypass the panic_cpu check and call __crash_kexec directly.
*/
if (!_crash_kexec_post_notifiers) {
printk_safe_flush_on_panic();
__crash_kexec(NULL);

/*
* Note smp_send_stop is the usual smp shutdown function, which
* unfortunately means it may not be hardened to work in a
* panic situation.
*/
smp_send_stop();
} else {
/*
* If we want to do crash dump after notifier calls and
* kmsg_dump, we will need architecture dependent extra
* works in addition to stopping other CPUs.
*/
crash_smp_send_stop();
}

/*
* Run any panic handlers, including those that might need to
* add information to the kmsg dump output.
*/
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);

/* Call flush even twice. It tries harder with a single online CPU */
printk_safe_flush_on_panic();
kmsg_dump(KMSG_DUMP_PANIC);

/*
* If you doubt kdump always works fine in any situation,
* "crash_kexec_post_notifiers" offers you a chance to run
* panic_notifiers and dumping kmsg before kdump.
* Note: since some panic_notifiers can make crashed kernel
* more unstable, it can increase risks of the kdump failure too.
*
* Bypass the panic_cpu check and call __crash_kexec directly.
*/
if (_crash_kexec_post_notifiers)
__crash_kexec(NULL);

bust_spinlocks(0);

/*
* We may have ended up stopping the CPU holding the lock (in
* smp_send_stop()) while still having some valuable data in the console
* buffer. Try to acquire the lock then release it regardless of the
* result. The release will also print the buffers out. Locks debug
* should be disabled to avoid reporting bad unlock balance when
* panic() is not being callled from OOPS.
*/
debug_locks_off();
console_flush_on_panic();

if (!panic_blink)
panic_blink = no_blink;

if (panic_timeout > 0) {
/*
* Delay timeout seconds before rebooting the machine.
* We can't use the "normal" timers since we just panicked.
*/
pr_emerg("Rebooting in %d seconds..\n", panic_timeout);

for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
touch_nmi_watchdog();
if (i >= i_next) {
i += panic_blink(state ^= 1);
i_next = i + 3600 / PANIC_BLINK_SPD;
}
mdelay(PANIC_TIMER_STEP);
}
}
if (panic_timeout != 0) {
/*
* This will not be a clean reboot, with everything
* shutting down. But if there is a chance of
* rebooting the system it will be rebooted.
*/
emergency_restart();
}
#ifdef __sparc__
{
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n"
"twice on console to return to the boot prom\n");
}
#endif
#if defined(CONFIG_S390)
{
unsigned long caller;

caller = (unsigned long)__builtin_return_address(0);
disabled_wait(caller);
}
#endif
pr_emerg("---[ end Kernel panic - not syncing: %s\n", buf);
local_irq_enable();
for (i = 0; ; i += PANIC_TIMER_STEP) {
touch_softlockup_watchdog();
if (i >= i_next) {
i += panic_blink(state ^= 1);
i_next = i + 3600 / PANIC_BLINK_SPD;
}
mdelay(PANIC_TIMER_STEP);
}
}

EXPORT_SYMBOL(panic);

比如arch/arm/kernel/dma.c中的enable_dma（）函数：

void enable_dma (unsigned int chan)
{
dma_t *dma = dma_channel(chan);

if (!dma->lock)
goto free_dma;

if (dma->active == 0) {
dma->active = 1;
dma->d_ops->enable(chan, dma);
}
return;

free_dma:
pr_err("dma%d: trying to enable free DMA\n", chan);
BUG();
}
EXPORT_SYMBOL(enable_dma);

分析：

上述代码的含义是，如果在dma->lock不成立的情况下，驱动直接调用了enable_dma（），实际上意味
着内核的一个bug。

BUG（）还有一个变体叫BUG_ON（），其内部会引用BUG（），形式为：

include/asm-generic/bug.h

#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)

对于BUG_ON（），只有当括号内的条件成立时，才抛出Oops。

比如drivers/char/random.c中的类似代码：

/*
* Used as a workqueue function so that when the input pool is getting
* full, we can "spill over" some entropy to the output pools. That
* way the output pools can store some of the excess entropy instead
* of letting it go to waste.
*/
static void push_to_pool(struct work_struct *work)
{
struct entropy_store *r = container_of(work, struct entropy_store, push_work);
BUG_ON(!r); /* 当括号内的条件成立时，才抛出Oops */
_xfer_secondary_pool(r, random_read_wakeup_bits/8);
trace_push_to_pool(r->name, r->entropy_count >> ENTROPY_SHIFT,
r->pull->entropy_count >> ENTROPY_SHIFT);
}

除了BUG_ON（）外，内核有个稍微弱一些WARN_ON（），在括号中的条件成立时，内核会抛出栈回溯，但是不会panic（），这通常用于内核抛出一个警告，暗示某种不太合理的事情发生了。

如在kernel/locking/mutex-debug.c中的debug_mutex_unlock（）函数发现：

mutex_unlock（）的调用者和mutex_lock（）的调用者不是同一个线程的时候或者mutex的owner为空的时候，会抛出警告信息：

void debug_mutex_unlock(struct mutex *lock)
{
if (likely(debug_locks)) {
DEBUG_LOCKS_WARN_ON(lock->magic != lock);
if (!lock->owner)
DEBUG_LOCKS_WARN_ON(!lock->owner);
else
DEBUG_LOCKS_WARN_ON(lock->owner != current);
DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
mutex_clear_owner(lock);
}
}

备注：

有时，WARN_ON（）也可以作为一个调试技巧。比如，进到内核某个函数后，不知道这个函数是怎么一级一级被调用进来的，可以在该函数中加入一个WARN_ON（1）。

静能生悟

关注

2
点赞
踩
11

收藏

觉得还不错? 一键收藏
0
评论
第21章　Linux设备驱动的调试之BUG_ON（）和WARN_ON（）

21.7　BUG_ON（）和WARN_ON（）内核中有许多地方调用类似BUG（）的语句，它非常像一个内核运行时的断言，意味着本来不该执行到BUG（）这条语句，一旦执行即抛出Oops。BUG（）的定义为：include/asm-generic/bug.h#define BUG() do { \ printk("BUG: failure at %s:%d/%s()!\n", __F...
复制链接

扫一扫