linux内核异常 log,Linux Kernel WARN()/BUG(), Oops/Panic, Tainted分析

最新推荐文章于 2024-05-29 17:17:21 发布

郑燕侠

最新推荐文章于 2024-05-29 17:17:21 发布

阅读量2.8k

点赞数

文章标签： linux内核异常 log

稳定性范畴, 参考5.x kernel。

kernel Oops

Oops指的就是内核的不正确行为，比如对驱动来说：static int i82092aa_pci_probe(struct pci_dev *dev,

const struct pci_device_id *id)

{

unsigned char configbyte;

int i, ret;

ret = pci_enable_device(dev);

if (ret)

return ret;

/* PCI Configuration Control */

pci_read_config_byte(dev, 0x40, &configbyte);

switch (configbyte&6) {

case 0:

socket_count = 2;

break;

case 2:

socket_count = 1;

break;

case 4:

case 6:

socket_count = 4;

break;

default:

dev_err(&dev->dev,

"Oops, you did something we didn't think of.\n");

ret = -EIO;

goto err_out_disable;

}

这里的PCI配置读出来有异常，我们就认为它是一个Oops，打印一条错误信息，探测失败。

分配内存失败也算一种Oops，只不过不需要打出错误信息。td = kmalloc (sizeof (struct FS_BPENTRY), GFP_ATOMIC);

fs_dprintk (FS_DEBUG_ALLOC, "Alloc transd: %p(%zd)\n", td, sizeof (struct FS_BPENTRY));

if (!td) {

/* Oops out of mem */

return -ENOMEM;

}

在体系架构方面的Oops，比如arm64的bug Oops:static int bug_handler(struct pt_regs *regs, unsigned int esr)

{

switch (report_bug(regs->pc, regs)) {

case BUG_TRAP_TYPE_BUG:

die("Oops - BUG", regs, 0);

break;

如果report_bug()返回的是BUG_TRAP_TYPE_BUG，那就报个Oops log。

再比如非法访问也会走die("Oops", ):static void die_kernel_fault(const char *msg, unsigned long addr,

unsigned int esr, struct pt_regs *regs)

{

bust_spinlocks(1);

pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,

addr);

mem_abort_decode(esr);

show_pte(addr);

die("Oops", regs, esr); //tj

bust_spinlocks(0);

do_exit(SIGKILL);

}

看下die():void die(const char *str, struct pt_regs *regs, int err)

{

int ret;

unsigned long flags;

raw_spin_lock_irqsave(&die_lock, flags);

oops_enter();

console_verbose();

bust_spinlocks(1);

ret = __die(str, err, regs); //tj

if (regs && kexec_should_crash(current))

crash_kexec(regs);

bust_spinlocks(0);

add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);

oops_exit();

if (in_interrupt())

panic("Fatal exception in interrupt"); //tj

if (panic_on_oops)

panic("Fatal exception"); //tj

raw_spin_unlock_irqrestore(&die_lock, flags);

if (ret != NOTIFY_STOP)

do_exit(SIGSEGV);

}

再看下__die():static int __die(const char *str, int err, struct pt_regs *regs)

{

static int die_counter;

int ret;

pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n",

str, err, ++die_counter);

/* trap and error numbers are mostly meaningless on ARM */

ret = notify_die(DIE_OOPS, str, regs, err, 0, SIGSEGV);

if (ret == NOTIFY_STOP)

return ret;

print_modules();

show_regs(regs);

dump_kernel_instr(KERN_EMERG, regs);

return ret;

}

打印类似如下log:35.449887: <6> Internal error: Oops - BUG: 0 [#1] PREEMPT SMP

35.449893: <6> Modules linked in:

35.449901: <6> Process init (pid: 1, stack limit = 0x00000000826895f7)

后面会call panic()，不过是有条件的:if (in_interrupt())

panic("Fatal exception in interrupt");

if (panic_on_oops)

panic("Fatal exception");

如果这个Oops在中断里，会走panic()。如果不在但if (panic_on_oops)成立，也走panic()。

可见，Oops不一定会导致panic。bug_handler()对BUG_TRAP_TYPE_BUG还不默认panic？

btw: arm64的Oops是怎么触发的呢？稍后再看。

Kernel panic

kernel panic就是不可恢复的错误了，怎么处理？要么复位重启，要么就停在这里。/**

* panic - halt the system

* @fmt: The text string to print

* Display a message, then perform cleanups.

* This function never returns.

*/

void panic(const char *fmt, ...)

{

...

pr_emerg("Kernel panic - not syncing: %s\n", buf);

...

if (panic_timeout > 0) { //tj: 延迟重启

/*

* Delay timeout seconds before rebooting the machine.

* We can't use the "normal" timers since we just panicked.

*/

pr_emerg("Rebooting in %d seconds..\n", panic_timeout); //tj

for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {

touch_nmi_watchdog();

if (i >= i_next) {

i += panic_blink(state ^= 1);

i_next = i + 3600 / PANIC_BLINK_SPD;

}

mdelay(PANIC_TIMER

最低0.47元/天解锁文章

郑燕侠

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
linux内核异常 log,Linux Kernel WARN()/BUG(), Oops/Panic, Tainted分析

稳定性范畴, 参考5.x kernel。kernel OopsOops指的就是内核的不正确行为，比如对驱动来说：static int i82092aa_pci_probe(struct pci_dev *dev,const struct pci_device_id *id){unsigned char configbyte;int i, ret;ret = pci_enable_device(de...
复制链接

扫一扫