目录
一、背景:
同一台机器出现2例KE,且出错位置随机、都在开机几秒内出现,从经验来看,偏向单体(单机硬件)问题,可以通过gdb + minidump的方式进行简单推导
二、第一份KE分析
1、环境准备
../../../../0_code/r0-trunk-0422/prebuilts/gdb/linux-x86/bin/gdb
(gdb) exec-file vmlinux
(gdb) info files
Local exec file:
`/data/sf_user2/bin/KE/X662/XLLEPUB-704/vmlinux', file type elf64-littleaarch64.
Entry point: 0xffffff8008080000
0xffffff8008080000 - 0xffffff8008080040 is .head.text
0xffffff8008080040 - 0xffffff80080800b0 is .plt
0xffffff8008080800 - 0xffffff8008f8c2e0 is .text
0xffffff8008f90000 - 0xffffff80093c284a is .rodata
0xffffff80093c2850 - 0xffffff80093c2858 is ".mmuoff.data.read"
0xffffff80093c2858 - 0xffffff80093c2898 is .got.plt
0xffffff80093c2898 - 0xffffff80093c2898 is .rodata1
0xffffff80093c2898 - 0xffffff80093c2898 is .pci_fixup
0xffffff80093c2898 - 0xffffff80093c2898 is .builtin_fw
0xffffff80093c2898 - 0xffffff80093d88a8 is __ksymtab
0xffffff80093d88a8 - 0xffffff80093ea318 is __ksymtab_gpl
0xffffff80093ea318 - 0xffffff80093ea318 is __ksymtab_unused
0xffffff80093ea318 - 0xffffff80093ea318 is __ksymtab_unused_gpl
0xffffff80093ea318 - 0xffffff80093ea318 is __ksymtab_gpl_future
0xffffff80093ea318 - 0xffffff80093efb1c is __kcrctab
0xffffff80093efb1c - 0xffffff80093f41b8 is __kcrctab_gpl
0xffffff80093f41b8 - 0xffffff80093f41b8 is __kcrctab_unused
0xffffff80093f41b8 - 0xffffff80093f41b8 is __kcrctab_unused_gpl
0xffffff80093f41b8 - 0xffffff80093f41b8 is __kcrctab_gpl_future
0xffffff80093f41b8 - 0xffffff800942634a is __ksymtab_strings
0xffffff800942634a - 0xffffff800942634a is __init_rodata
0xffffff8009426350 - 0xffffff800942b1e8 is __param
0xffffff800942b1e8 - 0xffffff800942c000 is __modver
0xffffff800942c000 - 0xffffff8009431420 is __ex_table
0xffffff8009431420 - 0xffffff8009431438 is .notes
--Type <RET> for more, q to quit, c to continue without paging--
0xffffff8009440000 - 0xffffff80094b9898 is .init.text
0xffffff80094b9898 - 0xffffff80094c1f84 is .exit.text
0xffffff80094c1f84 - 0xffffff8009502eec is .altinstructions
0xffffff8009502eec - 0xffffff80095190c0 is .altinstr_replacement
0xffffff800951a000 - 0xffffff8009530250 is .init.data
0xffffff8009531000 - 0xffffff800953d658 is .data..percpu
0xffffff800953d658 - 0xffffff80098a3490 is .rela.dyn
0xffffff80098b0000 - 0xffffff8009a7c610 is .data
0xffffff8009a7c610 - 0xffffff8009a8306e is __bug_table
0xffffff8009a83800 - 0xffffff8009a83810 is .mmuoff.data.write
0xffffff8009a84000 - 0xffffff8009a84000 is .mmuoff.data.read
0xffffff8009a84000 - 0xffffff8009a84000 is .sbss
0xffffff8009a84000 - 0xffffff800a26de70 is .bss
(gdb) add-symbol-file vmlinux 0xffffff8008080800+0x73f600000 -s .data 0xffffff80098b0000+0x73f600000 -s .bss 0x73f600000+0xffffff8009a84000 -s .data..percpu 0xffffff8009531000+0x73f600000
add symbol table from file "vmlinux" at
.text_addr = 0xffffff8747680800
.data_addr = 0xffffff8748eb0000
.bss_addr = 0xffffff8749084000
.data..percpu_addr = 0xffffff8748b31000
(y or n) y
Reading symbols from vmlinux...
(gdb) core-file SYS_MINI_RDUMP
[New LWP 106]
[New LWP 1]
[New LWP 2]
[New LWP 3]
[New LWP 4]
[New LWP 5]
[New LWP 6]
[New LWP 7]
[New LWP 8]
Core was generated by `console=tty0 console=ttyS0,921600n1 root=/dev/ram vmalloc=400M slub_debug=OFZPU'.
#0 freelist_dereference (s=<optimized out>, ptr_addr=<optimized out>)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c:272
272 /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c: No such file or directory.
[Current thread is 1 (LWP 106)]
2、首先看看bt
(gdb) bt
#0 freelist_dereference (s=<optimized out>, ptr_addr=<optimized out>)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c:272
#1 get_freepointer (s=0xffffffc273003680, object=0x3e8) =》object的值是异常的
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c:278
#2 get_freepointer_safe (s=0xffffffc273003680, object=0x3e8)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c:292
#3 slab_alloc_node (s=0xffffffc273003680, gfpflags=22086336, node=-1, addr=<optimized out>)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c:2755
3、check代码
static inline void *freelist_dereference(const struct kmem_cache *s,
void *ptr_addr)
{
return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
(unsigned long)ptr_addr);
}
static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
return freelist_dereference(s, object + s->offset); =》here异常
}
static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
unsigned long freepointer_addr;
void *p;
if (!debug_pagealloc_enabled())
return get_freepointer(s, object); =》here
static __always_inline void *slab_alloc_node(struct kmem_cache *s,
gfp_t gfpflags, int node, unsigned long addr)
{
void *object;
struct kmem_cache_cpu *c;
struct page *page;
unsigned long tid;
s = slab_pre_alloc_hook(s, gfpflags);
if (!s)
return NULL;
redo:
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab); =》关键点
object = c->freelist;
page = c->page;
if (unlikely(!object || !node_match(page, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
stat(s, ALLOC_SLOWPATH);
} else {
void *next_object = get_freepointer_safe(s, object); =》here
4、查看汇编
(gdb) disas
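Dump of assembler code for function __kmalloc_track_caller: =>为啥这里显示的并不是bt里f 0的函数呢?因为bt最上面的几帧(freelist_dereference、get_freepointer、get_freepointer_safe、slab_alloc_node等)都是内联函数,被内联进了__kmalloc_track_caller,所以都放到同一个栈帧里面了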
0xffffff874785ca3c <+0>: sub sp, sp, #0x60
0xffffff874785ca40 <+4>: stp x29, x30, [sp, #16]
0xffffff874785ca44 <+8>: stp x26, x25, [sp, #32]
0xffffff874785ca48 <+12>: stp x24, x23, [sp, #48]
0xffffff874785ca4c <+16>: stp x22, x21, [sp, #64]
0xffffff874785ca50 <+20>: stp x20, x19, [sp, #80]
0xffffff874785ca54 <+24>: add x29, sp, #0x10
0xffffff874785ca58 <+28>: mov x19, x0
0xffffff874785ca5c <+32>: cmp x0, #0x2, lsl #12
0xffffff874785ca60 <+36>: mov w20, w1
0xffffff874785ca64 <+40>: b.hi 0xffffff874785ccc4 <__kmalloc_track_caller+648> // b.pmore
0xffffff874785ca68 <+44>: mov x0, x19
0xffffff874785ca6c <+48>: mov w1, w20
0xffffff874785ca70 <+52>: mov x21, x2
0xffffff874785ca74 <+56>: bl 0xffffff87478316e0 <kmalloc_slab>
0xffffff874785ca78 <+60>: mov x22, x0
0xffffff874785ca7c <+64>: cmp x0, #0x10
0xffffff874785ca80 <+68>: b.ls 0xffffff874785ccf4 <__kmalloc_track_caller+696> // b.plast
0xffffff874785ca84 <+72>: adrp x8, 0xffffff8748ebe000 <event_hash+848>
0xffffff874785ca88 <+76>: adrp x25, 0xffffff874929a000 <memblock_reserved_init_regions+2968>
0xffffff874785ca8c <+80>: ldr w8, [x8, #480]
0xffffff874785ca90 <+84>: ldr w9, [x25, #896]
0xffffff874785ca94 <+88>: mov x24, x22
0xffffff874785ca98 <+92>: cmp w9, #0x1
0xffffff874785ca9c <+96>: b.lt 0xffffff874785cac0 <__kmalloc_track_caller+132> // b.tstop
0xffffff874785caa0 <+100>: and w8, w20, w8
0xffffff874785caa4 <+104>: tbnz w8, #20, 0xffffff874785cab4 <__kmalloc_track_caller+120>
0xffffff874785caa8 <+108>: ldrb w8, [x22, #11]
0xffffff874785caac <+112>: mov x24, x22
0xffffff874785cab0 <+116>: tbz w8, #2, 0xffffff874785cac0 <__kmalloc_track_caller+132>
0xffffff874785cab4 <+120>: mov x0, x22
0xffffff874785cab8 <+124>: bl 0xffffff8747862b1c <memcg_kmem_get_cache>
0xffffff874785cabc <+128>: mov x24, x0
0xffffff874785cac0 <+132>: cbz x24, 0xffffff874785cc00 <__kmalloc_track_caller+452>
0xffffff874785cac4 <+136>: mrs x26, sp_el0
0xffffff874785cac8 <+140>: ldr w8, [x26, #24]
0xffffff874785cacc <+144>: add w8, w8, #0x1
0xffffff874785cad0 <+148>: str w8, [x26, #24]
0xffffff874785cad4 <+152>: ldr x8, [x24]
0xffffff874785cad8 <+156>: mrs x9, tpidr_el1
0xffffff874785cadc <+160>: add x8, x8, #0x8
0xffffff874785cae0 <+164>: ldr x8, [x9, x8]
0xffffff874785cae4 <+168>: ldr w9, [x26, #24]
0xffffff874785cae8 <+172>: subs w9, w9, #0x1
0xffffff874785caec <+176>: str w9, [x26, #24]
0xffffff874785caf0 <+180>: b.ne 0xffffff874785cafc <__kmalloc_track_caller+192> // b.any
0xffffff874785caf4 <+184>: ldr x9, [x26]
0xffffff874785caf8 <+188>: tbnz w9, #1, 0xffffff874785cb80 <__kmalloc_track_caller+324>
0xffffff874785cafc <+192>: ldr x9, [x24]
0xffffff874785cb00 <+196>: mrs x10, tpidr_el1
0xffffff874785cb04 <+200>: add x9, x10, x9
0xffffff874785cb08 <+204>: ldr x10, [x9, #8]
0xffffff874785cb0c <+208>: cmp x8, x10
0xffffff874785cb10 <+212>: b.ne 0xffffff874785cac8 <__kmalloc_track_caller+140> // b.any
0xffffff874785cb14 <+216>: ldr x23, [x9]
0xffffff874785cb18 <+220>: cbz x23, 0xffffff874785ccfc <__kmalloc_track_caller+704>
0xffffff874785cb1c <+224>: ldrsw x9, [x24, #32]
0xffffff874785cb20 <+228>: add x12, x8, #0x8
=> 0xffffff874785cb24 <+232>: ldr x9, [x23, x9] =>最后死的位置,取x23和x9寄存器和对应的内存内容
5、查看寄存器内容
(gdb) i reg
x0 0xffffffc273003680 -264358578560
x1 0x15102c0 22086336
x2 0xffffff874837ebdc -518479418404
x3 0xffffff800fec3d14 -549488673516
x4 0x15000c0 22020288
x5 0x3 3
x6 0x4 4
x7 0x20 32
x8 0x319 793
x9 0x0 0
x10 0x319 793
x11 0x70 112
x12 0x321 801
x13 0x0 0
x14 0x10 16
x15 0x20 32
x16 0xffffff8748369238 -518479506888
x17 0x7fa288f258 548187730520
x18 0x7d0df32000 537104949248
x19 0x180 384
x20 0x15102c0 22086336
x21 0xffffff874837ebdc -518479418404
x22 0xffffffc273003680 -264358578560
x23 0x3e8 1000
所以:x9 + x23 = 0x3e8,与对应的kernel log 打印符合
[ 6.653369] <1>.(1)[443:init]Unable to handle kernel NULL pointer dereference at virtual address 000003e8
所以问题点有2个:
a/ 一个是x9为啥变成了0;b/ 一个是x23为啥是0x3e8
由上面的代码可知,其实异常点是:object + s->offset
1、首先分析为啥出现了空指针
通过上述大概能推导其实空指针对应的是s->offset,所以看看为啥突然为0了
(gdb) f 0 =》第0帧已经看不到数据了
#0 freelist_dereference (s=<optimized out>, ptr_addr=<optimized out>)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c:272
272 in /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c
(gdb) info locals
No locals.
(gdb) info args
s = <optimized out>
ptr_addr = <optimized out>
(gdb) f 1 =》可以看到s是有值的
#1 get_freepointer (s=0xffffffc273003680, object=0x3e8)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c:278
278 in /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c
对应的代码:
static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
return freelist_dereference(s, object + s->offset); =》278行
}
static inline void *freelist_dereference(const struct kmem_cache *s,
void *ptr_addr)
{
return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr), =》272行
(unsigned long)ptr_addr);
}
如上所述,从f 1到f 0其实并没有修改到s,根据f 1的地址看看它的成员struct kmem_cache
(gdb) p {struct kmem_cache} 0xffffffc273003680 =》确实看到offset为0。注意:由get_freepointer的代码(object + s->offset)可知,offset的含义是"指向下一个空闲object的指针在当前object内部的存放偏移",offset为0表示该指针存放在object的起始处,这本身不一定是异常;真正异常的是object的值(0x3e8)
$1 = {cpu_slab = 0xffffff8748b3f720, flags = 1073741824, min_partial = 5, size = 512, object_size = 512,
offset = 0, cpu_partial = 13, oo = {x = 131104}, max = {x = 131104}, min = {x = 8}, allocflags = 16384,
refcount = 3, ctor = 0x0, inuse = 512, align = 64, reserved = 0, red_left_pad = 0,
name = 0xffffff8748885847 <error: Cannot access memory at address 0xffffff8748885847>, list = {
next = 0xffffffc273003868, prev = 0xffffffc273003568}, kobj = {
name = 0xffffffc26fe0ff80 <error: Cannot access memory at address 0xffffffc26fe0ff80>, entry = {
next = 0xffffffc273003880, prev = 0xffffffc273003580}, parent = 0xffffffc26fd15518,
kset = 0xffffffc26fd15500, ktype = 0xffffff8748f0fdb0 <slab_ktype>, sd = 0xffffffc26fe20220, kref = {
refcount = {refs = {counter = 1}}}, state_initialized = 1, state_in_sysfs = 1,
state_add_uevent_sent = 1, state_remove_uevent_sent = 0, uevent_suppress = 0}, kobj_remove_work = {
data = {counter = 68719476704}, entry = {next = 0xffffffc273003740, prev = 0xffffffc273003740},
func = 0xffffff874785eac0 <sysfs_slab_remove_workfn>}, memcg_params = {root_cache = 0x0, {{
memcg_caches = 0x0, __root_caches_node = {next = 0xffffffc2730038e8, prev = 0xffffffc2730035e8},
children = {next = 0xffffffc273003778, prev = 0xffffffc273003778}}, {memcg = 0x0, children_node = {
next = 0xffffffc2730038e8, prev = 0xffffffc2730035e8}, kmem_caches_node = {
next = 0xffffffc273003778, prev = 0xffffffc273003778}, deact_fn = 0x0, {deact_rcu_head = {
next = 0x0, func = 0x0}, deact_work = {data = {counter = 0}, entry = {next = 0x0, prev = 0x0},
func = 0x0}}}}}, max_attr_size = 0, memcg_kset = 0x0, node = {0xffffffc273000ec0}}
2、object怎么变成了0x3e8
看bt,在第2帧后异常地址已经出现了,所以从f 3开始看为啥
(gdb) bt
#0 freelist_dereference (s=<optimized out>, ptr_addr=<optimized out>)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c:272
#1 get_freepointer (s=0xffffffc273003680, object=0x3e8)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c:278
#2 get_freepointer_safe (s=0xffffffc273003680, object=0x3e8)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c:292
#3 slab_alloc_node (s=0xffffffc273003680, gfpflags=22086336, node=-1, addr=<optimized out>)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/mm/slub.c:2755
代码:object来自于c->freelist,c来自于raw_cpu_ptr(s->cpu_slab)
static __always_inline void *slab_alloc_node(struct kmem_cache *s,
gfp_t gfpflags, int node, unsigned long addr)
{
void *object;
struct kmem_cache_cpu *c;
struct page *page;
unsigned long tid;
s = slab_pre_alloc_hook(s, gfpflags);
if (!s)
return NULL;
redo:
do {
tid = this_cpu_read(s->cpu_slab->tid);
c = raw_cpu_ptr(s->cpu_slab); =》关键点
object = c->freelist;
page = c->page;
if (unlikely(!object || !node_match(page, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
stat(s, ALLOC_SLOWPATH);
} else {
void *next_object = get_freepointer_safe(s, object); =》here
继续看kmem_cache里面成员:
(gdb) p {struct kmem_cache} 0xffffffc273003680
$3 = {cpu_slab = 0xffffff8748b3f720, flags = 1073741824, min_partial = 5, size = 512, object_size = 512,
offset = 0, cpu_partial = 13, oo = {x = 131104}, max = {x = 131104}, min = {x = 8}, allocflags = 16384,
refcount = 3, ctor = 0x0, inuse = 512, align = 64, reserved = 0, red_left_pad = 0,
name = 0xffffff8748885847 <error: Cannot access memory at address 0xffffff8748885847>, list = {
next = 0xffffffc273003868, prev = 0xffffffc273003568}, kobj = {
name = 0xffffffc26fe0ff80 <error: Cannot access memory at address 0xffffffc26fe0ff80>, entry = {
next = 0xffffffc273003880, prev = 0xffffffc273003580}, parent = 0xffffffc26fd15518,
kset = 0xffffffc26fd15500, ktype = 0xffffff8748f0fdb0 <slab_ktype>, sd = 0xffffffc26fe20220, kref = {
refcount = {refs = {counter = 1}}}, state_initialized = 1, state_in_sysfs = 1,
state_add_uevent_sent = 1, state_remove_uevent_sent = 0, uevent_suppress = 0}, kobj_remove_work = {
data = {counter = 68719476704}, entry = {next = 0xffffffc273003740, prev = 0xffffffc273003740},
func = 0xffffff874785eac0 <sysfs_slab_remove_workfn>}, memcg_params = {root_cache = 0x0, {{
memcg_caches = 0x0, __root_caches_node = {next = 0xffffffc2730038e8, prev = 0xffffffc2730035e8},
children = {next = 0xffffffc273003778, prev = 0xffffffc273003778}}, {memcg = 0x0, children_node = {
next = 0xffffffc2730038e8, prev = 0xffffffc2730035e8}, kmem_caches_node = {
next = 0xffffffc273003778, prev = 0xffffffc273003778}, deact_fn = 0x0, {deact_rcu_head = {
next = 0x0, func = 0x0}, deact_work = {data = {counter = 0}, entry = {next = 0x0, prev = 0x0},
func = 0x0}}}}}, max_attr_size = 0, memcg_kset = 0x0, node = {0xffffffc273000ec0}}
所以cpu_slab的地址是:0xffffff8748b3f720
看raw_cpu_ptr的实现:总结下raw_cpu_ptr(ptr)的实现就是将ptr + offset,这个offset保存在一个全局数组__per_cpu_offset里面(按cpu编号索引)
#define raw_cpu_ptr(ptr) \
({ \
__verify_pcpu_ptr(ptr); \
arch_raw_cpu_ptr(ptr); \
})
include/asm-generic/percpu.h
#ifndef arch_raw_cpu_ptr
#define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
#endif
#define SHIFT_PERCPU_PTR(__p, __offset) \
RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset))
#define RELOC_HIDE(ptr, off) \
({ \
unsigned long __ptr; \
__asm__ ("" : "=r"(__ptr) : "0"(ptr)); \
(typeof(ptr)) (__ptr + (off)); \
})
可以打印出来这个值:
(gdb) p __per_cpu_offset =》一共8个cpu,看看最后死在哪个cpu上面,可以看到都是挂在cpu6上面
$6 = {254329384960, 254329475072, 254329565184, 254329655296, 254329745408, 254329835520, 254329925632,
254330015744}
[ 6.667734] <6>-(6)[443:init]Hardware name: MT6769V/CB (DT)
[ 6.668430] <6>-(6)[443:init]task: 00000000acd7970d task.stack: 00000000440264d0
[ 6.669357] <6>-(6)[443:init]pc : __kmalloc_track_caller+0xe8/0x348
[ 6.670139] <6>-(6)[443:init]lr : __kmalloc_track_caller+0x3c/0x348
[ 6.670921] <6>-(6)[443:init]sp : ffffff800fec3b20 pstate : 60400145
所以可以得到struct kmem_cache_cpu的首地址:
254329925632 + cpu_slab = 254329925632 + 0xffffff8748b3f720 = 0xFFFFFFC27FF2A720
(gdb) p {struct kmem_cache_cpu} 0xFFFFFFC27FF2A720
$7 = {freelist = 0xffffffc2706a0e00, tid = 10038, page = 0xffffffbf09c1a800, partial = 0x0}
可以看到freelist的地址是0xffffffc2706a0e00,即object的地址应该是0xffffffc2706a0e00,但是现在实际读出来的却是0x3e8
三、第二份KE分析
LAST_KMSG log:
[ 3.222593] <6>-(6)[335:kworker/u16:4]Unable to handle kernel NULL pointer dereference at virtual address 00000000
[ 3.259588] <6>-(6)[335:kworker/u16:4]CPU: 6 PID: 335 Comm: kworker/u16:4 Tainted: G S W 4.14.186-01762-g6ec3886ce531-dirty #1
[ 3.259590] <6>-(6)[335:kworker/u16:4]Hardware name: MT6769V/CB (DT)
[ 3.259594] <6>-(6)[335:kworker/u16:4]Workqueue: mtk-tpd tpd_init_work_callback
[ 3.259597] <6>-(6)[335:kworker/u16:4]task: 00000000877e4612 task.stack: 000000008b2d5d26
[ 3.259600] <6>-(6)[335:kworker/u16:4]pc : _raw_spin_lock_irq+0x18/0x48
[ 3.259603] <6>-(6)[335:kworker/u16:4]lr : flush_workqueue_prep_pwqs+0x94/0x1b8
[ 3.259605] <6>-(6)[335:kworker/u16:4]sp : ffffff800f8db820 pstate : 208000c5
[ 3.260388] <6>-(6)[335:kworker/u16:4]Hardware name: MT6769V/CB (DT)
[ 3.260391] <6>-(6)[335:kworker/u16:4]Workqueue: mtk-tpd tpd_init_work_callback
[ 3.260393] <6>-(6)[335:kworker/u16:4]Call trace:
[ 3.260395] <6>-(6)[335:kworker/u16:4] dump_backtrace+0x0/0x180
[ 3.260398] <6>-(6)[335:kworker/u16:4] show_stack+0x14/0x1c
[ 3.260400] <6>-(6)[335:kworker/u16:4] dump_stack+0xd4/0x10c
[ 3.260404] <6>-(6)[335:kworker/u16:4] mrdump_common_die+0xe8/0x174
[ 3.260406] <6>-(6)[335:kworker/u16:4] ipanic_die+0x20/0x2c
[ 3.260409] <6>-(6)[335:kworker/u16:4] notify_die+0x64/0xb4
[ 3.260411] <6>-(6)[335:kworker/u16:4] die+0x118/0x27c
[ 3.260414] <6>-(6)[335:kworker/u16:4] __do_kernel_fault+0x130/0x140
[ 3.260415] <6>-(6)[335:kworker/u16:4] do_page_fault+0x44/0x3f0
[ 3.260417] <6>-(6)[335:kworker/u16:4] do_translation_fault+0x44/0x98
[ 3.260419] <6>-(6)[335:kworker/u16:4] do_mem_abort+0x4c/0xd0
[ 3.260421] <6>-(6)[335:kworker/u16:4] el1_da+0x20/0x38
[ 3.260423] <6>-(6)[335:kworker/u16:4] _raw_spin_lock_irq+0x18/0x48
[ 3.260424] <6>-(6)[335:kworker/u16:4] flush_workqueue+0x22c/0x678
[ 3.260427] <6>-(6)[335:kworker/u16:4] ili_dev_remove+0x94/0xe0
[ 3.260428] <6>-(6)[335:kworker/u16:4] ilitek_plat_probe+0x110/0x124
[ 3.260431] <6>-(6)[335:kworker/u16:4] ilitek_spi_probe+0x25c/0x3c4
[ 3.260433] <6>-(6)[335:kworker/u16:4] spi_drv_probe+0x84/0xac
[ 3.260435] <6>-(6)[335:kworker/u16:4] driver_probe_device+0x474/0x4c4
[ 3.260437] <6>-(6)[335:kworker/u16:4] __driver_attach+0xd0/0x100
[ 3.260439] <6>-(6)[335:kworker/u16:4] bus_for_each_dev+0x80/0xc8
[ 3.260440] <6>-(6)[335:kworker/u16:4] driver_attach+0x20/0x28
[ 3.260442] <6>-(6)[335:kworker/u16:4] bus_add_driver+0x11c/0x1e4
[ 3.260444] <6>-(6)[335:kworker/u16:4] driver_register+0xa4/0xf0
[ 3.260446] <6>-(6)[335:kworker/u16:4] __spi_register_driver+0x58/0x60
[ 3.260448] <6>-(6)[335:kworker/u16:4] ili_interface_dev_init+0x94/0xe8
[ 3.260450] <6>-(6)[335:kworker/u16:4] ili_dev_init+0x4c/0x58
[ 3.260452] <6>-(6)[335:kworker/u16:4] tpd_local_init+0x2c/0xbc
[ 3.260454] <6>-(6)[335:kworker/u16:4] tpd_probe+0x20c/0x5fc
[ 3.260456] <6>-(6)[335:kworker/u16:4] platform_drv_probe+0x64/0xb8
[ 3.260458] <6>-(6)[335:kworker/u16:4] driver_probe_device+0x474/0x4c4
[ 3.260460] <6>-(6)[335:kworker/u16:4] __driver_attach+0xd0/0x100
[ 3.260462] <6>-(6)[335:kworker/u16:4] bus_for_each_dev+0x80/0xc8
[ 3.260464] <6>-(6)[335:kworker/u16:4] driver_attach+0x20/0x28
[ 3.260465] <6>-(6)[335:kworker/u16:4] bus_add_driver+0x11c/0x1e4
[ 3.260467] <6>-(6)[335:kworker/u16:4] driver_register+0xa4/0xf0
[ 3.260470] <6>-(6)[335:kworker/u16:4] __platform_driver_register+0x40/0x48
[ 3.260472] <6>-(6)[335:kworker/u16:4] tpd_init_work_callback+0x30/0x54
[ 3.260473] <6>-(6)[335:kworker/u16:4] process_one_work+0x278/0x4d8
[ 3.260475] <6>-(6)[335:kworker/u16:4] worker_thread+0x2c8/0x568
[ 3.260477] <6>-(6)[335:kworker/u16:4] kthread+0x17c/0x18c
[ 3.260479] <6>-(6)[335:kworker/u16:4] ret_from_fork+0x10/0x18
操作步骤类似第一例,不作详细展开,直接贴过程
(gdb) disas
Dump of assembler code for function _raw_spin_lock_irq:
0xffffff91c8b81c30 <+0>: msr daifset, #0x2
0xffffff91c8b81c34 <+4>: mrs x8, sp_el0
0xffffff91c8b81c38 <+8>: ldr w9, [x8,#24]
0xffffff91c8b81c3c <+12>: add w9, w9, #0x1
0xffffff91c8b81c40 <+16>: str w9, [x8,#24]
0xffffff91c8b81c44 <+20>: prfm pstl1strm, [x0]
=> 0xffffff91c8b81c48 <+24>: ldaxr w8, [x0] =》here, 访问x0寄存器所指向地址的内存内容,读取到w8时出错
0xffffff91c8b81c4c <+28>: add w9, w8, #0x10, lsl #12
0xffffff91c8b81c50 <+32>: stxr w10, w9, [x0]
0xffffff91c8b81c54 <+36>: cbnz w10, 0xffffff91c8b81c48 <_raw_spin_lock_irq+24>
0xffffff91c8b81c58 <+40>: eor w9, w8, w8, ror #16
0xffffff91c8b81c5c <+44>: cbz w9, 0xffffff91c8b81c74 <_raw_spin_lock_irq+68>
0xffffff91c8b81c60 <+48>: sevl
0xffffff91c8b81c64 <+52>: wfe
0xffffff91c8b81c68 <+56>: ldaxrh w10, [x0]
0xffffff91c8b81c6c <+60>: eor w9, w10, w8, lsr #16
0xffffff91c8b81c70 <+64>: cbnz w9, 0xffffff91c8b81c64 <_raw_spin_lock_irq+52>
0xffffff91c8b81c74 <+68>: ret
查看寄存器内容:
(gdb) i reg
x0 0x0 0 =》确实已经是空
x1 0x2 2
x2 0x3 3
x3 0x0 0
x4 0x1 1
x5 0x0 0
x6 0xffffff91c9890013 -473360170989
x7 0x8 8
x8 0xffffffe8ac818000 -100185047040
(gdb) f 0
#0 0xffffff91c8b81c48 in arch_spin_lock (lock=<optimized out>)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/arch/arm64/include/asm/spinlock.h:37
37 in /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/arch/arm64/include/asm/spinlock.h
对应的代码:
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int tmp;
arch_spinlock_t lockval, newval;
asm volatile( =》死在汇编里面
/* Atomically increment the next ticket. */
ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %3\n" =》预取指令,主要是将*lock的内容预取到L1 cache里面;因为lock已经是空了,紧接着的ldaxr访问该地址时触发空指针访问异常,所以需要追查为啥lock为空
"1: ldaxr %w0, %3\n" =》 从%3地址中读取一个字(32位)到w0中,并标记该地址为独占访问
" add %w1, %w0, %w5\n"
" stxr %w2, %w1, %3\n"
" cbnz %w2, 1b\n",
/* LSE atomics */
" mov %w2, %w5\n"
" ldadda %w2, %w0, %3\n"
__nops(3)
)
/* Did we get the lock? */
" eor %w1, %w0, %w0, ror #16\n"
" cbz %w1, 3f\n"
/*
* No: spin on the owner. Send a local event to avoid missing an
* unlock before the exclusive load.
*/
" sevl\n"
"2: wfe\n"
" ldaxrh %w2, %4\n"
" eor %w1, %w2, %w0, lsr #16\n"
" cbnz %w1, 2b\n"
/* We got the lock. Critical section starts here. */
"3:"
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
: "memory");
}
继续向上追查:
(gdb) f 1
#1 do_raw_spin_lock (lock=<optimized out>)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/include/linux/spinlock.h:166
166 /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/include/linux/spinlock.h: No such file or directory.
(gdb) f 2
#2 __raw_spin_lock_irq (lock=0x0) =》此时已经为空了
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/include/linux/spinlock_api_smp.h:129
129 /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/include/linux/spinlock_api_smp.h: No such file or directory.
(gdb) f 3
#3 _raw_spin_lock_irq (lock=0x0)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/kernel/locking/spinlock.c:168
168 /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/kernel/locking/spinlock.c: No such file or directory.
(gdb) f 4
#4 0xffffff91c7cd41d8 in spin_lock_irq (lock=0x0)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/include/linux/spinlock.h:342
342 /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/include/linux/spinlock.h: No such file or directory.
(gdb) f 5
#5 flush_workqueue_prep_pwqs (wq=0xffffffe8ac104200, flush_color=2, work_color=3)
at /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/kernel/workqueue.c:2615
2615 /work/buildsrv-ci/workspace/BAK_PY_UNIFIED_VERSION_BUILD_DOCKER/code/kernel-4.14/kernel/workqueue.c: No such file or directory.
对应代码:
static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
int flush_color, int work_color)
{
bool wait = false;
struct pool_workqueue *pwq;
if (flush_color >= 0) {
WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
atomic_set(&wq->nr_pwqs_to_flush, 1);
}
for_each_pwq(pwq, wq) {
struct worker_pool *pool = pwq->pool;
spin_lock_irq(&pool->lock); =》here
(gdb) info args
wq = 0xffffffe8ac104200
flush_color = 2
work_color = 3
(gdb) p {struct workqueue_struct} 0xffffffe8ac104200
$4 = {pwqs = {next = 0xffffffe8ac104000, prev = 0xffffffbebff50370}, list = {
next = 0xffffffe8ac104410, prev = 0xdead000000000200}, mutex = {owner = {
counter = -100185047040}, wait_lock = {{rlock = {raw_lock = {owner = 0, next = 0}}}}, osq = {
tail = {counter = 0}}, wait_list = {next = 0xffffffe8ac104230, prev = 0xffffffe8ac104230}},
work_color = 3, flush_color = 2, nr_pwqs_to_flush = {counter = 1},
first_flusher = 0xffffff800f8db890, flusher_queue = {next = 0xffffffe8ac104258,
prev = 0xffffffe8ac104258}, flusher_overflow = {next = 0xffffffe8ac104268,
prev = 0xffffffe8ac104268}, maydays = {next = 0xffffffe8ac104278, prev = 0xffffffe8ac104278},
rescuer = 0x0, nr_drainers = 0, saved_max_active = 256, unbound_attrs = 0x0, dfl_pwq = 0x0, =>已经为空了
wq_dev = 0x0, name = "bat_check", '\000' <repeats 14 times>, rcu = {next = 0x0,
func = 0xffffff91c7cd672c <rcu_free_wq>}, flags = 8, cpu_pwqs = 0xffffff67c91e9300,
numa_pwq_tbl = 0xffffffe8ac104310}
(gdb) info locals
pool = 0x0 =》已经为空了
__warned = false
__warned = false
__warned = false
wait = <optimized out>
pwq = 0xffffffe8ac103f90 =》但是这里还是有效地址,所以整体已经存在矛盾
(gdb) p {struct pool_workqueue} 0xffffffe8ac103f90
$5 = {pool = 0x0, wq = 0x0, work_color = 0, flush_color = 0, refcnt = 0, nr_in_flight = {0, 0, 0, 0,
0, 0, 0, 8388613, 0, 0, 0, 0, 0, 0, 0}, nr_active = 0, max_active = 0, delayed_works = {
next = 0x0, prev = 0x0}, pwqs_node = {next = 0x0, prev = 0xffffffbebff50270}, mayday_node = {
next = 0xffffffe8ac104210, prev = 0xdead000000000200}, unbound_release_work = {data = {
counter = 0}, entry = {next = 0x0, prev = 0xffffffe8ac104030}, func = 0xffffffe8ac104030},
rcu = {next = 0x400000004, func = 0x0}}
再来梳理一下:
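struct workqueue_struct *wq 里面的成员struct pool_workqueue *dfl_pwq已经为空,而pwq是通过for_each_pwq(pwq, wq)遍历获取到的,因为
dfl_pwq为空,所以预期获取到的pwq也应该是空,但是实际上pwq又是有效地址(0xffffffe8ac103f90),而这个有效地址的成员pool却为空,导致spin_lock_irq(&pool->lock)去访问这个pool里面的lock变量时触发空指针异常
补充(待确认):for_each_pwq遍历的是wq->pwqs链表而不是dfl_pwq,所以dfl_pwq为空与pwq非空并不必然矛盾;另外wq->list.prev为0xdead000000000200(LIST_POISON2)、rcu.func为rcu_free_wq,看起来该wq可能已经走过list_del_rcu/call_rcu流程,建议同时排查ili_dev_remove中是否在destroy_workqueue之后又调用了flush_workqueue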
四、总结
综合以上两例分析情况,基本上可以确认此问题为单机(单体)问题;其实有过相关经验的可以直接下此结论,软件上的推导不过是为了佐证判断,但是对于初学者,此类技能需要掌握