Symptoms of the problem (kernel oops from dmesg):
"
[ 298.884379] BUG: kernel NULL pointer dereference, address: 0000000000000060
[ 298.884380] #PF: supervisor read access in kernel mode
[ 298.884381] #PF: error_code(0x0000) - not-present page
[ 298.884382] PGD 0 P4D 0
[ 298.884386] Oops: 0000 [#1] SMP PTI
[ 298.884389] CPU: 0 PID: 5 Comm: kworker/0:0 Kdump: loaded Tainted: G S OE 5.9.16+ #16
[ 298.884390] Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.02.0002.122320131210 12/23/2013
[ 298.884397] Workqueue: events work_for_cpu_fn
[ 298.884403] RIP: 0010:bus_add_device.cold.8+0x8c/0x11e
[ 298.884406] Code: 00 00 00 48 83 c7 18 e8 b6 40 b6 ff e9 f7 90 ca ff 4c 8b 45 00 eb 90 48 8b 55 50 48 85 d2 74 2c 48 8b 83 a0 00 00 00 48 89 ee <48> 8b 78 60 48 83 c7 18 e8 9b c5 a5 ff 41 89 c4 85 c0 74 14 48 8b
[ 298.884407] RSP: 0018:ffffbf71800fbd90 EFLAGS: 00010246
[ 298.884409] RAX: 0000000000000000 RBX: ffffffffc077f120 RCX: 0000000000000027
[ 298.884410] RDX: ffff9b6415e24f70 RSI: ffff9b641f3f4028 RDI: ffff9b642ea18008
[ 298.884411] RBP: ffff9b641f3f4028 R08: 0000000000011a86 R09: 0000000000000047
[ 298.884413] R10: 0000000000000000 R11: ffffbf71800fbc08 R12: 0000000000000000
[ 298.884414] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 298.884415] FS: 0000000000000000(0000) GS:ffff9b642ea00000(0000) knlGS:0000000000000000
[ 298.884417] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 298.884418] CR2: 0000000000000060 CR3: 000000041883a006 CR4: 00000000000606f0
[ 298.884419] Call Trace:
[ 298.884425] device_add.cold.48+0x3d6/0x6bb
[ 298.884445] ice_probe+0xa8a/0x1080 [ice]
[ 298.884451] local_pci_probe+0x42/0x80
[ 298.884453] work_for_cpu_fn+0x16/0x20
[ 298.884456] process_one_work+0x1a7/0x370
[ 298.884458] worker_thread+0x1c9/0x370
[ 298.884460] ? process_one_work+0x370/0x370
[ 298.884462] kthread+0x116/0x130
[ 298.884464] ? kthread_park+0x80/0x80
[ 298.884467] ret_from_fork+0x22/0x30
[ 298.884469] Modules linked in: ice(OE+) xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_nat_tftp nft_objref nf_conntrack_tftp nft_counter tun bridge stp llc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat rfkill nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip6_tables nft_compat ip_set nf_tables nfnetlink sunrpc ext4 mbcache jbd2 intel_rapl_msr intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul iTCO_wdt ghash_clmulni_intel iTCO_vendor_support rapl intel_cstate mgag200 drm_kms_helper ipmi_si syscopyarea sysfillrect sysimgblt fb_sys_fops ipmi_devintf drm intel_uncore joydev pcspkr ipmi_msghandler mei_me i2c_i801 mei lpc_ich i2c_smbus ioatdma ip_tables xfs libcrc32c sd_mod t10_pi sg ahci libahci libata igb crc32c_intel i2c_algo_bit dca wmi dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded: ice]
[ 298.884511] CR2: 0000000000000060
"
crash> whatis device_add
<text variable, no debug info> device_add;
crash> whatis bus_add_device
int bus_add_device(struct device *);
Since dmesg reports "[ 298.884403] RIP: 0010:bus_add_device.cold.8+0x8c/0x11e", we begin by disassembling up to that address:
crash> dis -lr bus_add_device.cold.8+0x8c
/images/yzhu/stable-linux/./include/linux/device.h: 666
0xffffffffb8d5fc8c <bus_add_device.cold.8>: mov 0x50(%rbp),%r8
0xffffffffb8d5fc90 <bus_add_device.cold.8+4>: test %r8,%r8
0xffffffffb8d5fc93 <bus_add_device.cold.8+7>: je 0xffffffffb8d5fcff <bus_add_device.cold.8+115>
0xffffffffb8d5fc95 <bus_add_device.cold.8+9>: mov (%rbx),%rcx
0xffffffffb8d5fc98 <bus_add_device.cold.8+12>: mov $0x1c3,%edx
0xffffffffb8d5fc9d <bus_add_device.cold.8+17>: mov $0xffffffffb956a4b7,%rsi
0xffffffffb8d5fca4 <bus_add_device.cold.8+24>: mov $0xffffffffb956a530,%rdi
0xffffffffb8d5fcab <bus_add_device.cold.8+31>: callq 0xffffffffb8d3fbd8 <printk>
/images/yzhu/stable-linux/drivers/base/bus.c: 452 <-------------------------------------------------------This line calls another function; the instructions below prepare its input parameters.
0xffffffffb8d5fcb0 <bus_add_device.cold.8+36>: mov 0x20(%rbx),%rsi <-----------------------------input parameter
0xffffffffb8d5fcb4 <bus_add_device.cold.8+40>: mov %rbp,%rdi <-----------------------------input parameter
0xffffffffb8d5fcb7 <bus_add_device.cold.8+43>: callq 0xffffffffb8d5ee56 <device_add_groups>
0xffffffffb8d5fcbc <bus_add_device.cold.8+48>: mov %eax,%r12d
/images/yzhu/stable-linux/drivers/base/bus.c: 453
0xffffffffb8d5fcbf <bus_add_device.cold.8+51>: test %eax,%eax
0xffffffffb8d5fcc1 <bus_add_device.cold.8+53>: je 0xffffffffb8d5fd05 <bus_add_device.cold.8+121>
/images/yzhu/stable-linux/drivers/base/bus.c: 454
0xffffffffb8d5fcc3 <bus_add_device.cold.8+55>: mov %eax,%ecx
0xffffffffb8d5fcc5 <bus_add_device.cold.8+57>: mov $0x1c6,%edx
0xffffffffb8d5fcca <bus_add_device.cold.8+62>: mov $0xffffffffb956a4b7,%rsi
0xffffffffb8d5fcd1 <bus_add_device.cold.8+69>: mov $0xffffffffb9521507,%rdi
0xffffffffb8d5fcd8 <bus_add_device.cold.8+76>: callq 0xffffffffb8d3fbd8 <printk>
/images/yzhu/stable-linux/drivers/base/bus.c: 474
0xffffffffb8d5fcdd <bus_add_device.cold.8+81>: mov 0x60(%rbp),%rax
/images/yzhu/stable-linux/drivers/base/bus.c: 53
0xffffffffb8d5fce1 <bus_add_device.cold.8+85>: test %rax,%rax
0xffffffffb8d5fce4 <bus_add_device.cold.8+88>: je 0xffffffffb8a08df6 <bus_add_device+54>
/images/yzhu/stable-linux/./include/linux/kobject.h: 218
0xffffffffb8d5fcea <bus_add_device.cold.8+94>: mov 0xa0(%rax),%rdi
0xffffffffb8d5fcf1 <bus_add_device.cold.8+101>: add $0x18,%rdi
0xffffffffb8d5fcf5 <bus_add_device.cold.8+105>: callq 0xffffffffb88c3db0 <kobject_put>
0xffffffffb8d5fcfa <bus_add_device.cold.8+110>: jmpq 0xffffffffb8a08df6 <bus_add_device+54>
/images/yzhu/stable-linux/./include/linux/device.h: 669
0xffffffffb8d5fcff <bus_add_device.cold.8+115>: mov 0x0(%rbp),%r8
/images/yzhu/stable-linux/./include/linux/kobject.h: 90
0xffffffffb8d5fd03 <bus_add_device.cold.8+119>: jmp 0xffffffffb8d5fc95 <bus_add_device.cold.8+9>
/images/yzhu/stable-linux/./include/linux/device.h: 666
0xffffffffb8d5fd05 <bus_add_device.cold.8+121>: mov 0x50(%rbp),%rdx
0xffffffffb8d5fd09 <bus_add_device.cold.8+125>: test %rdx,%rdx
0xffffffffb8d5fd0c <bus_add_device.cold.8+128>: je 0xffffffffb8d5fd3a <bus_add_device.cold.8+174>
0xffffffffb8d5fd0e <bus_add_device.cold.8+130>: mov 0xa0(%rbx),%rax
0xffffffffb8d5fd15 <bus_add_device.cold.8+137>: mov %rbp,%rsi
0xffffffffb8d5fd18 <bus_add_device.cold.8+140>: mov 0x60(%rax),%rdi
crash> dis -s bus_add_device
FILE: drivers/base/bus.c
LINE: 446
441 * - Add device's bus attributes.
442 * - Create links to device's bus.
443 * - Add the device to its bus's list of devices.
444 */
445 int bus_add_device(struct device *dev)
* 446 {
447 struct bus_type *bus = bus_get(dev->bus);
448 int error = 0;
449
450 if (bus) {
451 pr_info("file: %s +%d, bus: '%s': add device %s\n", __FILE__, __LINE__, bus->name, dev_name(dev));
452 error = device_add_groups(dev, bus->dev_groups); <-----------------------------------------This function is interesting
453 if (error) {
454 pr_info("file: %s +%d, error:%d\n", __FILE__, __LINE__, error);
455 goto out_put;
456 }
crash> whatis device_add_groups
int device_add_groups(struct device *, const struct attribute_group **);
crash> struct device ffff9b641f3f4028
struct device {
kobj = {
name = 0xffff9b6415e24f70 "ice_peer_0",
entry = {
next = 0xffff9b6104cdbde0,
prev = 0xffff9b640f993518
},
parent = 0xffff9b641ee0f0b0,
kset = 0xffff9b6104cdbde0,
ktype = 0xffffffffb995aa20,
sd = 0xffff9b642a1c5880,
kref = {
refcount = {
refs = {
counter = 2
}
}
},
state_initialized = 1,
state_in_sysfs = 1,
state_add_uevent_sent = 0,
state_remove_uevent_sent = 0,
uevent_suppress = 0
},
parent = 0xffff9b641ee0f0b0,
p = 0xffff9b641765a000,
init_name = 0x0,
type = 0x0,
bus = 0xffffffffc077f120 <ice_peer_bus>, <---------------This is the bus type (struct bus_type) of the device
driver = 0x0,
platform_data = 0x0,
driver_data = 0x0,
mutex = {
owner = {
counter = 0
},
wait_lock = {
{
rlock = {
raw_lock = {
{
val = {
counter = 0
},
{
locked = 0 '\000',
pending = 0 '\000'
},
{
locked_pending = 0,
tail = 0
}
}
}
crash> struct bus_type 0xffffffffc077f120
struct bus_type {
name = 0xffffffffc076872e "ice_pseudo_bus",
dev_name = 0x0,
dev_root = 0x0,
bus_groups = 0x0,
dev_groups = 0x0, <----------------------------this dev_groups is NULL
drv_groups = 0x0,
match = 0xffffffffc071d0b0 <ice_bus_match>,
uevent = 0x0,
probe = 0xffffffffc071f300 <ice_bus_probe>,
sync_state = 0x0,
remove = 0xffffffffc071f360 <ice_bus_remove>,
shutdown = 0x0,
online = 0x0,
offline = 0x0,
suspend = 0x0,
resume = 0x0,
num_vf = 0x0,
dma_configure = 0x0,
pm = 0x0,
iommu_ops = 0x0,
p = 0x0,
lock_key = {<No data fields>},
need_parent_lock = false
}
crash> eval 0xffffffffc077f120 + 0x20
hexadecimal: ffffffffc077f140
decimal: 18446744072643670336 (-1065881280)
octal: 1777777777770035770500
binary: 1111111111111111111111111111111111000000011101111111000101000000
crash> whatis bus_add_device
int bus_add_device(struct device *);
crash> whatis device_add_groups
int device_add_groups(struct device *, const struct attribute_group **);
crash> struct bus_type -o
struct bus_type {
[0] const char *name;
[8] const char *dev_name;
[16] struct device *dev_root;
[24] const struct attribute_group **bus_groups;
[32] const struct attribute_group **dev_groups;
[40] const struct attribute_group **drv_groups;
[48] int (*match)(struct device *, struct device_driver *);
[56] int (*uevent)(struct device *, struct kobj_uevent_env *);
[64] int (*probe)(struct device *);
[72] void (*sync_state)(struct device *);
[80] int (*remove)(struct device *);
[88] void (*shutdown)(struct device *);
[96] int (*online)(struct device *);
[104] int (*offline)(struct device *);
[112] int (*suspend)(struct device *, pm_message_t);
[120] int (*resume)(struct device *);
[128] int (*num_vf)(struct device *);
[136] int (*dma_configure)(struct device *);
[144] const struct dev_pm_ops *pm;
[152] const struct iommu_ops *iommu_ops;
[160] struct subsys_private *p;
[168] struct lock_class_key lock_key;
[168] bool need_parent_lock;
}
SIZE: 176
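From the vmcore, dev_groups of ice_peer_bus is NULL, although we expected it not to be NULL. Note also that the faulting instruction (bus_add_device.cold.8+140, "mov 0x60(%rax),%rdi") dereferences RAX, which was loaded from 0xa0(%rbx), i.e. offset 160 of struct bus_type, which is the field p; both RAX and bus->p are 0x0 in the dump, which matches the faulting address CR2 = 0x60. Next, we will check the ice source code to find out why these fields are not set (NOTE: presumably the bus was never successfully registered -- to be confirmed).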