Using crash to analyze a vmcore and find the root cause of a NULL pointer dereference

The problem symptoms:
"
[  298.884379] BUG: kernel NULL pointer dereference, address: 0000000000000060
[  298.884380] #PF: supervisor read access in kernel mode
[  298.884381] #PF: error_code(0x0000) - not-present page
[  298.884382] PGD 0 P4D 0
[  298.884386] Oops: 0000 [#1] SMP PTI
[  298.884389] CPU: 0 PID: 5 Comm: kworker/0:0 Kdump: loaded Tainted: G S         OE     5.9.16+ #16
[  298.884390] Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.02.0002.122320131210 12/23/2013
[  298.884397] Workqueue: events work_for_cpu_fn
[  298.884403] RIP: 0010:bus_add_device.cold.8+0x8c/0x11e
[  298.884406] Code: 00 00 00 48 83 c7 18 e8 b6 40 b6 ff e9 f7 90 ca ff 4c 8b 45 00 eb 90 48 8b 55 50 48 85 d2 74 2c 48 8b 83 a0 00 00 00 48 89 ee <48> 8b 78 60 48 83 c7 18 e8 9b c5 a5 ff 41 89 c4 85 c0 74 14 48 8b
[  298.884407] RSP: 0018:ffffbf71800fbd90 EFLAGS: 00010246
[  298.884409] RAX: 0000000000000000 RBX: ffffffffc077f120 RCX: 0000000000000027
[  298.884410] RDX: ffff9b6415e24f70 RSI: ffff9b641f3f4028 RDI: ffff9b642ea18008
[  298.884411] RBP: ffff9b641f3f4028 R08: 0000000000011a86 R09: 0000000000000047
[  298.884413] R10: 0000000000000000 R11: ffffbf71800fbc08 R12: 0000000000000000
[  298.884414] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[  298.884415] FS:  0000000000000000(0000) GS:ffff9b642ea00000(0000) knlGS:0000000000000000
[  298.884417] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  298.884418] CR2: 0000000000000060 CR3: 000000041883a006 CR4: 00000000000606f0
[  298.884419] Call Trace:
[  298.884425]  device_add.cold.48+0x3d6/0x6bb
[  298.884445]  ice_probe+0xa8a/0x1080 [ice]
[  298.884451]  local_pci_probe+0x42/0x80
[  298.884453]  work_for_cpu_fn+0x16/0x20
[  298.884456]  process_one_work+0x1a7/0x370
[  298.884458]  worker_thread+0x1c9/0x370
[  298.884460]  ? process_one_work+0x370/0x370
[  298.884462]  kthread+0x116/0x130
[  298.884464]  ? kthread_park+0x80/0x80
[  298.884467]  ret_from_fork+0x22/0x30
[  298.884469] Modules linked in: ice(OE+) xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_nat_tftp nft_objref nf_conntrack_tftp nft_counter tun bridge stp llc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat rfkill nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip6_tables nft_compat ip_set nf_tables nfnetlink sunrpc ext4 mbcache jbd2 intel_rapl_msr intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul iTCO_wdt ghash_clmulni_intel iTCO_vendor_support rapl intel_cstate mgag200 drm_kms_helper ipmi_si syscopyarea sysfillrect sysimgblt fb_sys_fops ipmi_devintf drm intel_uncore joydev pcspkr ipmi_msghandler mei_me i2c_i801 mei lpc_ich i2c_smbus ioatdma ip_tables xfs libcrc32c sd_mod t10_pi sg ahci libahci libata igb crc32c_intel i2c_algo_bit dca wmi dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded: ice]
[  298.884511] CR2: 0000000000000060
"

crash> whatis device_add
<text variable, no debug info> device_add;
crash> whatis bus_add_device
int bus_add_device(struct device *);

Since dmesg reports "[  298.884403] RIP: 0010:bus_add_device.cold.8+0x8c/0x11e", we begin by disassembling up to that address:

crash> dis -lr bus_add_device.cold.8+0x8c
/images/yzhu/stable-linux/./include/linux/device.h: 666
0xffffffffb8d5fc8c <bus_add_device.cold.8>:     mov    0x50(%rbp),%r8
0xffffffffb8d5fc90 <bus_add_device.cold.8+4>:   test   %r8,%r8
0xffffffffb8d5fc93 <bus_add_device.cold.8+7>:   je     0xffffffffb8d5fcff <bus_add_device.cold.8+115>
0xffffffffb8d5fc95 <bus_add_device.cold.8+9>:   mov    (%rbx),%rcx
0xffffffffb8d5fc98 <bus_add_device.cold.8+12>:  mov    $0x1c3,%edx
0xffffffffb8d5fc9d <bus_add_device.cold.8+17>:  mov    $0xffffffffb956a4b7,%rsi
0xffffffffb8d5fca4 <bus_add_device.cold.8+24>:  mov    $0xffffffffb956a530,%rdi
0xffffffffb8d5fcab <bus_add_device.cold.8+31>:  callq  0xffffffffb8d3fbd8 <printk>
/images/yzhu/stable-linux/drivers/base/bus.c: 452 <-------------------------------------------------------This line calls another function; the next instructions prepare its input parameters.
0xffffffffb8d5fcb0 <bus_add_device.cold.8+36>:  mov    0x20(%rbx),%rsi      <-----------------------------input parameter
0xffffffffb8d5fcb4 <bus_add_device.cold.8+40>:  mov    %rbp,%rdi            <-----------------------------input parameter
0xffffffffb8d5fcb7 <bus_add_device.cold.8+43>:  callq  0xffffffffb8d5ee56 <device_add_groups>
0xffffffffb8d5fcbc <bus_add_device.cold.8+48>:  mov    %eax,%r12d
/images/yzhu/stable-linux/drivers/base/bus.c: 453
0xffffffffb8d5fcbf <bus_add_device.cold.8+51>:  test   %eax,%eax
0xffffffffb8d5fcc1 <bus_add_device.cold.8+53>:  je     0xffffffffb8d5fd05 <bus_add_device.cold.8+121>
/images/yzhu/stable-linux/drivers/base/bus.c: 454
0xffffffffb8d5fcc3 <bus_add_device.cold.8+55>:  mov    %eax,%ecx
0xffffffffb8d5fcc5 <bus_add_device.cold.8+57>:  mov    $0x1c6,%edx
0xffffffffb8d5fcca <bus_add_device.cold.8+62>:  mov    $0xffffffffb956a4b7,%rsi
0xffffffffb8d5fcd1 <bus_add_device.cold.8+69>:  mov    $0xffffffffb9521507,%rdi
0xffffffffb8d5fcd8 <bus_add_device.cold.8+76>:  callq  0xffffffffb8d3fbd8 <printk>
/images/yzhu/stable-linux/drivers/base/bus.c: 474
0xffffffffb8d5fcdd <bus_add_device.cold.8+81>:  mov    0x60(%rbp),%rax
/images/yzhu/stable-linux/drivers/base/bus.c: 53
0xffffffffb8d5fce1 <bus_add_device.cold.8+85>:  test   %rax,%rax
0xffffffffb8d5fce4 <bus_add_device.cold.8+88>:  je     0xffffffffb8a08df6 <bus_add_device+54>
/images/yzhu/stable-linux/./include/linux/kobject.h: 218
0xffffffffb8d5fcea <bus_add_device.cold.8+94>:  mov    0xa0(%rax),%rdi
0xffffffffb8d5fcf1 <bus_add_device.cold.8+101>: add    $0x18,%rdi
0xffffffffb8d5fcf5 <bus_add_device.cold.8+105>: callq  0xffffffffb88c3db0 <kobject_put>
0xffffffffb8d5fcfa <bus_add_device.cold.8+110>: jmpq   0xffffffffb8a08df6 <bus_add_device+54>
/images/yzhu/stable-linux/./include/linux/device.h: 669
0xffffffffb8d5fcff <bus_add_device.cold.8+115>: mov    0x0(%rbp),%r8
/images/yzhu/stable-linux/./include/linux/kobject.h: 90
0xffffffffb8d5fd03 <bus_add_device.cold.8+119>: jmp    0xffffffffb8d5fc95 <bus_add_device.cold.8+9>
/images/yzhu/stable-linux/./include/linux/device.h: 666
0xffffffffb8d5fd05 <bus_add_device.cold.8+121>: mov    0x50(%rbp),%rdx
0xffffffffb8d5fd09 <bus_add_device.cold.8+125>: test   %rdx,%rdx
0xffffffffb8d5fd0c <bus_add_device.cold.8+128>: je     0xffffffffb8d5fd3a <bus_add_device.cold.8+174>
0xffffffffb8d5fd0e <bus_add_device.cold.8+130>: mov    0xa0(%rbx),%rax
0xffffffffb8d5fd15 <bus_add_device.cold.8+137>: mov    %rbp,%rsi
0xffffffffb8d5fd18 <bus_add_device.cold.8+140>: mov    0x60(%rax),%rdi

crash> dis -s bus_add_device
FILE: drivers/base/bus.c
LINE: 446

  441    * - Add device's bus attributes.
  442    * - Create links to device's bus.
  443    * - Add the device to its bus's list of devices.
  444    */
  445   int bus_add_device(struct device *dev)
* 446   {
  447           struct bus_type *bus = bus_get(dev->bus);
  448           int error = 0;
  449
  450           if (bus) {
  451                   pr_info("file: %s +%d, bus: '%s': add device %s\n", __FILE__, __LINE__, bus->name, dev_name(dev));
  452                   error = device_add_groups(dev, bus->dev_groups);                 <-----------------------------------------This function is interesting
  453                   if (error) {
  454                           pr_info("file: %s +%d, error:%d\n", __FILE__, __LINE__, error);
  455                           goto out_put;
  456                   }

crash> whatis device_add_groups
int device_add_groups(struct device *, const struct attribute_group **);

crash> struct device ffff9b641f3f4028
struct device {
  kobj = {
    name = 0xffff9b6415e24f70 "ice_peer_0",
    entry = {
      next = 0xffff9b6104cdbde0,
      prev = 0xffff9b640f993518
    },
    parent = 0xffff9b641ee0f0b0,
    kset = 0xffff9b6104cdbde0,
    ktype = 0xffffffffb995aa20,
    sd = 0xffff9b642a1c5880,
    kref = {
      refcount = {
        refs = {
          counter = 2
        }
      }
    },
    state_initialized = 1,
    state_in_sysfs = 1,
    state_add_uevent_sent = 0,
    state_remove_uevent_sent = 0,
    uevent_suppress = 0
  },
  parent = 0xffff9b641ee0f0b0,
  p = 0xffff9b641765a000,
  init_name = 0x0,
  type = 0x0,
  bus = 0xffffffffc077f120 <ice_peer_bus>,      <---------------This is bus type
  driver = 0x0,
  platform_data = 0x0,
  driver_data = 0x0,
  mutex = {
    owner = {
      counter = 0
    },
    wait_lock = {
      {
        rlock = {
          raw_lock = {
            {
              val = {
                counter = 0
              },
              {
                locked = 0 '\000',
                pending = 0 '\000'
              },
              {
                locked_pending = 0,
                tail = 0
              }
            }
          }

crash> struct bus_type 0xffffffffc077f120
struct bus_type {
  name = 0xffffffffc076872e "ice_pseudo_bus",
  dev_name = 0x0,
  dev_root = 0x0,
  bus_groups = 0x0,
  dev_groups = 0x0,    <----------------------------this dev_groups is NULL
  drv_groups = 0x0,
  match = 0xffffffffc071d0b0 <ice_bus_match>,
  uevent = 0x0,
  probe = 0xffffffffc071f300 <ice_bus_probe>,
  sync_state = 0x0,
  remove = 0xffffffffc071f360 <ice_bus_remove>,
  shutdown = 0x0,
  online = 0x0,
  offline = 0x0,
  suspend = 0x0,
  resume = 0x0,
  num_vf = 0x0,
  dma_configure = 0x0,
  pm = 0x0,
  iommu_ops = 0x0,
  p = 0x0,
  lock_key = {<No data fields>},
  need_parent_lock = false
}
crash> eval 0xffffffffc077f120 + 0x20
hexadecimal: ffffffffc077f140
    decimal: 18446744072643670336  (-1065881280)
      octal: 1777777777770035770500
     binary: 1111111111111111111111111111111111000000011101111111000101000000
crash> whatis bus_add_device
int bus_add_device(struct device *);
crash> whatis device_add_groups
int device_add_groups(struct device *, const struct attribute_group **);
crash> struct bus_type -o
struct bus_type {
    [0] const char *name;
    [8] const char *dev_name;
   [16] struct device *dev_root;
   [24] const struct attribute_group **bus_groups;
   [32] const struct attribute_group **dev_groups;
   [40] const struct attribute_group **drv_groups;
   [48] int (*match)(struct device *, struct device_driver *);
   [56] int (*uevent)(struct device *, struct kobj_uevent_env *);
   [64] int (*probe)(struct device *);
   [72] void (*sync_state)(struct device *);
   [80] int (*remove)(struct device *);
   [88] void (*shutdown)(struct device *);
   [96] int (*online)(struct device *);
  [104] int (*offline)(struct device *);
  [112] int (*suspend)(struct device *, pm_message_t);
  [120] int (*resume)(struct device *);
  [128] int (*num_vf)(struct device *);
  [136] int (*dma_configure)(struct device *);
  [144] const struct dev_pm_ops *pm;
  [152] const struct iommu_ops *iommu_ops;
  [160] struct subsys_private *p;
  [168] struct lock_class_key lock_key;
  [168] bool need_parent_lock;
}
SIZE: 176

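From the vmcore, dev_groups of ice_peer_bus is NULL, although it should not be. Note also that the faulting instruction at bus_add_device.cold.8+0x8c (+140) is "mov 0x60(%rax),%rdi", where RAX was loaded just before from 0xa0(%rbx); RBX holds the bus_type pointer (0xffffffffc077f120), and offset 160 (0xa0) of struct bus_type is the field p, which is also 0x0 in the dump above. This matches RAX = 0 and CR2 = 0x60 in the oops. We will check why these fields are NULL in the ice source code.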
 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

mounter625

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值