一个ixgbe空指针访问导致宿主机宕机的未知问题

1、某台机器出现宕机,通过kdump日志,初步看是因为ixgbe里有空指针访问

2、使用crash分析空指针访问原因

1)、安装kernel debug包(包含debug调试信息的vmlinux);

2)、使用crash打开vmcore( crash /usr/lib/debug/usr/lib/modules/3.10.0-327/vmlinux /home/vmcore ),先用dis命令看下RIP地址的汇编信息;从汇编指令看此时正在访问rbx寄存器;

3)、从bt里看下现场信息,发现rbx确实为空;而且从调用栈里看,此时正在执行ixgbe_xmit_frame_ring函数;

4)、对ixgbe_xmit_frame_ring做下反汇编,追踪下rbx的来源;从汇编里看出rbx是从rdx里赋值过来的,按x86-64(System V AMD64 ABI)的调用约定,函数调用时,%rdi,%rsi,%rdx,%rcx,%r8,%r9分别用来传递第1、2、3、4、5、6个参数,因此这里的rdx表示的是ixgbe_xmit_frame_ring的第三个参数;

crash> dis -l ixgbe_xmit_frame_ring
0xffffffffc05cdd90 <ixgbe_xmit_frame_ring>:     nopl   0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffffc05cdd95 <ixgbe_xmit_frame_ring+5>:   push   %rbp
0xffffffffc05cdd96 <ixgbe_xmit_frame_ring+6>:   mov    %rsp,%rbp
0xffffffffc05cdd99 <ixgbe_xmit_frame_ring+9>:   push   %r15
0xffffffffc05cdd9b <ixgbe_xmit_frame_ring+11>:  push   %r14
0xffffffffc05cdd9d <ixgbe_xmit_frame_ring+13>:  mov    %rsi,%r14
0xffffffffc05cdda0 <ixgbe_xmit_frame_ring+16>:  push   %r13
0xffffffffc05cdda2 <ixgbe_xmit_frame_ring+18>:  mov    %rdi,%r13
0xffffffffc05cdda5 <ixgbe_xmit_frame_ring+21>:  push   %r12
0xffffffffc05cdda7 <ixgbe_xmit_frame_ring+23>:  push   %rbx
0xffffffffc05cdda8 <ixgbe_xmit_frame_ring+24>:  mov    %rdx,%rbx  //rbx赋值的地方
0xffffffffc05cddab <ixgbe_xmit_frame_ring+27>:  sub    $0x40,%rsp
0xffffffffc05cddaf <ixgbe_xmit_frame_ring+31>:  movzwl 0x7e(%rdi),%r15d
0xffffffffc05cddb4 <ixgbe_xmit_frame_ring+36>:  movb   $0x0,-0x35(%rbp)
0xffffffffc05cddb8 <ixgbe_xmit_frame_ring+40>:  mov    %gs:0x28,%rax
0xffffffffc05cddc1 <ixgbe_xmit_frame_ring+49>:  mov    %rax,-0x30(%rbp)
0xffffffffc05cddc5 <ixgbe_xmit_frame_ring+53>:  xor    %eax,%eax
0xffffffffc05cddc7 <ixgbe_xmit_frame_ring+55>:  mov    0x68(%rdi),%eax
0xffffffffc05cddca <ixgbe_xmit_frame_ring+58>:  lea    0x3fff(%rax),%ecx
0xffffffffc05cddd0 <ixgbe_xmit_frame_ring+64>:  sub    0x6c(%rdi),%ecx
0xffffffffc05cddd3 <ixgbe_xmit_frame_ring+67>:  mov    0xdc(%rdi),%edi
0xffffffffc05cddd9 <ixgbe_xmit_frame_ring+73>:  add    0xe0(%r13),%rdi
0xffffffffc05cdde0 <ixgbe_xmit_frame_ring+80>:  shr    $0xe,%ecx
0xffffffffc05cdde3 <ixgbe_xmit_frame_ring+83>:  movzbl (%rdi),%r8d
0xffffffffc05cdde7 <ixgbe_xmit_frame_ring+87>:  test   %r8w,%r8w
0xffffffffc05cddeb <ixgbe_xmit_frame_ring+91>:  je     0xffffffffc05cde18 <ixgbe_xmit_frame_ring+136>
0xffffffffc05cdded <ixgbe_xmit_frame_ring+93>:  sub    $0x1,%r8d
0xffffffffc05cddf1 <ixgbe_xmit_frame_ring+97>:  xor    %eax,%eax
0xffffffffc05cddf3 <ixgbe_xmit_frame_ring+99>:  movzwl %r8w,%r8d
0xffffffffc05cddf7 <ixgbe_xmit_frame_ring+103>: add    $0x1,%r8
0xffffffffc05cddfb <ixgbe_xmit_frame_ring+107>: shl    $0x4,%r8
0xffffffffc05cddff <ixgbe_xmit_frame_ring+111>: nop
0xffffffffc05cde00 <ixgbe_xmit_frame_ring+112>: mov    0x3c(%rdi,%rax,1),%esi
0xffffffffc05cde04 <ixgbe_xmit_frame_ring+116>: add    $0x10,%rax
0xffffffffc05cde08 <ixgbe_xmit_frame_ring+120>: lea    0x3fff(%rsi),%edx
0xffffffffc05cde0e <ixgbe_xmit_frame_ring+126>: shr    $0xe,%edx
0xffffffffc05cde11 <ixgbe_xmit_frame_ring+129>: add    %edx,%ecx
0xffffffffc05cde13 <ixgbe_xmit_frame_ring+131>: cmp    %r8,%rax
0xffffffffc05cde16 <ixgbe_xmit_frame_ring+134>: jne    0xffffffffc05cde00 <ixgbe_xmit_frame_ring+112>
0xffffffffc05cde18 <ixgbe_xmit_frame_ring+136>: movzwl 0x58(%rbx),%eax  //访问空指针的地方
0xffffffffc05cde1c <ixgbe_xmit_frame_ring+140>: movzwl 0x5a(%rbx),%esi
0xffffffffc05cde20 <ixgbe_xmit_frame_ring+144>: add    $0x3,%ecx
0xffffffffc05cde23 <ixgbe_xmit_frame_ring+147>: xor    %edx,%edx

5)、结合源码分析下函数ixgbe_xmit_frame_ring,不难分析出rbx表示的是tx_ring,由于tx_ring为空,函数通过ixgbe_maybe_stop_tx(已被内联进ixgbe_xmit_frame_ring)调用ixgbe_desc_unused,在访问tx_ring->next_to_clean时出现异常,next_to_clean正好位于tx_ring的0x58偏移处,与RIP信息一致;

/*
 * Excerpt of ixgbe_xmit_frame_ring as quoted for this analysis.
 * NOTE(review): the body shown here stops after the ring-space check; the
 * remainder of the driver function appears to have been cut by the article.
 *
 * @skb:      packet to transmit (first argument)
 * @adapter:  owning adapter (second argument)
 * @tx_ring:  ring to queue the packet on (third argument); it is
 *            dereferenced below with no NULL check in this excerpt
 *
 * Returns NETDEV_TX_BUSY when ixgbe_maybe_stop_tx() reports a non-zero
 * result for the required descriptor count.
 */
netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
			  struct ixgbe_adapter *adapter,
			  struct ixgbe_ring *tx_ring)
{
	struct ixgbe_tx_buffer *first;
	int tso;
	u32 tx_flags = 0;
	unsigned short f;
	/* start with the descriptors needed for the linear (head) part */
	u16 count = TXD_USE_COUNT(skb_headlen(skb));
	__be16 protocol = skb->protocol;
	u8 hdr_len = 0;

	/*
	 * need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
	 *       + 2 desc gap to keep tail from touching head,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time
	 */
	/* add the descriptors needed for every page fragment */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);

	/*
	 * First use of tx_ring: a NULL tx_ring faults here, either inside
	 * the ring-space check or on the tx_busy counter update.
	 */
	if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) {
		tx_ring->tx_stats.tx_busy++;
		return NETDEV_TX_BUSY;
	}
}

/*
 * Compute the free-descriptor figure for @ring from its next_to_clean and
 * next_to_use indices.
 *
 * When next_to_clean is ahead of next_to_use the result is
 * ntc - ntu - 1; otherwise ring->count is added first so the difference
 * wraps around the ring.  @ring is dereferenced unconditionally, so a NULL
 * ring faults on the first field read.
 */
static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring)
{
	u16 ntc = ring->next_to_clean;
	u16 ntu = ring->next_to_use;

	/* NOTE(review): the trailing -1 presumably keeps one slot in reserve - confirm */
	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
}

6)、再进一步看ixgbe_xmit_frame_ring的调用关系,会发现tx_ring是由adapter->tx_ring[skb->queue_mapping]得到的;

/*
 * Select the transmit ring for @skb and hand the packet to
 * ixgbe_xmit_frame_ring().
 *
 * @skb:     packet to transmit
 * @netdev:  network device; its private data is the ixgbe_adapter
 * @ring:    explicit ring to use, or NULL to pick
 *           adapter->tx_ring[skb->queue_mapping]
 *
 * Returns NETDEV_TX_OK if skb_put_padto() reports failure, otherwise
 * whatever ixgbe_xmit_frame_ring() returns.
 */
static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb,
				      struct net_device *netdev,
				      struct ixgbe_ring *ring)
{
	struct ixgbe_adapter *adapter = netdev_priv(netdev);
	struct ixgbe_ring *tx_ring;

	/*
	 * The minimum packet size for olinfo paylen is 17 so pad the skb
	 * in order to meet this minimum size requirement.
	 */
	if (skb_put_padto(skb, 17))
		return NETDEV_TX_OK;
        /*
         * Article author's note: the ring argument is always NULL on this
         * path, so tx_ring is taken from adapter->tx_ring[].  The selected
         * entry is not checked for NULL before it is passed on.
         */
	tx_ring = ring ? ring : adapter->tx_ring[skb->queue_mapping];

	return ixgbe_xmit_frame_ring(skb, adapter, tx_ring);
}

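7)、接下来看下adapter->tx_ring[skb->queue_mapping]是否为空,首先得先找到skb->queue_mapping的值,由于skb是ixgbe_xmit_frame_ring的第一个参数,第一个参数保存在rdi里,因此先看下rdi描述的skb的信息;从以下的输出里可以看出skb->queue_mapping为0,因此tx_ring即为adapter->tx_ring[0]所表示的值;(注意:rdi只在函数入口处保存skb。从上面的反汇编看,+18处已把rdi保存到r13,而+67、+73两条指令又改写了rdi,所以崩溃现场的RDI已不再指向skb;下面输出中len、protocol均为0,而现场由+55处读取0x68(%rdi)得到的RAX=0x5ea、由+31处读取0x7e(%rdi)得到的R15=0x8,两者对不上,也说明这一点。更可靠的做法是用R13的值0xffff88132b78eb00重新执行struct sk_buff来确认queue_mapping;若该值不小于num_tx_queues,则adapter->tx_ring[queue_mapping]本身就是空指针。)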

crash> bt
PID: 19     TASK: ffff880169748fe0  CPU: 2   COMMAND: "ksoftirqd/2"
 #0 [ffff8801697578d8] machine_kexec at ffffffff8105c54b
 #1 [ffff880169757938] __crash_kexec at ffffffff81105b82
 #2 [ffff880169757a08] crash_kexec at ffffffff81105c70
 #3 [ffff880169757a20] oops_end at ffffffff816bb078
 #4 [ffff880169757a48] no_context at ffffffff816ab189
 #5 [ffff880169757a98] __bad_area_nosemaphore at ffffffff816ab21f
 #6 [ffff880169757ae0] bad_area_nosemaphore at ffffffff816ab389
 #7 [ffff880169757af0] __do_page_fault at ffffffff816bdf3e
 #8 [ffff880169757b50] do_page_fault at ffffffff816be0e5
 #9 [ffff880169757b80] page_fault at ffffffff816ba308
    [exception RIP: ixgbe_xmit_frame_ring+136]
    RIP: ffffffffc05cde18  RSP: ffff880169757c30  RFLAGS: 00010246
    RAX: 00000000000005ea  RBX: 0000000000000000  RCX: 0000000000000001
    RDX: 0000000000000000  RSI: ffff88203d4208c0  RDI: ffff88188fd90ec0
    RBP: ffff880169757c98   R8: 0000000000000000   R9: ffffffff8157d137
    R10: ffff88103fc99f40  R11: ffffea008062e900  R12: ffff88203d420000
    R13: ffff88132b78eb00  R14: ffff88203d4208c0  R15: 0000000000000008
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
#10 [ffff880169757ca0] ixgbe_xmit_frame at ffffffffc05cec2f [ixgbe]
#11 [ffff880169757cd0] dev_hard_start_xmit at ffffffff815927d1
#12 [ffff880169757d40] sch_direct_xmit at ffffffff815bd2ba
#13 [ffff880169757d90] __qdisc_run at ffffffff815bd470
#14 [ffff880169757dd8] net_tx_action at ffffffff81591cc8
#15 [ffff880169757e10] __do_softirq at ffffffff810916af
#16 [ffff880169757e80] run_ksoftirqd at ffffffff81091878
#17 [ffff880169757e98] smpboot_thread_fn at ffffffff810b9e0f
#18 [ffff880169757ec8] kthread at ffffffff810b16ff
#19 [ffff880169757f50] ret_from_fork at ffffffff816c2cd8
crash> struct sk_buff -x ffff88188fd90ec0
struct sk_buff {
  next = 0x0, 
  prev = 0x0, 
  {
    tstamp = {
      tv64 = 0x0
    }, 
    skb_mstamp = {
      {
        v64 = 0x0, 
        {
          stamp_us = 0x0, 
          stamp_jiffies = 0x0
        }
      }
    }
  }, 
  sk = 0x0, 
  dev = 0x100000000, 
  cb = "\000\000\000\000\000\000\000\000\000\317\025\\\000\352\377\377*\b\000\000\337\003\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000", 
  _skb_refdst = 0x0, 
  sp = 0x0, 
  len = 0x0, 
  data_len = 0x0, 
  mac_len = 0x0, 
  hdr_len = 0x0, 
  {
    csum = 0x0, 
    {
      csum_start = 0x0, 
      csum_offset = 0x0
    }
  }, 
  priority = 0x0, 
  ignore_df = 0x0, 
  cloned = 0x0, 
  ip_summed = 0x0, 
  nohdr = 0x0, 
  nfctinfo = 0x0, 
  pkt_type = 0x0, 
  fclone = 0x0, 
  ipvs_property = 0x0, 
  peeked = 0x0, 
  nf_trace = 0x0, 
  protocol = 0x0, 
  destructor = 0x0, 
  nfct = 0x0, 
  nf_bridge = 0x0, 
  headers_start = 0xffff88188fd90f58, 
  skb_iif = 0x0, 
  {
    hash = 0x0, 
    __UNIQUE_ID_rh_kabi_hide35 = {
      rxhash = 0x0
    }, 
    {<No data fields>}
  }, 
  vlan_proto = 0x0, 
  vlan_tci = 0x0, 
  tc_index = 0x0, 
  tc_verd = 0x0, 
  queue_mapping = 0x0, 
  ndisc_nodetype = 0x0, 
  pfmemalloc = 0x0, 
  ooo_okay = 0x0, 

8)、进一步分析adapter的值,adapter作为ixgbe_xmit_frame_ring的第二个参数保存在rsi里(现场RSI与+13处保存了rsi的R14相同,说明该值未被改写),因此分析下rsi表示的adapter的信息(一开始会报ixgbe_adapter结构未定义,需要先加载ixgbe模块的调试信息),从以下的输出信息里可以看到adapter->tx_ring[0]为0xffff88088a4f7400,并不为空;

crash> struct ixgbe_adapter -x  ffff88203d4208c0
struct: invalid data structure reference: ixgbe_adapter
crash> mod -s ixgbe /usr/lib/debug/usr/lib/modules/3.10.0-327/kernel/drivers/net/ethernet/intel/ixgbe/ixgbe.ko.debug
     MODULE       NAME                       SIZE  OBJECT FILE
ffffffffc0600000  ixgbe                    301698  /usr/lib/debug/usr/lib/modules/3.10.0-327/kernel/drivers/net/ethernet/intel/ixgbe/ixgbe.ko.debug 
crash> 
crash> 
crash> 
crash> 
crash> struct ixgbe_adapter -x  ffff88203d4208c0
struct ixgbe_adapter {
  active_vlans = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 
  netdev = 0xffff88203d420000, 
  pdev = 0xffff8810e923d000, 
  state = 0x140, 
  flags = 0x8050208, 
  flags2 = 0x1, 
  num_tx_queues = 0x8, 
  tx_itr_setting = 0x1, 
  tx_work_limit = 0x100, 
  num_rx_queues = 0x8, 
  rx_itr_setting = 0x1, 
  vxlan_port = 0x0, 
  geneve_port = 0x0, 
  tx_ring = {0xffff88088a4f7400, 0xffff88088a4f7c00, 0xffff880aac240400, 0xffff880234c3c400, 0xffff880234c3ac00, 0xffff880234c3e400, 0xffff880234c3fc00, 0xffff880234c3d400, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 

3、总结

根据crash信息,结合源码分析,访问空指针是由于ixgbe_xmit_frame_ring的tx_ring参数为空,tx_ring是从adapter->tx_ring[skb->queue_mapping]里获取的,但是adapter->tx_ring[skb->queue_mapping]又不为空!怀疑可能是有一些并发问题:tx_ring一开始获取到的确实是空的,但是获取完之后adapter->tx_ring[]里的指针马上被重新赋值了(从当时的日志看,网卡正处于重新初始化过程中)。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值