1、在某个机器上出现机器宕机,通过kdump日志,初步看是因为ixgbe里有空指针访问
2、使用crash分析空指针访问原因
1)、安装kernel debug包(包含debug调试信息的vmlinux);
2)、使用crash打开vmcore( crash /usr/lib/debug/usr/lib/modules/3.10.0-327/vmlinux /home/vmcore ),先用dis命令看下RIP地址的汇编信息;从汇编指令看此时正在访问rbx寄存器;
3)、从bt里看下现场信息,发现rbx确实为空;而且从调用栈里看,此时正在执行ixgbe_xmit_frame_ring函数;
4)、对ixgbe_xmit_frame_ring做下反汇编,追踪下rbx的来源;从汇编里看出rbx是从rdx里赋值过来的,按x86-64(System V ABI)的函数调用约定,在函数入口处,%rdi,%rsi,%rdx,%rcx,%r8,%r9分别用来传递第1、2、3、4、5、6个参数(函数执行过程中这些寄存器可能被改写),因此这里的rdx表示的是ixgbe_xmit_frame_ring的第三个参数;
crash> dis -l ixgbe_xmit_frame_ring
0xffffffffc05cdd90 <ixgbe_xmit_frame_ring>: nopl 0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffffc05cdd95 <ixgbe_xmit_frame_ring+5>: push %rbp
0xffffffffc05cdd96 <ixgbe_xmit_frame_ring+6>: mov %rsp,%rbp
0xffffffffc05cdd99 <ixgbe_xmit_frame_ring+9>: push %r15
0xffffffffc05cdd9b <ixgbe_xmit_frame_ring+11>: push %r14
0xffffffffc05cdd9d <ixgbe_xmit_frame_ring+13>: mov %rsi,%r14
0xffffffffc05cdda0 <ixgbe_xmit_frame_ring+16>: push %r13
0xffffffffc05cdda2 <ixgbe_xmit_frame_ring+18>: mov %rdi,%r13
0xffffffffc05cdda5 <ixgbe_xmit_frame_ring+21>: push %r12
0xffffffffc05cdda7 <ixgbe_xmit_frame_ring+23>: push %rbx
0xffffffffc05cdda8 <ixgbe_xmit_frame_ring+24>: mov %rdx,%rbx //rbx赋值的地方
0xffffffffc05cddab <ixgbe_xmit_frame_ring+27>: sub $0x40,%rsp
0xffffffffc05cddaf <ixgbe_xmit_frame_ring+31>: movzwl 0x7e(%rdi),%r15d
0xffffffffc05cddb4 <ixgbe_xmit_frame_ring+36>: movb $0x0,-0x35(%rbp)
0xffffffffc05cddb8 <ixgbe_xmit_frame_ring+40>: mov %gs:0x28,%rax
0xffffffffc05cddc1 <ixgbe_xmit_frame_ring+49>: mov %rax,-0x30(%rbp)
0xffffffffc05cddc5 <ixgbe_xmit_frame_ring+53>: xor %eax,%eax
0xffffffffc05cddc7 <ixgbe_xmit_frame_ring+55>: mov 0x68(%rdi),%eax
0xffffffffc05cddca <ixgbe_xmit_frame_ring+58>: lea 0x3fff(%rax),%ecx
0xffffffffc05cddd0 <ixgbe_xmit_frame_ring+64>: sub 0x6c(%rdi),%ecx
0xffffffffc05cddd3 <ixgbe_xmit_frame_ring+67>: mov 0xdc(%rdi),%edi
0xffffffffc05cddd9 <ixgbe_xmit_frame_ring+73>: add 0xe0(%r13),%rdi
0xffffffffc05cdde0 <ixgbe_xmit_frame_ring+80>: shr $0xe,%ecx
0xffffffffc05cdde3 <ixgbe_xmit_frame_ring+83>: movzbl (%rdi),%r8d
0xffffffffc05cdde7 <ixgbe_xmit_frame_ring+87>: test %r8w,%r8w
0xffffffffc05cddeb <ixgbe_xmit_frame_ring+91>: je 0xffffffffc05cde18 <ixgbe_xmit_frame_ring+136>
0xffffffffc05cdded <ixgbe_xmit_frame_ring+93>: sub $0x1,%r8d
0xffffffffc05cddf1 <ixgbe_xmit_frame_ring+97>: xor %eax,%eax
0xffffffffc05cddf3 <ixgbe_xmit_frame_ring+99>: movzwl %r8w,%r8d
0xffffffffc05cddf7 <ixgbe_xmit_frame_ring+103>: add $0x1,%r8
0xffffffffc05cddfb <ixgbe_xmit_frame_ring+107>: shl $0x4,%r8
0xffffffffc05cddff <ixgbe_xmit_frame_ring+111>: nop
0xffffffffc05cde00 <ixgbe_xmit_frame_ring+112>: mov 0x3c(%rdi,%rax,1),%esi
0xffffffffc05cde04 <ixgbe_xmit_frame_ring+116>: add $0x10,%rax
0xffffffffc05cde08 <ixgbe_xmit_frame_ring+120>: lea 0x3fff(%rsi),%edx
0xffffffffc05cde0e <ixgbe_xmit_frame_ring+126>: shr $0xe,%edx
0xffffffffc05cde11 <ixgbe_xmit_frame_ring+129>: add %edx,%ecx
0xffffffffc05cde13 <ixgbe_xmit_frame_ring+131>: cmp %r8,%rax
0xffffffffc05cde16 <ixgbe_xmit_frame_ring+134>: jne 0xffffffffc05cde00 <ixgbe_xmit_frame_ring+112>
0xffffffffc05cde18 <ixgbe_xmit_frame_ring+136>: movzwl 0x58(%rbx),%eax //访问空指针的地方
0xffffffffc05cde1c <ixgbe_xmit_frame_ring+140>: movzwl 0x5a(%rbx),%esi
0xffffffffc05cde20 <ixgbe_xmit_frame_ring+144>: add $0x3,%ecx
0xffffffffc05cde23 <ixgbe_xmit_frame_ring+147>: xor %edx,%edx
5)、结合源码分析下函数ixgbe_xmit_frame_ring,不难分析出rbx表示的是tx_ring,由于tx_ring为空,函数在调用ixgbe_desc_unused时,访问tx_ring->next_to_clean时出现异常,next_to_clean正好位于tx_ring的0x58偏移处,与RIP信息一致;
/*
 * ixgbe_xmit_frame_ring - transmit one skb on the given Tx ring.
 *
 * NOTE: this is an abridged excerpt; everything after the ring-space check
 * has been omitted, which is why several locals appear unused and there is
 * no final return statement.
 *
 * @skb:     packet to transmit
 * @adapter: driver private data (not referenced in the part shown here)
 * @tx_ring: ring to queue the packet on; it is used without any NULL check
 *
 * Returns NETDEV_TX_BUSY when ixgbe_maybe_stop_tx() reports that the ring
 * cannot take count + 3 descriptors.
 */
netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
struct ixgbe_adapter *adapter,
struct ixgbe_ring *tx_ring)
{
struct ixgbe_tx_buffer *first;
int tso;
u32 tx_flags = 0;
unsigned short f;
/* Descriptors needed for the linear (header) part of the skb. */
u16 count = TXD_USE_COUNT(skb_headlen(skb));
__be16 protocol = skb->protocol;
u8 hdr_len = 0;
/*
* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
* + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
* + 2 desc gap to keep tail from touching head,
* + 1 desc for context descriptor,
* otherwise try next time
*/
/* Add the descriptors needed for every paged fragment. */
for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
/*
 * First use of tx_ring. According to the surrounding analysis, the
 * faulting load at +136 (offset 0x58 from a NULL %rbx) is the inlined
 * read of tx_ring->next_to_clean reached through this call.
 */
if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) {
tx_ring->tx_stats.tx_busy++;
return NETDEV_TX_BUSY;
}
}
/*
 * ixgbe_desc_unused - compute the number of free descriptors on a ring.
 *
 * @ring: ring to inspect; it is dereferenced unconditionally, so a NULL
 *        ring faults on the very first load (next_to_clean)
 *
 * Returns ntc - ntu - 1 when next_to_clean is ahead of next_to_use, and
 * ring->count + ntc - ntu - 1 otherwise (the indices have wrapped or are
 * equal). The result is always one less than the raw gap between the two
 * indices.
 */
static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring)
{
u16 ntc = ring->next_to_clean;
u16 ntu = ring->next_to_use;
return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
}
6)、再进一步看ixgbe_xmit_frame_ring的调用关系,会发现tx_ring是由adapter->tx_ring[skb->queue_mapping]得到的;
/*
 * __ixgbe_xmit_frame - pick the Tx ring for an skb and hand it to
 * ixgbe_xmit_frame_ring().
 *
 * @skb:    packet to transmit
 * @netdev: net device; its private area is the ixgbe_adapter
 * @ring:   explicit ring to use, or NULL to select
 *          adapter->tx_ring[skb->queue_mapping]
 *
 * Returns NETDEV_TX_OK if padding the skb fails, otherwise whatever
 * ixgbe_xmit_frame_ring() returns.
 */
static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb,
struct net_device *netdev,
struct ixgbe_ring *ring)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
struct ixgbe_ring *tx_ring;
/*
* The minimum packet size for olinfo paylen is 17 so pad the skb
* in order to meet this minimum size requirement.
*/
if (skb_put_padto(skb, 17))
return NETDEV_TX_OK;
/*
 * Author's note: on the path analysed here the ring argument is always
 * NULL, so tx_ring is taken from adapter->tx_ring[]. Neither the index
 * (skb->queue_mapping) nor the selected array entry is checked before it
 * is passed on, so an empty slot reaches ixgbe_xmit_frame_ring() as a
 * NULL tx_ring.
 */
tx_ring = ring ? ring : adapter->tx_ring[skb->queue_mapping];
return ixgbe_xmit_frame_ring(skb, adapter, tx_ring);
}
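7)、接下来看下adapter->tx_ring[skb->queue_mapping]是否为空,首先得先找到skb->queue_mapping的值,由于skb是ixgbe_xmit_frame_ring的第一个参数,第一个参数保存在rdi里,因此先看下rdi描述的skb的信息;从以下的输出里可以看出skb->queue_mapping为0,因此tx_ring即为adapter->tx_ring[0]所表示的值;(注意:rdi只在函数入口处保存skb。从上面的反汇编看,+18处已把rdi备份到r13,而+67/+73处rdi被改写为skb->head + skb->end,对照源码即skb_shinfo(skb);因此异常现场的RDI(ffff88188fd90ec0)已不再指向skb,真正的skb指针应取R13(ffff88132b78eb00)。现场RAX=0x5ea是+55处读到的skb->len,而下面按RDI解析出的len却为0,也说明这份sk_buff内容并不可信,queue_mapping为0的结论需要用 struct sk_buff -x ffff88132b78eb00 重新确认。)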
crash> bt
PID: 19 TASK: ffff880169748fe0 CPU: 2 COMMAND: "ksoftirqd/2"
#0 [ffff8801697578d8] machine_kexec at ffffffff8105c54b
#1 [ffff880169757938] __crash_kexec at ffffffff81105b82
#2 [ffff880169757a08] crash_kexec at ffffffff81105c70
#3 [ffff880169757a20] oops_end at ffffffff816bb078
#4 [ffff880169757a48] no_context at ffffffff816ab189
#5 [ffff880169757a98] __bad_area_nosemaphore at ffffffff816ab21f
#6 [ffff880169757ae0] bad_area_nosemaphore at ffffffff816ab389
#7 [ffff880169757af0] __do_page_fault at ffffffff816bdf3e
#8 [ffff880169757b50] do_page_fault at ffffffff816be0e5
#9 [ffff880169757b80] page_fault at ffffffff816ba308
[exception RIP: ixgbe_xmit_frame_ring+136]
RIP: ffffffffc05cde18 RSP: ffff880169757c30 RFLAGS: 00010246
RAX: 00000000000005ea RBX: 0000000000000000 RCX: 0000000000000001
RDX: 0000000000000000 RSI: ffff88203d4208c0 RDI: ffff88188fd90ec0
RBP: ffff880169757c98 R8: 0000000000000000 R9: ffffffff8157d137
R10: ffff88103fc99f40 R11: ffffea008062e900 R12: ffff88203d420000
R13: ffff88132b78eb00 R14: ffff88203d4208c0 R15: 0000000000000008
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#10 [ffff880169757ca0] ixgbe_xmit_frame at ffffffffc05cec2f [ixgbe]
#11 [ffff880169757cd0] dev_hard_start_xmit at ffffffff815927d1
#12 [ffff880169757d40] sch_direct_xmit at ffffffff815bd2ba
#13 [ffff880169757d90] __qdisc_run at ffffffff815bd470
#14 [ffff880169757dd8] net_tx_action at ffffffff81591cc8
#15 [ffff880169757e10] __do_softirq at ffffffff810916af
#16 [ffff880169757e80] run_ksoftirqd at ffffffff81091878
#17 [ffff880169757e98] smpboot_thread_fn at ffffffff810b9e0f
#18 [ffff880169757ec8] kthread at ffffffff810b16ff
#19 [ffff880169757f50] ret_from_fork at ffffffff816c2cd8
crash> struct sk_buff -x ffff88188fd90ec0
struct sk_buff {
next = 0x0,
prev = 0x0,
{
tstamp = {
tv64 = 0x0
},
skb_mstamp = {
{
v64 = 0x0,
{
stamp_us = 0x0,
stamp_jiffies = 0x0
}
}
}
},
sk = 0x0,
dev = 0x100000000,
cb = "\000\000\000\000\000\000\000\000\000\317\025\\\000\352\377\377*\b\000\000\337\003\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
_skb_refdst = 0x0,
sp = 0x0,
len = 0x0,
data_len = 0x0,
mac_len = 0x0,
hdr_len = 0x0,
{
csum = 0x0,
{
csum_start = 0x0,
csum_offset = 0x0
}
},
priority = 0x0,
ignore_df = 0x0,
cloned = 0x0,
ip_summed = 0x0,
nohdr = 0x0,
nfctinfo = 0x0,
pkt_type = 0x0,
fclone = 0x0,
ipvs_property = 0x0,
peeked = 0x0,
nf_trace = 0x0,
protocol = 0x0,
destructor = 0x0,
nfct = 0x0,
nf_bridge = 0x0,
headers_start = 0xffff88188fd90f58,
skb_iif = 0x0,
{
hash = 0x0,
__UNIQUE_ID_rh_kabi_hide35 = {
rxhash = 0x0
},
{<No data fields>}
},
vlan_proto = 0x0,
vlan_tci = 0x0,
tc_index = 0x0,
tc_verd = 0x0,
queue_mapping = 0x0,
ndisc_nodetype = 0x0,
pfmemalloc = 0x0,
ooo_okay = 0x0,
8)、进一步分析adapter的值,adapter作为ixgbe_xmit_frame_ring的第二个参数保存在rsi里,因此分析下rsi表示的adapter的信息(一开始会报ixgbe_adapter结构体未定义,需要先加载ixgbe模块的调试信息),从以下的输出信息里可以看到adapter->tx_ring[0]为0xffff88088a4f7400,并不为空;
crash> struct ixgbe_adapter -x ffff88203d4208c0
struct: invalid data structure reference: ixgbe_adapter
crash> mod -s ixgbe /usr/lib/debug/usr/lib/modules/3.10.0-327/kernel/drivers/net/ethernet/intel/ixgbe/ixgbe.ko.debug
MODULE NAME SIZE OBJECT FILE
ffffffffc0600000 ixgbe 301698 /usr/lib/debug/usr/lib/modules/3.10.0-327/kernel/drivers/net/ethernet/intel/ixgbe/ixgbe.ko.debug
crash>
crash>
crash>
crash>
crash> struct ixgbe_adapter -x ffff88203d4208c0
struct ixgbe_adapter {
active_vlans = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
netdev = 0xffff88203d420000,
pdev = 0xffff8810e923d000,
state = 0x140,
flags = 0x8050208,
flags2 = 0x1,
num_tx_queues = 0x8,
tx_itr_setting = 0x1,
tx_work_limit = 0x100,
num_rx_queues = 0x8,
rx_itr_setting = 0x1,
vxlan_port = 0x0,
geneve_port = 0x0,
tx_ring = {0xffff88088a4f7400, 0xffff88088a4f7c00, 0xffff880aac240400, 0xffff880234c3c400, 0xffff880234c3ac00, 0xffff880234c3e400, 0xffff880234c3fc00, 0xffff880234c3d400, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
3、总结
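根据crash信息,结合源码分析,访问空指针是由于ixgbe_xmit_frame_ring的tx_ring参数为空,tx_ring是从adapter->tx_ring[skb->queue_mapping]里获取的,但是adapter->tx_ring[skb->queue_mapping]又不为空!怀疑可能是有一些并发问题,tx_ring一开始获取的确实是空的,但是获取完后adapter的值马上被重新赋值了(从当时的日志看,网卡正在重新初始化过程)。另外需要注意:skb->queue_mapping为0的结论是按异常现场的RDI解析得到的,而RDI在ixgbe_xmit_frame_ring+67/+73处已被覆盖,skb指针实际保存在R13(ffff88132b78eb00)中;建议先按R13重新确认queue_mapping,如果其值大于等于num_tx_queues(8),adapter->tx_ring[queue_mapping]本身就是0x0(tx_ring数组第8项之后全为0x0),不需要并发也会得到空的tx_ring。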