This vmcore analysis is interesting because it is based not only on the vmcore, but also on the source code and the x86_64 registers.
Assembly language is also needed to understand it. I am writing this record so that I remember it.
If something is wrong, please let me know.
1. crash7lates> bt
PID: 31556 TASK: ffff880f823caa00 CPU: 1 COMMAND: "cellsrv"
#0 [ffff880f823db850] machine_kexec at ffffffff8105d93c
#1 [ffff880f823db8b0] crash_kexec at ffffffff811103b3
#2 [ffff880f823db980] oops_end at ffffffff8101a788
#3 [ffff880f823db9b0] no_context at ffffffff8106b9cf
#4 [ffff880f823dba20] __bad_area_nosemaphore at ffffffff8106bc9d
#5 [ffff880f823dba70] bad_area at ffffffff8106be97
#6 [ffff880f823dbaa0] __do_page_fault at ffffffff8106c71e
#7 [ffff880f823dbb00] do_page_fault at ffffffff8106c81f
#8 [ffff880f823dbb40] page_fault at ffffffff816b5a9f
[exception RIP: rds_ib_inc_copy_to_user+104]
RIP: ffffffffa04607b8 RSP: ffff880f823dbbf8 RFLAGS: 00010287
RAX: 0000000000000340 RBX: 0000000000001000 RCX: 0000000000004000
RDX: 0000000000001000 RSI: ffff88176cea2000 RDI: ffff8817d291f520
RBP: ffff880f823dbc48 R8: 0000000000001340 R9: 0000000000001000
R10: 0000000000001200 R11: ffff880f823dc000 R12: ffff880f823dbed0
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000001000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#9 [ffff880f823dbc50] rds_recvmsg at ffffffffa041d837 [rds]
#10 [ffff880f823dbd10] sock_recvmsg at ffffffff815b3851
#11 [ffff880f823dbd40] ___sys_recvmsg at ffffffff815b48a6
#12 [ffff880f823dbeb0] __sys_recvmsg at ffffffff815b53b2
#13 [ffff880f823dbf40] sys_recvmsg at ffffffff815b5402
#14 [ffff880f823dbf50] system_call_fastpath at ffffffff816b031a
RIP: 00007fde653fdd0d RSP: 00007fda3b5eb500 RFLAGS: 00000293
RAX: ffffffffffffffda RBX: 00007fda3b5eb928 RCX: 00007fde653fdd0d
RDX: 0000000000000000 RSI: 00007fda3b5eb5d0 RDI: 0000000000000251
RBP: 00007fda3b5eb650 R8: 00007fda3b5eba10 R9: 0000000000000005
R10: 0000000000000000 R11: 0000000000000293 R12: 000060023360b688
R13: 000060023360b468 R14: 000060023360b468 R15: 00007fda3b5ebb18
ORIG_RAX: 000000000000002f CS: 0033 SS: 002b
2. crash7lates> dis -l rds_ib_inc_copy_to_user+104
net/rds/ib_recv.c: 862
0xffffffffa04607b8 <rds_ib_inc_copy_to_user+104>: mov 0xc(%r13),%r8d
3.crash7lates> whatis rds_ib_inc_copy_to_user
int rds_ib_inc_copy_to_user(struct rds_incoming *, struct iov_iter *);
4. Source code of rds_ib_inc_copy_to_user (net/rds/ib_recv.c):
844 int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
845 {
846 struct rds_ib_connection *ic = inc->i_conn->c_transport_data;
847 struct rds_ib_incoming *ibinc;
848 struct rds_page_frag *frag;
849 struct scatterlist *sg;
850 unsigned long to_copy;
851 unsigned long frag_off = 0;
852 int copied = 0;
853 int ret;
854 u32 len;
855
856 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
857 frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
858 len = be32_to_cpu(inc->i_hdr.h_len);
859 sg = frag->f_sg;
860
861 while (iov_iter_count(to) && copied < len) {
862 to_copy = min_t(unsigned long, iov_iter_count(to),
863 sg->length - frag_off);
864 to_copy = min_t(unsigned long, to_copy, len - copied);
865
866 /* XXX needs + offset for multiple recvs per page */
867 rds_stats_add(s_copy_to_user, to_copy);
868 ret = copy_page_to_iter(sg_page(sg),
869 sg->offset + frag_off,
870 to_copy,
871 to);
872 if (ret != to_copy)
873 return -EFAULT;
874
875 frag_off += to_copy;
876 copied += to_copy;
877
878 if (frag_off == sg->length) {
879 frag_off = 0;
880 sg = sg_next(sg);
881 }
882
883 if (copied % ic->i_frag_sz == 0) {
884 frag = list_entry(frag->f_item.next,
885 struct rds_page_frag, f_item);
886 frag_off = 0;
887 sg = frag->f_sg;
888 }
889
890 }
891
892 return copied;
893 }
5.crash7lates> dis -lr rds_ib_inc_copy_to_user+104
net/rds/ib_recv.c: 845
0xffffffffa0460750 <rds_ib_inc_copy_to_user>: push %rbp
0xffffffffa0460751 <rds_ib_inc_copy_to_user+1>: mov %rsp,%rbp
0xffffffffa0460754 <rds_ib_inc_copy_to_user+4>: push %r15
0xffffffffa0460756 <rds_ib_inc_copy_to_user+6>: push %r14
0xffffffffa0460758 <rds_ib_inc_copy_to_user+8>: push %r13
0xffffffffa046075a <rds_ib_inc_copy_to_user+10>: push %r12
0xffffffffa046075c <rds_ib_inc_copy_to_user+12>: push %rbx
0xffffffffa046075d <rds_ib_inc_copy_to_user+13>: sub $0x28,%rsp
0xffffffffa0460761 <rds_ib_inc_copy_to_user+17>: nopl 0x0(%rax,%rax,1)
net/rds/ib_recv.c: 846
0xffffffffa0460766 <rds_ib_inc_copy_to_user+22>: mov 0x18(%rdi),%rax
include/uapi/linux/swab.h: 60
0xffffffffa046076a <rds_ib_inc_copy_to_user+26>: mov 0x30(%rdi),%ebx
net/rds/ib_recv.c: 852
0xffffffffa046076d <rds_ib_inc_copy_to_user+29>: xor %r15d,%r15d
net/rds/ib_recv.c: 845
0xffffffffa0460770 <rds_ib_inc_copy_to_user+32>: mov %rsi,%r12
net/rds/ib_recv.c: 851
0xffffffffa0460773 <rds_ib_inc_copy_to_user+35>: xor %r14d,%r14d
net/rds/ib_recv.c: 846
0xffffffffa0460776 <rds_ib_inc_copy_to_user+38>: mov 0x98(%rax),%rax
include/uapi/linux/swab.h: 60
0xffffffffa046077d <rds_ib_inc_copy_to_user+45>: bswap %ebx
0xffffffffa046077f <rds_ib_inc_copy_to_user+47>: mov %ebx,-0x34(%rbp)
net/rds/ib_recv.c: 846
0xffffffffa0460782 <rds_ib_inc_copy_to_user+50>: mov %rax,-0x50(%rbp)
net/rds/ib_recv.c: 857
0xffffffffa0460786 <rds_ib_inc_copy_to_user+54>: mov -0x20(%rdi),%rax
0xffffffffa046078a <rds_ib_inc_copy_to_user+58>: mov %rax,-0x48(%rbp)
net/rds/ib_recv.c: 859
0xffffffffa046078e <rds_ib_inc_copy_to_user+62>: lea 0x20(%rax),%r13
net/rds/ib_recv.c: 867
0xffffffffa0460792 <rds_ib_inc_copy_to_user+66>: mov $0x19900,%rax
0xffffffffa0460799 <rds_ib_inc_copy_to_user+73>: mov %rax,-0x40(%rbp)
0xffffffffa046079d <rds_ib_inc_copy_to_user+77>: nopl (%rax)
0xffffffffa04607a0 <rds_ib_inc_copy_to_user+80>: mov 0x10(%r12),%rax
net/rds/ib_recv.c: 861
0xffffffffa04607a5 <rds_ib_inc_copy_to_user+85>: test %rax,%rax
0xffffffffa04607a8 <rds_ib_inc_copy_to_user+88>: je 0xffffffffa046085c <rds_ib_inc_copy_to_user+268>
0xffffffffa04607ae <rds_ib_inc_copy_to_user+94>: cmp -0x34(%rbp),%r15d
0xffffffffa04607b2 <rds_ib_inc_copy_to_user+98>: jae 0xffffffffa046085c <rds_ib_inc_copy_to_user+268>
net/rds/ib_recv.c: 862
0xffffffffa04607b8 <rds_ib_inc_copy_to_user+104>: mov 0xc(%r13),%r8d
From bt, "R13: 0000000000000000", so r13 is NULL. The faulting instruction "mov 0xc(%r13),%r8d" therefore dereferences a NULL pointer, which is why this bug occurred.
Now let us check r13.
rdi holds the first argument, a pointer to struct rds_incoming ("inc").
struct rds_incoming {
atomic_t i_refcount;
struct list_head i_item;
struct rds_connection *i_conn;
struct rds_header i_hdr;
unsigned long i_rx_jiffies;
__be32 i_saddr;
struct rds_connection *i_oconn;
struct sk_buff *i_skb;
rds_rdma_cookie_t i_rdma_cookie;
struct timeval i_rx_tstamp;
u64 i_rx_lat_trace[4];
}
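"mov -0x20(%rdi),%rax" reads from rdi - 0x20. Since ii_inc is at offset 32 (0x20) in struct rds_ib_incoming (see below), rdi - 0x20 is the address of the enclosing struct rds_ib_incoming ("ibinc"), and the value loaded into rax is ibinc->ii_frags.next, i.e. the first frag.
crash7lates> struct rds_ib_incoming -o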
struct rds_ib_incoming {
[0] struct list_head ii_frags;
[16] struct list_head ii_cache_entry;
[32] struct rds_incoming ii_inc;
}
From "lea 0x20(%rax),%r13", r13 = rax + 0x20 = frag + 32, which is the address of frag->f_sg:
crash7lates> struct rds_page_frag -o
struct rds_page_frag {
[0] struct list_head f_item;
[16] struct list_head f_cache_entry;
[32] struct scatterlist f_sg[4];
}
So r13 is "sg", a pointer to struct scatterlist. That is, sg is NULL.
Now let us check why sg is NULL.
crash7lates> dis -l rds_ib_inc_copy_to_user
net/rds/ib_recv.c: 845
0xffffffffa0460750 <rds_ib_inc_copy_to_user>: push %rbp
0xffffffffa0460751 <rds_ib_inc_copy_to_user+1>: mov %rsp,%rbp
0xffffffffa0460754 <rds_ib_inc_copy_to_user+4>: push %r15
0xffffffffa0460756 <rds_ib_inc_copy_to_user+6>: push %r14
0xffffffffa0460758 <rds_ib_inc_copy_to_user+8>: push %r13
0xffffffffa046075a <rds_ib_inc_copy_to_user+10>: push %r12
0xffffffffa046075c <rds_ib_inc_copy_to_user+12>: push %rbx
0xffffffffa046075d <rds_ib_inc_copy_to_user+13>: sub $0x28,%rsp
0xffffffffa0460761 <rds_ib_inc_copy_to_user+17>: nopl 0x0(%rax,%rax,1)
net/rds/ib_recv.c: 846
0xffffffffa0460766 <rds_ib_inc_copy_to_user+22>: mov 0x18(%rdi),%rax
include/uapi/linux/swab.h: 60
0xffffffffa046076a <rds_ib_inc_copy_to_user+26>: mov 0x30(%rdi),%ebx
net/rds/ib_recv.c: 852
0xffffffffa046076d <rds_ib_inc_copy_to_user+29>: xor %r15d,%r15d
net/rds/ib_recv.c: 845
0xffffffffa0460770 <rds_ib_inc_copy_to_user+32>: mov %rsi,%r12
net/rds/ib_recv.c: 851
0xffffffffa0460773 <rds_ib_inc_copy_to_user+35>: xor %r14d,%r14d
net/rds/ib_recv.c: 846
0xffffffffa0460776 <rds_ib_inc_copy_to_user+38>: mov 0x98(%rax),%rax
include/uapi/linux/swab.h: 60
0xffffffffa046077d <rds_ib_inc_copy_to_user+45>: bswap %ebx
0xffffffffa046077f <rds_ib_inc_copy_to_user+47>: mov %ebx,-0x34(%rbp)
net/rds/ib_recv.c: 846
0xffffffffa0460782 <rds_ib_inc_copy_to_user+50>: mov %rax,-0x50(%rbp)
net/rds/ib_recv.c: 857
0xffffffffa0460786 <rds_ib_inc_copy_to_user+54>: mov -0x20(%rdi),%rax
0xffffffffa046078a <rds_ib_inc_copy_to_user+58>: mov %rax,-0x48(%rbp)
net/rds/ib_recv.c: 859
0xffffffffa046078e <rds_ib_inc_copy_to_user+62>: lea 0x20(%rax),%r13
net/rds/ib_recv.c: 867
0xffffffffa0460792 <rds_ib_inc_copy_to_user+66>: mov $0x19900,%rax
0xffffffffa0460799 <rds_ib_inc_copy_to_user+73>: mov %rax,-0x40(%rbp)
0xffffffffa046079d <rds_ib_inc_copy_to_user+77>: nopl (%rax)
0xffffffffa04607a0 <rds_ib_inc_copy_to_user+80>: mov 0x10(%r12),%rax
net/rds/ib_recv.c: 861
0xffffffffa04607a5 <rds_ib_inc_copy_to_user+85>: test %rax,%rax
0xffffffffa04607a8 <rds_ib_inc_copy_to_user+88>: je 0xffffffffa046085c <rds_ib_inc_copy_to_user+268>
0xffffffffa04607ae <rds_ib_inc_copy_to_user+94>: cmp -0x34(%rbp),%r15d
0xffffffffa04607b2 <rds_ib_inc_copy_to_user+98>: jae 0xffffffffa046085c <rds_ib_inc_copy_to_user+268>
net/rds/ib_recv.c: 862
0xffffffffa04607b8 <rds_ib_inc_copy_to_user+104>: mov 0xc(%r13),%r8d
0xffffffffa04607bc <rds_ib_inc_copy_to_user+108>: sub %r14,%r8
0xffffffffa04607bf <rds_ib_inc_copy_to_user+111>: cmp %rax,%r8
0xffffffffa04607c2 <rds_ib_inc_copy_to_user+114>: cmovbe %r8,%rax
net/rds/ib_recv.c: 864
0xffffffffa04607c6 <rds_ib_inc_copy_to_user+118>: mov -0x34(%rbp),%r8d
0xffffffffa04607ca <rds_ib_inc_copy_to_user+122>: sub %r15d,%r8d
0xffffffffa04607cd <rds_ib_inc_copy_to_user+125>: cmp %r8,%rax
0xffffffffa04607d0 <rds_ib_inc_copy_to_user+128>: mov %r8,%rbx
0xffffffffa04607d3 <rds_ib_inc_copy_to_user+131>: cmovbe %rax,%rbx
net/rds/ib_recv.c: 867
0xffffffffa04607d7 <rds_ib_inc_copy_to_user+135>: mov %gs:0x5fbac95a(%rip),%eax # 0xd138
0xffffffffa04607de <rds_ib_inc_copy_to_user+142>: cltq
0xffffffffa04607e0 <rds_ib_inc_copy_to_user+144>: mov -0x40(%rbp),%rdx
0xffffffffa04607e4 <rds_ib_inc_copy_to_user+148>: add -0x7e437800(,%rax,8),%rdx
0xffffffffa04607ec <rds_ib_inc_copy_to_user+156>: add %rbx,0xd8(%rdx)
net/rds/ib_recv.c: 868
0xffffffffa04607f3 <rds_ib_inc_copy_to_user+163>: mov 0x8(%r13),%esi
include/linux/scatterlist.h: 101
0xffffffffa04607f7 <rds_ib_inc_copy_to_user+167>: mov 0x0(%r13),%rdi
net/rds/ib_recv.c: 868
0xffffffffa04607fb <rds_ib_inc_copy_to_user+171>: mov %r12,%rcx
0xffffffffa04607fe <rds_ib_inc_copy_to_user+174>: mov %rbx,%rdx
include/linux/scatterlist.h: 101
0xffffffffa0460801 <rds_ib_inc_copy_to_user+177>: and $0xfffffffffffffffc,%rdi
net/rds/ib_recv.c: 868
0xffffffffa0460805 <rds_ib_inc_copy_to_user+181>: add %r14,%rsi
0xffffffffa0460808 <rds_ib_inc_copy_to_user+184>: callq 0xffffffff81333000 <copy_page_to_iter>
net/rds/ib_recv.c: 872
0xffffffffa046080d <rds_ib_inc_copy_to_user+189>: cltq
0xffffffffa046080f <rds_ib_inc_copy_to_user+191>: cmp %rax,%rbx
0xffffffffa0460812 <rds_ib_inc_copy_to_user+194>: jne 0xffffffffa0460880 <rds_ib_inc_copy_to_user+304>
net/rds/ib_recv.c: 878
0xffffffffa0460814 <rds_ib_inc_copy_to_user+196>: mov 0xc(%r13),%eax
net/rds/ib_recv.c: 875
0xffffffffa0460818 <rds_ib_inc_copy_to_user+200>: add %rbx,%r14
net/rds/ib_recv.c: 876
0xffffffffa046081b <rds_ib_inc_copy_to_user+203>: add %ebx,%r15d
net/rds/ib_recv.c: 878
0xffffffffa046081e <rds_ib_inc_copy_to_user+206>: cmp %r14,%rax
0xffffffffa0460821 <rds_ib_inc_copy_to_user+209>: je 0xffffffffa0460870 <rds_ib_inc_copy_to_user+288>
net/rds/ib_recv.c: 883
0xffffffffa0460823 <rds_ib_inc_copy_to_user+211>: mov -0x50(%rbp),%rax
0xffffffffa0460827 <rds_ib_inc_copy_to_user+215>: movzwl 0x123e(%rax),%ecx
0xffffffffa046082e <rds_ib_inc_copy_to_user+222>: mov %r15d,%eax
0xffffffffa0460831 <rds_ib_inc_copy_to_user+225>: cltd
0xffffffffa0460832 <rds_ib_inc_copy_to_user+226>: idiv %ecx
0xffffffffa0460834 <rds_ib_inc_copy_to_user+228>: test %edx,%edx
0xffffffffa0460836 <rds_ib_inc_copy_to_user+230>: jne 0xffffffffa04607a0 <rds_ib_inc_copy_to_user+80>
net/rds/ib_recv.c: 884
0xffffffffa046083c <rds_ib_inc_copy_to_user+236>: mov -0x48(%rbp),%rax
net/rds/ib_recv.c: 886
0xffffffffa0460840 <rds_ib_inc_copy_to_user+240>: xor %r14d,%r14d
net/rds/ib_recv.c: 884
0xffffffffa0460843 <rds_ib_inc_copy_to_user+243>: mov (%rax),%rax
0xffffffffa0460846 <rds_ib_inc_copy_to_user+246>: mov %rax,-0x48(%rbp)
net/rds/ib_recv.c: 887
0xffffffffa046084a <rds_ib_inc_copy_to_user+250>: lea 0x20(%rax),%r13
0xffffffffa046084e <rds_ib_inc_copy_to_user+254>: mov 0x10(%r12),%rax
net/rds/ib_recv.c: 861
0xffffffffa0460853 <rds_ib_inc_copy_to_user+259>: test %rax,%rax
0xffffffffa0460856 <rds_ib_inc_copy_to_user+262>: jne 0xffffffffa04607ae <rds_ib_inc_copy_to_user+94>
net/rds/ib_recv.c: 893
0xffffffffa046085c <rds_ib_inc_copy_to_user+268>: add $0x28,%rsp
0xffffffffa0460860 <rds_ib_inc_copy_to_user+272>: mov %r15d,%eax
0xffffffffa0460863 <rds_ib_inc_copy_to_user+275>: pop %rbx
0xffffffffa0460864 <rds_ib_inc_copy_to_user+276>: pop %r12
0xffffffffa0460866 <rds_ib_inc_copy_to_user+278>: pop %r13
0xffffffffa0460868 <rds_ib_inc_copy_to_user+280>: pop %r14
0xffffffffa046086a <rds_ib_inc_copy_to_user+282>: pop %r15
0xffffffffa046086c <rds_ib_inc_copy_to_user+284>: pop %rbp
0xffffffffa046086d <rds_ib_inc_copy_to_user+285>: retq
0xffffffffa046086e <rds_ib_inc_copy_to_user+286>: xchg %ax,%ax
net/rds/ib_recv.c: 880
0xffffffffa0460870 <rds_ib_inc_copy_to_user+288>: mov %r13,%rdi
net/rds/ib_recv.c: 879
0xffffffffa0460873 <rds_ib_inc_copy_to_user+291>: xor %r14d,%r14d
net/rds/ib_recv.c: 880
0xffffffffa0460876 <rds_ib_inc_copy_to_user+294>: callq 0xffffffff8132fab0 <sg_next>
0xffffffffa046087b <rds_ib_inc_copy_to_user+299>: mov %rax,%r13
0xffffffffa046087e <rds_ib_inc_copy_to_user+302>: jmp 0xffffffffa0460823 <rds_ib_inc_copy_to_user+211>
net/rds/ib_recv.c: 893
0xffffffffa0460880 <rds_ib_inc_copy_to_user+304>: add $0x28,%rsp
net/rds/ib_recv.c: 873
0xffffffffa0460884 <rds_ib_inc_copy_to_user+308>: mov $0xfffffff2,%eax
net/rds/ib_recv.c: 893
0xffffffffa0460889 <rds_ib_inc_copy_to_user+313>: pop %rbx
0xffffffffa046088a <rds_ib_inc_copy_to_user+314>: pop %r12
0xffffffffa046088c <rds_ib_inc_copy_to_user+316>: pop %r13
0xffffffffa046088e <rds_ib_inc_copy_to_user+318>: pop %r14
0xffffffffa0460890 <rds_ib_inc_copy_to_user+320>: pop %r15
0xffffffffa0460892 <rds_ib_inc_copy_to_user+322>: pop %rbp
0xffffffffa0460893 <rds_ib_inc_copy_to_user+323>: retq
From "net/rds/ib_recv.c: 846
0xffffffffa0460782 <rds_ib_inc_copy_to_user+50>: mov %rax,-0x50(%rbp)"
and "net/rds/ib_recv.c: 883
0xffffffffa0460823 <rds_ib_inc_copy_to_user+211>: mov -0x50(%rbp),%rax
0xffffffffa0460827 <rds_ib_inc_copy_to_user+215>: movzwl 0x123e(%rax),%ecx",
-0x50(%rbp) holds "ic", so rcx (ecx) holds "ic->i_frag_sz". From bt, RCX is 0000000000004000.
crash7lates> eval 0x0000000000004000
hexadecimal: 4000 (16KB)
decimal: 16384
So ic->i_frag_sz is 16 KB (16384 bytes).
From "net/rds/ib_recv.c: 876
0xffffffffa046081b <rds_ib_inc_copy_to_user+203>: add %ebx,%r15d",
r15 holds "copied". From bt, R15 is 0000000000001000, so copied is
crash7lates> eval 0x0000000000001000
hexadecimal: 1000 (4KB)
decimal: 4096
883 if (copied % ic->i_frag_sz == 0) {
884 frag = list_entry(frag->f_item.next,
885 struct rds_page_frag, f_item);
886 frag_off = 0;
887 sg = frag->f_sg;
888 }
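Since copied is 4096 and ic->i_frag_sz is 16384, "if (copied % ic->i_frag_sz == 0)" is false.
So sg is not reset to frag->f_sg here. It keeps the value assigned by "sg = sg_next(sg)" at line 880 (R14, i.e. frag_off, is 0, which is consistent with that branch having been taken), and that value is NULL. The next loop iteration then dereferences the NULL sg at line 862.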