ubuntu18.04:记一次由引用计数引发的crash

[    0.000000] Linux version 4.18.0-15-generic (buildd@lcy01-amd64-029) (gcc version 7.3.0 (Ubuntu 7.3.0-16ubuntu3)) #16~18.04.1-Ubuntu SMP Thu Feb 7 14:06:04 UTC 2019 (Ubuntu 4.18.0-15.16~18.04.1-generic 4.18.20)
[  122.189976] IPv4: Attempt to release TCP socket in state 10 00000000b898966d
[  125.157205] ------------[ cut here ]------------
[  125.157211] refcount_t overflow at inet_reqsk_alloc+0x115/0x130 in ksoftirqd/0[9], uid/euid: 0/0
[  125.157214] WARNING: CPU: 0 PID: 9 at /build/linux-hwe-9KJ07q/linux-hwe-4.18.0/kernel/panic.c:648 refcount_error_report+0x9c/0xac
[  125.157215] Kernel panic - not syncing: panic_on_warn set ...
               
[  125.157328] CPU: 0 PID: 9 Comm: ksoftirqd/0 Kdump: loaded Tainted: G           OE     4.18.0-15-generic #16~18.04.1-Ubuntu
[  125.157397] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015
[  125.157474] Call Trace:
[  125.157509]  dump_stack+0x63/0x85
[  125.157539]  panic+0xe4/0x244
[  125.157567]  ? refcount_error_report+0x9c/0xac
[  125.157603]  __warn+0x107/0x110
[  125.157632]  ? refcount_error_report+0x9c/0xac
[  125.157668]  report_bug+0xaa/0x120
[  125.157698]  fixup_bug.part.9+0x1c/0x40
[  125.157731]  do_error_trap+0x128/0x160
[  125.157763]  ? down_trylock+0x2e/0x40
[  125.157795]  ? vprintk_emit+0xec/0x290
[  125.157827]  do_invalid_op+0x20/0x30
[  125.157858]  invalid_op+0x14/0x20
[  125.157888] RIP: 0010:refcount_error_report+0x9c/0xac
[  125.157926] Code: 49 8b 94 24 80 00 00 00 41 56 49 8d 8d 50 0a 00 00 45 8b 85 a8 08 00 00 41 89 c1 48 89 de 48 c7 c7 50 d2 6d 86 e8 e4 f9 ff ff <0f> 0b 58 48 8d 65 e0 5b 41 5c 41 5d 41 5e 5d c3 0f 1f 44 00 00 55 
[  125.158066] RSP: 0018:ffffab89c06975b0 EFLAGS: 00010286
[  125.158106] RAX: 0000000000000000 RBX: ffffffff866ed740 RCX: 0000000000000006
[  125.158155] RDX: 0000000000000007 RSI: 0000000000000082 RDI: ffff8b22fac164b0
[  125.158205] RBP: ffffab89c06975d8 R08: 00000000000006a2 R09: 0000000000000004
[  125.158254] R10: 0000000000000000 R11: 0000000000000001 R12: ffffab89c0697738
[  125.158303] R13: ffff8b23405e4500 R14: 0000000000000000 R15: ffffab89c0697738
[  125.158355]  ex_handler_refcount+0x52/0x80
[  125.158389]  fixup_exception+0x3a/0x50
[  125.158420]  do_trap+0x8a/0x140
[  125.158449]  do_error_trap+0xba/0x160
[  125.158481]  ? csum_partial_copy_generic+0x1f6c/0x2b30
[  125.158521]  do_invalid_op+0x20/0x30
[  125.158552]  invalid_op+0x14/0x20
[  125.158581] RIP: 0010:inet_reqsk_alloc+0x115/0x130
[  125.158617] Code: 5d c3 85 c0 74 29 89 c2 8d 48 01 c1 e8 1f 81 fa ff ff ff 7f 41 0f 94 c0 41 08 c0 75 08 39 d1 0f 8d 5a ff ff ff e9 00 7a 0e 00 <e9> 60 ff ff ff 49 8b 7c 24 08 e8 1c 5e 96 ff 31 f6 eb bb 0f 1f 84 
[  125.158753] RSP: 0018:ffffab89c06977e8 EFLAGS: 00010a12
[  125.158792] RAX: ffff8b22efd1e730 RBX: ffff8b22fa7e3180 RCX: ffff8b22fa7e3200
[  125.158841] RDX: 00000000c0000000 RSI: ffff8b22efd1e730 RDI: ffff8b22fa7e3200
[  125.158889] RBP: ffffab89c0697800 R08: ffff8b22fac2a5d0 R09: 0000000000000000
[  125.158938] R10: ffffab89c0697940 R11: 0000000000000000 R12: ffffffff86c90a60
[  125.158986] R13: 0000000000000001 R14: ffff8b2275466000 R15: ffff8b22ed373b62
[  125.159036]  ? inet_reqsk_alloc+0x25/0x130
[  125.159070]  tcp_conn_request+0x1a6/0xc60
[  125.159103]  ? do_invalid_op+0x20/0x30
[  125.159134]  ? invalid_op+0x14/0x20
[  125.159165]  tcp_v4_conn_request+0x51/0x60
[  125.159198]  ? tcp_v4_conn_request+0x51/0x60
[  125.159233]  tcp_rcv_state_process+0x45a/0xe90
[  125.159268]  ? security_sock_rcv_skb+0x2f/0x50
[  125.159304]  tcp_v4_do_rcv+0x111/0x1c0
[  125.159336]  ? tcp_v4_do_rcv+0x111/0x1c0
[  125.159368]  tcp_v4_rcv+0xa77/0xae0
[  125.159399]  ip_local_deliver_finish+0x62/0x200
[  125.159435]  ip_local_deliver+0xdf/0xf0
[  125.159467]  ? ip_rcv_finish+0x420/0x420
[  125.159499]  ip_rcv_finish+0x126/0x420
[  125.159531]  ip_rcv+0x28f/0x360
[  125.159559]  ? inet_del_offload+0x40/0x40
[  125.159593]  __netif_receive_skb_core+0x48c/0xb70
[  125.159630]  ? ktime_get+0x43/0xa0
[  125.159708]  ? tcp4_gro_receive+0x137/0x1a0
[  125.159753]  __netif_receive_skb+0x18/0x60
[  125.159794]  ? __netif_receive_skb+0x18/0x60
[  125.159826]  netif_receive_skb_internal+0x45/0xe0
[  125.159857]  napi_gro_receive+0xc5/0xf0
[  125.159887]  e1000_clean_rx_irq+0x197/0x520 [e1000]
[  125.159921]  e1000_clean+0x27d/0x890 [e1000]
[  125.159953]  ? __switch_to_asm+0x34/0x70
[  125.159989]  net_rx_action+0x140/0x3a0
[  125.160025]  ? __switch_to_asm+0x34/0x70
[  125.160055]  __do_softirq+0xe4/0x2d4
[  125.160083]  run_ksoftirqd+0x2b/0x40
[  125.160109]  smpboot_thread_fn+0xfc/0x170
[  125.160138]  kthread+0x121/0x140
[  125.160162]  ? sort_range+0x30/0x30
[  125.160188]  ? kthread_create_worker_on_cpu+0x70/0x70
[  125.160221]  ret_from_fork+0x35/0x40

 

1.从日志中的“refcount_t overflow at inet_reqsk_alloc+0x115/0x130”可以看出,crash是由refcount(引用计数)异常引起的;

2.查看源码,

i.4.18.0-15-generic内核__inet_lookup_listener实现

/*
 * Find the best-matching listening socket (4.18.0-15-generic version).
 *
 * The port-hashed bucket is selected with inet_lhashfn(net, hnum). If that
 * bucket holds more than 10 sockets and hashinfo->lhash2 is set, the
 * port+address hashed table (lhash2) is tried first: once keyed on daddr,
 * then keyed on INADDR_ANY. In every other case, or whenever the lhash2
 * bucket is longer than the port bucket, the port bucket is walked and the
 * socket with the highest compute_score() wins.
 *
 * Returns the selected socket. On the port-bucket path the result is NULL
 * when no socket scores above 0; on the lhash2 path the result of
 * inet_lhash2_lookup() is passed through as-is.
 *
 * NOTE(review): nothing visible in this function modifies sk_refcnt on the
 * returned socket, and no rcu_read_lock() is taken here. Presumably the
 * caller must hold the RCU read lock and take its own reference if it needs
 * one -- confirm against the callers.
 */
struct sock *__inet_lookup_listener(struct net *net,
				    struct inet_hashinfo *hashinfo,
				    struct sk_buff *skb, int doff,
				    const __be32 saddr, __be16 sport,
				    const __be32 daddr, const unsigned short hnum,
				    const int dif, const int sdif)
{
	unsigned int hash = inet_lhashfn(net, hnum);
	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
	bool exact_dif = inet_exact_dif_match(net, skb);
	struct inet_listen_hashbucket *ilb2;
	struct sock *sk, *result = NULL;
	int score, hiscore = 0;
	unsigned int hash2;
	u32 phash = 0;

	/* Short port bucket, or no lhash2 table: scan the port bucket directly. */
	if (ilb->count <= 10 || !hashinfo->lhash2)
		goto port_lookup;

	/* Too many sk in the ilb bucket (which is hashed by port alone).
	 * Try lhash2 (which is hashed by port and addr) instead.
	 */

	hash2 = ipv4_portaddr_hash(net, daddr, hnum);
	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
	/* The lhash2 bucket is longer than the port bucket: use the port bucket. */
	if (ilb2->count > ilb->count)
		goto port_lookup;

	result = inet_lhash2_lookup(net, ilb2, skb, doff,
				    saddr, sport, daddr, hnum,
				    dif, sdif);
	if (result)
		return result;

	/* Lookup lhash2 with INADDR_ANY */

	hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
	/* Same length check as above, now for the INADDR_ANY bucket. */
	if (ilb2->count > ilb->count)
		goto port_lookup;

	return inet_lhash2_lookup(net, ilb2, skb, doff,
				  saddr, sport, daddr, hnum,
				  dif, sdif);

port_lookup:
	/* Walk the port bucket and keep the socket with the highest score. */
	sk_for_each_rcu(sk, &ilb->head) {
		score = compute_score(sk, net, hnum, daddr,
				      dif, sdif, exact_dif);
		if (score > hiscore) {
			if (sk->sk_reuseport) {
				phash = inet_ehashfn(net, daddr, hnum,
						     saddr, sport);
				result = reuseport_select_sock(sk, phash,
							       skb, doff);
				/* A reuseport selection ends the lookup immediately. */
				if (result)
					return result;
			}
			/* No reuseport selection: sk itself is the best so far. */
			result = sk;
			hiscore = score;
		}
	}
	/* Returned without any sk_refcnt increment in this function. */
	return result;
}

ii.3.10.0内核源码中__inet_lookup_listener实现

/*
 * Find the best-matching listening socket (3.10.0 version).
 *
 * Walks the port-hashed bucket chosen by inet_lhashfn(net, hnum) between
 * rcu_read_lock() and rcu_read_unlock(), keeping the socket with the highest
 * compute_score(). When the best socket has sk_reuseport set, later sockets
 * with an equal score may replace it, driven by phash.
 *
 * Unlike a plain lookup, this version takes a reference on the socket it
 * returns: sk_refcnt is incremented with atomic_inc_not_zero() before
 * returning.
 *
 * Returns the selected socket with its sk_refcnt incremented, or NULL when
 * nothing matched or the reference could not be taken.
 */
struct sock *__inet_lookup_listener(struct net *net,
				    struct inet_hashinfo *hashinfo,
				    const __be32 saddr, __be16 sport,
				    const __be32 daddr, const unsigned short hnum,
				    const int dif)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	unsigned int hash = inet_lhashfn(net, hnum);
	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
	int score, hiscore, matches = 0, reuseport = 0;
	u32 phash = 0;

	rcu_read_lock();
begin:
	/* (Re)start the scan: forget any candidate from a previous pass. */
	result = NULL;
	hiscore = 0;
	sk_nulls_for_each_rcu(sk, node, &ilb->head) {
		score = compute_score(sk, net, hnum, daddr, dif);
		if (score > hiscore) {
			/* New best candidate; remember whether it uses reuseport. */
			result = sk;
			hiscore = score;
			reuseport = sk->sk_reuseport;
			if (reuseport) {
				phash = inet_ehashfn(net, daddr, hnum,
						     saddr, sport);
				matches = 1;
			}
		} else if (score == hiscore && reuseport) {
			/*
			 * Another socket with the same score as a reuseport
			 * candidate: replace the candidate when the scaled hash
			 * is 0, then advance phash for the next tie.
			 */
			matches++;
			if (((u64)phash * matches) >> 32 == 0)
				result = sk;
			phash = next_pseudo_random32(phash);
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
		goto begin;
	if (result) {
		/* Take a reference; give up if sk_refcnt was already zero. */
		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
			result = NULL;
		/*
		 * Re-score now that the reference is held; if the score has
		 * dropped, release the reference and rescan.
		 */
		else if (unlikely(compute_score(result, net, hnum, daddr,
				  dif) < hiscore)) {
			sock_put(result);
			goto begin;
		}
	}
	rcu_read_unlock();
	return result;
}

 

3.分析原因:

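3.10.0内核的__inet_lookup_listener在查找到sock后,会将其引用计数加1(“if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))”);而4.18.0-15-generic内核的实现中已经去掉了这一步,函数返回的sock并没有增加引用计数。因此,如果调用方仍按旧内核的约定,在使用完sock后调用sock_put()释放引用,监听sock的引用计数就会被多减一次(日志中的“Attempt to release TCP socket in state 10”很可能就是由此而来,state 10即TCP_LISTEN),最终在inet_reqsk_alloc中触发refcount异常并导致crash。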

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

danielliu861

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值