邻居子系统之邻居项的回收


邻居项删除涉及邻居子系统的垃圾回收机制,所以这里单独分析其代码实现。

邻居项删除主要有如下三个原因:

  1. 邻居子系统感知到该邻居项不可达,将邻居项状态设置成了NUD_FAILED,这种邻居项会由异步垃圾回收机制回收;
  2. 该邻居项关联的L2地址发生了变化,但是它L3地址不变,这种邻居项需要先进入NUD_FAILED状态被回收,然后再创建新的;
  3. 邻居项长期未被使用,且内核需要使用内存了,会使用同步垃圾回收机制回收;

邻居项的销毁: neigh_destroy()

邻居项维持了一个引用计数,只有当引用计数归零的时候才会真正的执行销毁,引用计数的操作见neigh_hold()和neigh_release()。邻居项的销毁函数为neigh_destroy()。

// 调用该函数之前,邻居项肯定已经从邻居表的哈希表中删除了
void neigh_destroy(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) { // dead标记不为1,有bug
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}
    // 停止状态更新定时器
	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");
    // 邻居项销毁时,其关联的L2帧头部缓存也就没有意义了,尝试回收它们
    // 将所有的L2帧头部缓存的输出设置为neigh_blackhole(),其实现会丢包
	while ((hh = neigh->hh) != NULL) {
		neigh->hh = hh->hh_next;
		hh->hh_next = NULL;

		write_seqlock_bh(&hh->hh_lock);
		hh->hh_output = neigh_blackhole;
		write_sequnlock_bh(&hh->hh_lock);
		if (atomic_dec_and_test(&hh->hh_refcnt))
			kfree(hh);
	}
    // 清空发送缓存队列
	skb_queue_purge(&neigh->arp_queue);
    // 释放对网络设备对象、邻居协议参数的引用
	dev_put(neigh->dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
    // 递减邻居表中的邻居项计数
	atomic_dec(&neigh->tbl->entries);
	// 将内存归还给高速缓存
	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}

垃圾回收

邻居子系统为邻居项设计了两种垃圾回收机制,分别用于应对不同的情况:

  • 同步清理:当邻居子系统在分配邻居项时,如果发现当前邻居项个数已经超过了配置的参数,那么会触发同步清理;
  • 异步清理:邻居子系统通过启动一个周期性的定时器来清理邻居表中那些长时间未使用的邻居项,进而避免邻居项占用过多的内存;

同步清理

在neigh_alloc()中有如下逻辑:

/*
 * neigh_alloc - allocate a neighbour entry for @tbl (excerpt).
 *
 * Only the part that triggers synchronous garbage collection is shown;
 * the rest of the function, including the out_entries label, is elided.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/* Bump the table's entry counter; entries holds the count before the bump. */
	entries = atomic_inc_return(&tbl->entries) - 1;
	/*
	 * Run a forced GC when the count has reached gc_thresh3, or when it has
	 * reached gc_thresh2 and the last flush is more than 5 * HZ jiffies old.
	 */
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 && time_after(now, tbl->last_flush + 5 * HZ))) {
		/*
		 * If the GC reclaimed nothing and the count is still at or above
		 * gc_thresh3, give up via out_entries (not shown in this excerpt).
		 */
		if (!neigh_forced_gc(tbl) && entries >= tbl->gc_thresh3)
			goto out_entries;
	}
...
}

可见,触发同步清理需要满足下面任意一个条件:

  1. 当前邻居表中邻居项个数达到或超过了配置门限gc_thresh3;
  2. 当前邻居表中邻居项个数达到或超过了配置门限gc_thresh2,并且距离上次同步清理超过了5s;

此外,还可以看出,如果是条件1触发的同步清理,并且同步清理最终没有回收任何邻居项,那么会导致邻居项分配失败。

同步清理函数为neigh_forced_gc()。

/*
 * neigh_forced_gc - synchronously reclaim unused entries from @tbl.
 *
 * Walks every hash bucket under tbl->lock and unlinks each entry whose
 * reference count is exactly 1 and whose state does not include
 * NUD_PERMANENT.  The flush timestamp is refreshed before returning.
 *
 * Returns 1 if at least one entry was reclaimed, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int reclaimed = 0;
	int bucket;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
		/* link always points at the pointer that refers to the current entry. */
		struct neighbour **link = &tbl->hash_buckets[bucket];
		struct neighbour *cur;

		while ((cur = *link) != NULL) {
			write_lock(&cur->lock);

			/*
			 * Keep the entry if somebody else still references it
			 * (refcnt other than 1) or if it is permanent.
			 */
			if (atomic_read(&cur->refcnt) != 1 ||
			    (cur->nud_state & NUD_PERMANENT)) {
				write_unlock(&cur->lock);
				link = &cur->next;
				continue;
			}

			/* Unlink, flag as dead, then clean up outside the entry lock. */
			*link = cur->next;
			cur->dead = 1;
			reclaimed = 1;
			write_unlock(&cur->lock);
			neigh_cleanup_and_release(cur);
		}
	}

	/* Remember when this synchronous pass ran. */
	tbl->last_flush = jiffies;
	write_unlock_bh(&tbl->lock);

	return reclaimed;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
    // 首先尝试回调邻居参数的邻居项清理回调
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);
    // 发布邻居项删除消息
	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}

异步清理

每个邻居协议在向邻居子系统注册邻居协议对象neigh_table时,邻居子系统就会为其启动异步清理定时器neigh_table.gc_timer,其定时器函数为neigh_periodic_timer()。

/*
 * neigh_periodic_timer - handler for the table's gc_timer.
 *
 * @arg carries the struct neigh_table pointer.  Each run processes a single
 * hash bucket (selected by tbl->hash_chain_gc), reclaiming unreferenced
 * entries that are NUD_FAILED or have not been used for gc_staletime, and
 * then re-arms the timer.
 */
static void neigh_periodic_timer(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	struct neighbour *entry, **link;
	unsigned long now = jiffies;
	unsigned long expire, next_run;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock(&tbl->lock);

	/*
	 * Re-randomise reachable_time for every parameter set once 300 seconds
	 * have passed since the last time (per the original author's note, this
	 * is meant to hinder DoS attacks).
	 */
	if (time_after(now, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p = &tbl->parms;

		tbl->last_rand = now;
		while (p != NULL) {
			p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
			p = p->next;
		}
	}

	/* Pick this run's bucket and advance the cursor, wrapping with hash_mask. */
	link = &tbl->hash_buckets[tbl->hash_chain_gc];
	tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);

	while ((entry = *link) != NULL) {
		unsigned int state;

		write_lock(&entry->lock);
		state = entry->nud_state;

		/* Entries that are permanent or have a running timer are never reclaimed. */
		if (!(state & (NUD_PERMANENT | NUD_IN_TIMER))) {
			/* A confirmation counts as a use. */
			if (time_before(entry->used, entry->confirmed))
				entry->used = entry->confirmed;

			/*
			 * Reclaim when nobody else holds a reference and the entry
			 * is either failed or stale.
			 */
			if (atomic_read(&entry->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(now, entry->used + entry->parms->gc_staletime))) {
				*link = entry->next;
				entry->dead = 1;
				write_unlock(&entry->lock);
				neigh_cleanup_and_release(entry);
				continue;
			}
		}

		write_unlock(&entry->lock);
		link = &entry->next;
	}

	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	expire = tbl->parms.base_reachable_time >> 1;
	expire /= (tbl->hash_mask + 1);
	if (expire == 0)
		expire = 1;

	/* Intervals longer than HZ are passed through round_jiffies(). */
	next_run = now + expire;
	if (expire > HZ)
		next_run = round_jiffies(next_run);
	mod_timer(&tbl->gc_timer, next_run);

	write_unlock(&tbl->lock);
}

邻居项的刷新

此外,L2地址发生变化,或者L3地址发生了变化,都需要刷新相应的邻居项。所谓的刷新也就是清除邻居项(后续如果需要可以重建)。

其中L3地址变化是通过neigh_ifdown()通知邻居子系统;L2地址变化是通过neigh_changeaddr()通知邻居子系统,它们最终都会调用neigh_flush_dev()刷新邻居项。

L3地址变化: neigh_ifdown()

/*
 * neigh_ifdown - flush the neighbour state of @tbl associated with @dev.
 *
 * With tbl->lock held for writing, flushes the neighbour entries via
 * neigh_flush_dev() and calls pneigh_ifdown() (not shown here; presumably
 * it handles the proxy entries).  After the lock is released, the proxy
 * timer is stopped and the proxy queue is purged.
 *
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev); /* flush the neighbour entries */
	pneigh_ifdown(tbl, dev); 
	write_unlock_bh(&tbl->lock);
	/* Flush the proxy state: stop the proxy timer and empty the proxy queue. */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}

L2地址变化: neigh_changeaddr()

/*
 * neigh_changeaddr - flush the neighbour entries of @tbl associated with @dev.
 *
 * Takes tbl->lock for writing around a single neigh_flush_dev() call.
 * According to the article, this is the entry point used when the L2
 * address changes.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}

neigh_flush_dev()

/*
 * neigh_flush_dev - unlink every entry of @tbl that belongs to @dev.
 *
 * A NULL @dev matches all entries.  The function takes no table lock itself;
 * the callers shown in this file (neigh_ifdown() and neigh_changeaddr())
 * hold tbl->lock for writing around the call.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int bucket;

	for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
		struct neighbour **link = &tbl->hash_buckets[bucket];
		struct neighbour *cur;

		while ((cur = *link) != NULL) {
			/* Skip entries bound to a different device. */
			if (dev != NULL && cur->dev != dev) {
				link = &cur->next;
				continue;
			}

			/* Unlink first, then stop the timer and flag the entry dead. */
			*link = cur->next;
			write_lock(&cur->lock);
			neigh_del_timer(cur);
			cur->dead = 1;

			if (atomic_read(&cur->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&cur->arp_queue);
				cur->output = neigh_blackhole;
				cur->nud_state = (cur->nud_state & NUD_VALID) ?
						 NUD_NOARP : NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", cur);
			}

			write_unlock(&cur->lock);
			neigh_cleanup_and_release(cur);
		}
	}
}
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值