neighbor 子系统

ipv4对应的arp表格如下:

/*
 * Neighbour table for IPv4 (the ARP cache).
 *
 * All time values are expressed in jiffies (multiples of HZ).  The table is
 * set up and linked into the global neigh_tables list by neigh_table_init(),
 * which arp_init() calls.
 */
struct neigh_table arp_tbl = {
    .family     = AF_INET,
    /* Size of each slab object: neigh_table_init_no_netlink() passes this to
     * kmem_cache_create().  The extra 4 bytes presumably hold the key
     * (an IPv4 address) behind struct neighbour - confirm against the
     * struct layout. */
    .entry_size = sizeof(struct neighbour) + 4, 
    /* neigh_create() copies and compares key_len bytes of primary_key. */
    .key_len    = 4, 
    .hash       = arp_hash,
    /* Called by neigh_create() for protocol specific setup of a new entry. */
    .constructor    = arp_constructor,
    .proxy_redo = parp_redo,
    /* Used as the slab cache name and as the proc entry name. */
    .id     = "arp_cache",
    /* Default per-table parameters; arp_constructor() replaces an entry's
     * parms with the per-device in_dev->arp_parms. */
    .parms      = {  
        .tbl            = &arp_tbl,
        /* Input to neigh_rand_reach_time() when the table is initialised. */
        .base_reachable_time    = 30 * HZ,
        /* Interval between probes in NUD_INCOMPLETE / NUD_PROBE. */
        .retrans_time       = 1 * HZ,
        .gc_staletime       = 60 * HZ,
        /* Overwritten by neigh_table_init_no_netlink(). */
        .reachable_time     = 30 * HZ,
        /* Time spent in NUD_DELAY before the timer moves on to probing. */
        .delay_probe_time   = 5 * HZ,
        /* Max skbs held on arp_queue while an entry is NUD_INCOMPLETE. */
        .queue_len      = 3, 
        .ucast_probes       = 3, 
        .mcast_probes       = 3, 
        .anycast_delay      = 1 * HZ,
        .proxy_delay        = (8 * HZ) / 10,
        .proxy_qlen     = 64,
        .locktime       = 1 * HZ,
    },   
    .gc_interval    = 30 * HZ,
    .gc_thresh1 = 128, 
    .gc_thresh2 = 512, 
    .gc_thresh3 = 1024,
};
EXPORT_SYMBOL(arp_tbl);
在arp_init函数中,对其初始化,并把它加入到neigh_tables对应的链表里

/*
 * Initialisation of the IPv4 ARP layer.
 *
 * Initialises arp_tbl and links it into the global neighbour table list,
 * then registers the ARP packet type, the proc interface, the sysctl
 * entries (when CONFIG_SYSCTL is set) and a netdevice notifier.
 */
void __init arp_init(void)
{
    /* Must come first: the table has to be usable before anything below
     * can hand it packets or events. */
    neigh_table_init(&arp_tbl);

    dev_add_pack(&arp_packet_type);
    arp_proc_init();
#ifdef CONFIG_SYSCTL 
    /* Registers the table-wide default parms (no device) under "ipv4". */
    neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL);
#endif   
    register_netdevice_notifier(&arp_netdev_notifier);
}
/*
 * Initialise a neighbour table and publish it on the global neigh_tables
 * list.
 *
 * The table is always added at the head of the list.  If a table for the
 * same address family was already registered, an error is logged and the
 * stack is dumped, but the registration still takes place.
 */
void neigh_table_init(struct neigh_table *tbl)
{
    struct neigh_table *dup;

    neigh_table_init_no_netlink(tbl);

    write_lock(&neigh_tbl_lock);

    /* Search for an already registered table of the same family. */
    dup = neigh_tables;
    while (dup && dup->family != tbl->family)
        dup = dup->next;

    /* Push the new table onto the head of the list. */
    tbl->next = neigh_tables;
    neigh_tables = tbl;

    write_unlock(&neigh_tbl_lock);

    /* Report the duplicate outside the lock. */
    if (unlikely(dup)) {
        printk(KERN_ERR "NEIGH: Registering multiple tables for "
               "family %d\n", tbl->family);
        dump_stack();
    }
}

初始化过程主要涉及slab缓存的创建和hash表的创建:

/*
 * One-time setup of a neighbour table, without linking it into the global
 * table list (neigh_table_init() does that).
 *
 * Creates the slab cache for entries, the per-cpu statistics, the proc
 * entry, the neighbour hash table and the proxy hash buckets, and starts
 * the periodic work and the proxy timer.
 *
 * Any allocation failure here is fatal: the function panics rather than
 * leaving a half-initialised table behind that later code would
 * dereference.
 */
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
    unsigned long now = jiffies;
    unsigned long phsize;

    write_pnet(&tbl->parms.net, &init_net);
    atomic_set(&tbl->parms.refcnt, 1);
    tbl->parms.reachable_time =
              neigh_rand_reach_time(tbl->parms.base_reachable_time);

    /* SLAB_PANIC makes kmem_cache_create() panic on failure, so no
     * explicit check is needed for the cache itself. */
    if (!tbl->kmem_cachep)
        tbl->kmem_cachep =
            kmem_cache_create(tbl->id, tbl->entry_size, 0,
                      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                      NULL);

    tbl->stats = alloc_percpu(struct neigh_statistics);
    if (!tbl->stats)
        panic("cannot create neighbour cache statistics");

    if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
                  &neigh_stat_seq_fops, tbl))
        panic("cannot create neighbour proc dir entry");

    /* Initial neighbour hash table; presumably 2^3 buckets - it is grown
     * later by neigh_hash_grow() as entries are added. */
    RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

    phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
    tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

    if (!tbl->nht || !tbl->phash_buckets)
        panic("cannot allocate neighbour cache hashes");

    rwlock_init(&tbl->lock);
    INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
    schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
    setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
    skb_queue_head_init_class(&tbl->proxy_queue,
            &neigh_table_proxy_queue_class);

    tbl->last_flush = now;
    tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}

在函数neigh_create中会创建新的neighbour数据结构:

struct neighbour *neigh_create(struct neigh_table *tbl,const void *pkey,
				struct net_device *dev)

该函数首先分配内存,然后初始化:

 u32 hash_val;
    int key_len = tbl->key_len;
    int error;
    struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
    struct neigh_hash_table *nht;
    memcpy(n->primary_key, pkey, key_len);
    n->dev = dev;
    /* Protocol specific setup. */
    if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
        rc = ERR_PTR(error);
        goto out_neigh_release;
    }

    /* Device specific setup. */
    if (n->parms->neigh_setup &&
        (error = n->parms->neigh_setup(n)) < 0) {
        rc = ERR_PTR(error);
        goto out_neigh_release;
    }

    n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

/*
 * Allocate and pre-initialise a neighbour entry for @tbl.
 *
 * The entry starts in NUD_NONE with output set to neigh_blackhole, holds
 * one reference, uses a clone of the table's default parms and is marked
 * dead until neigh_create() inserts it into the hash table.
 *
 * Returns the new entry, or NULL if the atomic allocation fails.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
    struct neighbour *n;
    unsigned long now = jiffies;

    /* GFP_ATOMIC allocations can fail; never touch a NULL entry. */
    n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
    if (!n)
        return NULL;

    skb_queue_head_init(&n->arp_queue);
    rwlock_init(&n->lock);
    seqlock_init(&n->ha_lock);
    n->updated    = n->used = now;
    n->nud_state      = NUD_NONE;
    n->output     = neigh_blackhole;
    seqlock_init(&n->hh.hh_lock);
    n->parms      = neigh_parms_clone(&tbl->parms);
    setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

    NEIGH_CACHE_STAT_INC(tbl, allocs);
    n->tbl        = tbl;
    atomic_set(&n->refcnt, 1);
    /* Not yet in the hash table; neigh_create() clears this on insert. */
    n->dead       = 1;
    return n;
}

其中constructor会调用arp_constructor(struct neighbour *neigh)函数:

根据ip地址得到其类型,

static int arp_constructor(struct neighbour *neigh)
{
    __be32 addr = *(__be32 *)neigh->primary_key;
    struct net_device *dev = neigh->dev;
    struct in_device *in_dev;
    struct neigh_parms *parms;

    rcu_read_lock();
    in_dev = __in_dev_get_rcu(dev);
    if (in_dev == NULL) {
        rcu_read_unlock();
        return -EINVAL;
    }

    neigh->type = inet_addr_type(dev_net(dev), addr);

    parms = in_dev->arp_parms;
    __neigh_parms_put(neigh->parms);
    neigh->parms = neigh_parms_clone(parms);
    rcu_read_unlock();
    if (!dev->header_ops) {     //在函数ether_setup中初始化为eth_header_ops
        neigh->nud_state = NUD_NOARP;
        neigh->ops = &arp_direct_ops;
        neigh->output = neigh_direct_output;
    } else { 
        if (neigh->type == RTN_MULTICAST) {
            neigh->nud_state = NUD_NOARP;
            arp_mc_map(addr, neigh->ha, dev, 1);
        } else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
            neigh->nud_state = NUD_NOARP;
            memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
        } else if (neigh->type == RTN_BROADCAST ||
               (dev->flags & IFF_POINTOPOINT)) {
            neigh->nud_state = NUD_NOARP;
            memcpy(neigh->ha, dev->broadcast, dev->addr_len);
        }
        if (dev->header_ops->cache) //eth_header_cache
            neigh->ops = &arp_hh_ops;
        else
            neigh->ops = &arp_generic_ops;

        if (neigh->nud_state & NUD_VALID)
            neigh->output = neigh->ops->connected_output; 
        else
            neigh->output = neigh->ops->output;  \\neigh_resolve_output
    }
    return 0;
}
/*
 * Classify an IPv4 address as one of the RTN_* types.
 *
 * The zero network and the limited broadcast address are RTN_BROADCAST and
 * multicast addresses are RTN_MULTICAST without any table lookup.  For all
 * other addresses the local FIB table is consulted: a hit yields the type
 * recorded in the result, provided @dev is NULL or matches the device of
 * the result; otherwise the address is RTN_UNICAST.  When there is no local
 * table at all the result is RTN_BROADCAST.
 */
static inline unsigned __inet_dev_addr_type(struct net *net,
					    const struct net_device *dev,
					    __be32 addr)
{
	struct flowi4		fl4 = { .daddr = addr };
	struct fib_result	res;
	struct fib_table	*tb;
	unsigned		type;

	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
		return RTN_BROADCAST;
	if (ipv4_is_multicast(addr))
		return RTN_MULTICAST;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	tb = fib_get_table(net, RT_TABLE_LOCAL);
	if (!tb)
		return RTN_BROADCAST;

	type = RTN_UNICAST;
	rcu_read_lock();
	/* A lookup result of 0 means the address was found. */
	if (fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF) == 0 &&
	    (!dev || dev == res.fi->fib_dev))
		type = res.type;
	rcu_read_unlock();

	return type;
}


根据ip地址以及网卡设备查找hash表,查不到的话添加到hash表中

 nht = rcu_dereference_protected(tbl->nht,
                    lockdep_is_held(&tbl->lock));

    if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
        nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

    hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

    if (n->parms->dead) {
        rc = ERR_PTR(-EINVAL);
        goto out_tbl_unlock;
    }

    for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
                        lockdep_is_held(&tbl->lock));
         n1 != NULL;
         n1 = rcu_dereference_protected(n1->next,
            lockdep_is_held(&tbl->lock))) {
        if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
            neigh_hold(n1);
            rc = n1;
            goto out_tbl_unlock;
        }
    }

    n->dead = 0;
    neigh_hold(n);
    rcu_assign_pointer(n->next,
               rcu_dereference_protected(nht->hash_buckets[hash_val],
                             lockdep_is_held(&tbl->lock)));
    rcu_assign_pointer(nht->hash_buckets[hash_val], n);
    write_unlock_bh(&tbl->lock);
    NEIGH_PRINTK2("neigh %p is created.\n", n);
    rc = n;
out:
    return rc;  


模拟neighbour查找的函数__ipv4_neigh_lookup,dump系统的neighbour信息:

/*
 * Debug helper: walk every bucket of the ARP neighbour hash table and
 * print one line per entry (bucket index, device, key, NUD state, type).
 */
struct neigh_table *tbl = &arp_tbl;
struct neigh_hash_table *nht;
struct neighbour *n;
u32 hash_val;

printk("entry=%d\n", atomic_read(&tbl->entries));

/*
 * rcu_dereference_bh() is only valid inside a BH RCU read-side critical
 * section; without it the hash table could be freed (e.g. after a grow)
 * while it is being walked.  The section nests safely if the caller
 * already holds one.
 */
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
for (hash_val = 0; hash_val < (1U << nht->hash_shift); hash_val++) {
    for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
         n != NULL;
         n = rcu_dereference_bh(n->next)) {
        /* One printk per entry so a line cannot be split by other
         * log output; hash_val is unsigned, hence %u. */
        printk("hash_val=%u,dev=%s,key=%x,state=%x,type=%d\n",
               hash_val, n->dev->name, *(u32 *)n->primary_key,
               n->nud_state, n->type);
    }
}
rcu_read_unlock_bh();
输出:

[14799.054977] hash_val=0,dev=wlan0,key=6f01a8c0,state=2,type=1
[14799.054982] hash_val=1,dev=eth0,key=6fd20880,state=4,type=1
[14799.054987] hash_val=2,dev=wlan0,key=101a8c0,state=4,type=1
[14799.054992] hash_val=3,dev=eth0,key=160000e0,state=40,type=5
[14799.054996] hash_val=3,dev=wlan0,key=fb0000e0,state=40,type=5
[14799.055001] hash_val=8,dev=eth0,key=6ed20880,state=20,type=1
[14799.055006] hash_val=9,dev=eth0,key=70d20880,state=20,type=1
[14799.055010] hash_val=9,dev=lo,key=0,state=40,type=3
[14799.055015] hash_val=12,dev=eth0,key=fb0000e0,state=40,type=5
root@zhenghan:/opt/module/net/udp# arp -n
Address                  HWtype  HWaddress           Flags Mask            Iface
192.168.1.111            ether   80:56:f2:db:2f:7b   C                     wlan0
128.8.210.111            ether   3c:97:0e:cd:a4:6d   C                     eth0
192.168.1.1              ether   00:23:cd:5b:ea:d6   C                     wlan0
128.8.210.110                    (incomplete)                              eth0
128.8.210.112                    (incomplete)                              eth0
状态定义

/*
 * Neighbour states kept in neighbour->nud_state.  Every state is its own
 * bit so that groups of states can be tested with a single mask.
 */
#define NUD_INCOMPLETE  0x01    /* resolution in progress; __neigh_event_send() queues skbs on arp_queue */
#define NUD_REACHABLE   0x02    /* set by the timer when a NUD_DELAY entry was confirmed in time */
#define NUD_STALE   0x04    /* set by the timer when a NUD_REACHABLE entry expired and was not recently used */
#define NUD_DELAY   0x08    /* set from NUD_STALE on send, or from NUD_REACHABLE by the timer */
#define NUD_PROBE   0x10    /* set by the timer when NUD_DELAY ran out without confirmation */
#define NUD_FAILED  0x20    /* probe limit reached, or no probes configured */

/* Dummy states */
#define NUD_NOARP   0x40    /* set by arp_constructor() when no resolution is needed */
#define NUD_PERMANENT   0x80
#define NUD_NONE    0x00    /* initial state assigned by neigh_alloc() */
/*
 * Route / address types.  __inet_dev_addr_type() returns one of these, and
 * arp_constructor() compares neigh->type against RTN_MULTICAST and
 * RTN_BROADCAST to pick the initial neighbour state.
 */
enum {
    RTN_UNSPEC,
    RTN_UNICAST,        /* Gateway or direct route  */
    RTN_LOCAL,      /* Accept locally       */
    RTN_BROADCAST,      /* Accept locally as broadcast,
                   send as broadcast */
    RTN_ANYCAST,        /* Accept locally as broadcast,
                   but send as unicast */
    RTN_MULTICAST,      /* Multicast route      */
    RTN_BLACKHOLE,      /* Drop             */
    RTN_UNREACHABLE,    /* Destination is unreachable   */
    RTN_PROHIBIT,       /* Administratively prohibited  */
    RTN_THROW,      /* Not in this table        */
    RTN_NAT,        /* Translate this address   */
    RTN_XRESOLVE,       /* Use external resolver    */
    __RTN_MAX
};
从上面可以知道,对于一般的以太网网卡,dev->header_ops=eth_header_ops,

/*
 * Link-layer header operations for Ethernet devices.
 *
 * arp_constructor() checks whether ->cache is set to choose between
 * arp_hh_ops and arp_generic_ops; neigh_hh_init() calls ->cache to fill in
 * the cached header.
 */
const struct header_ops eth_header_ops ____cacheline_aligned = {
	.create		= eth_header,		/* builds the header in the skb */
	.parse		= eth_header_parse,
	.rebuild	= eth_rebuild_header,
	.cache		= eth_header_cache,	/* fills a struct hh_cache */
	.cache_update	= eth_header_cache_update,
};

创建的neighbour的neigh->ops = &arp_hh_ops;neigh->output =neigh_resolve_output
neigh_resolve_output函数位于L3和驱动发包之间,处理L2头相关的事情,
ip_finish_output2----->neigh_output------> n->output(n, skb);
如果当前对应的neighbour状态不是reachable,则发送arp请求报文

/*
 * Output path for neighbours whose hardware address may still need to be
 * resolved.
 *
 * If neigh_event_send() reports that the entry is not ready (non-zero
 * return) the skb has been taken over by the neighbour code and 0 is
 * returned.  Otherwise the link-layer header is built, retrying if the
 * hardware address changed underneath us, and the skb is handed to
 * dev_queue_xmit().
 *
 * Returns the dev_queue_xmit() result, 0 when the skb was deferred, or
 * -EINVAL (after freeing the skb) when there is no dst or the header could
 * not be built.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
    struct dst_entry *dst = skb_dst(skb);
    struct net_device *dev;
    unsigned int seq;
    int err;

    if (!dst) {
        NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
                  dst, neigh);
        goto drop;
    }

    __skb_pull(skb, skb_network_offset(skb));

    /* Non-zero: the address is not usable yet, skb is no longer ours. */
    if (neigh_event_send(neigh, skb))
        return 0;

    dev = neigh->dev;

    /* Fill the cached header on first use, if the device supports it. */
    if (dev->header_ops->cache && !neigh->hh.hh_len)
        neigh_hh_init(neigh, dst);

    /* Rebuild the header until neigh->ha was stable while reading it. */
    do {
        seq = read_seqbegin(&neigh->ha_lock);
        err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                      neigh->ha, NULL, skb->len);
    } while (read_seqretry(&neigh->ha_lock, seq));

    if (err >= 0)
        return dev_queue_xmit(skb);

drop:
    kfree_skb(skb);
    return -EINVAL;
}
首先分析neigh_hh_init函数:

/*
 * Fill in the cached link-layer header of neighbour @n, using the protocol
 * of @dst and the header_ops->cache hook of the dst's device.
 */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
    struct hh_cache *cache = &n->hh;
    struct net_device *netdev = dst->dev;
    __be16 proto = dst->ops->protocol;

    write_lock_bh(&n->lock);

    /*
     * Re-check under the lock: if another thread got here first the
     * header is already set up and must not be built again.
     */
    if (cache->hh_len == 0)
        netdev->header_ops->cache(n, cache, proto);

    write_unlock_bh(&n->lock);
}
dst->ops在分配rtable的时候赋值为ipv4_dst_ops,因此protocol为ETH_P_IP

/*
 * Allocate an IPv4 route entry bound to ipv4_dst_ops.
 *
 * DST_HOST is always set; @nopolicy and @noxfrm add DST_NOPOLICY and
 * DST_NOXFRM respectively.
 */
static struct rtable *rt_dst_alloc(struct net_device *dev,
				   bool nopolicy, bool noxfrm)
{
	int flags = DST_HOST;

	if (nopolicy)
		flags |= DST_NOPOLICY;
	if (noxfrm)
		flags |= DST_NOXFRM;

	return dst_alloc(&ipv4_dst_ops, dev, 1, -1, flags);
}
/*
 * dst operations for IPv4 routes; rt_dst_alloc() passes this table to
 * dst_alloc() for every rtable it creates.
 */
static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	/* Read by neigh_hh_init() as the protocol for the cached L2 header. */
	.protocol =		cpu_to_be16(ETH_P_IP),
	.gc =			rt_garbage_collect,
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.default_mtu =		ipv4_default_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.ifdown =		ipv4_dst_ifdown,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
};


最后调用eth_header_cache,主要是构造一个L2的header,缓存到hh_cache数据结构中,提高性能

/*
 * Build an Ethernet header for @neigh inside the header cache @hh.
 *
 * The header is placed in hh->hh_data at the offset given by
 * HH_DATA_OFF(sizeof(struct ethhdr)) and uses the device address as
 * source, neigh->ha as destination and @type as protocol.
 *
 * Returns 0 on success, or -1 for ETH_P_802_3, which is not cached.
 */
int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type)
{
	struct ethhdr *eth;
	const struct net_device *dev = neigh->dev;

	eth = (struct ethhdr *)
	    (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));

	if (type == htons(ETH_P_802_3))
		return -1;

	eth->h_proto = type;
	memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);	/* ETH_ALEN is 6 */
	memcpy(eth->h_dest, neigh->ha, ETH_ALEN);
	hh->hh_len = ETH_HLEN;	/* ETH_HLEN is 14 */
	return 0;
}


dev_hard_header调用eth_header创建L2的header

/*
 * Push an Ethernet header onto @skb.
 *
 * @type:  protocol in host byte order; for ETH_P_802_3 / ETH_P_802_2 the
 *         protocol field carries @len instead.
 * @daddr: destination address, may be NULL.
 * @saddr: source address, NULL selects the device address.
 *
 * Returns ETH_HLEN when the header is complete, or -ETH_HLEN when the
 * destination address is still missing.  Compared with the cached-header
 * path this needs up to three separate writes per packet.
 */
int eth_header(struct sk_buff *skb, struct net_device *dev,
	       unsigned short type,
	       const void *daddr, const void *saddr, unsigned len)
{
	struct ethhdr *hdr = (struct ethhdr *)skb_push(skb, ETH_HLEN);

	/* Write 1: protocol or length field. */
	if (type == ETH_P_802_3 || type == ETH_P_802_2)
		hdr->h_proto = htons(len);
	else
		hdr->h_proto = htons(type);

	/* Write 2: source hardware address, defaulting to the device's. */
	memcpy(hdr->h_source, saddr ? saddr : dev->dev_addr, ETH_ALEN);

	/* Write 3: destination hardware address, when known. */
	if (daddr) {
		memcpy(hdr->h_dest, daddr, ETH_ALEN);
		return ETH_HLEN;
	}

	/*
	 * No destination.  Loopback and NOARP devices get an all-zero
	 * destination (the loopback device should never use this function
	 * anyway); everyone else is told the header is incomplete.
	 */
	if (!(dev->flags & (IFF_LOOPBACK | IFF_NOARP)))
		return -ETH_HLEN;

	memset(hdr->h_dest, 0, ETH_ALEN);
	return ETH_HLEN;
}
对比下面的代码可以看到L2 header缓存机制的作用

/*
 * Transmit @skb using the cached link-layer header in @hh.
 *
 * The whole (aligned) header is copied in front of the packet data with a
 * single memcpy, repeated only if the cache was updated during the copy.
 */
static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
{
	unsigned seq;
	int len;

	for (;;) {
		int aligned;

		seq = read_seqbegin(&hh->hh_lock);
		len = hh->hh_len;
		aligned = HH_DATA_ALIGN(len);
		/* One copy covers the complete header. */
		memcpy(skb->data - aligned, hh->hh_data, aligned);

		if (!read_seqretry(&hh->hh_lock, seq))
			break;
	}

	skb_push(skb, len);
	return dev_queue_xmit(skb);
}

/*
 * Send @skb to neighbour @n: take the cached-header fast path when the
 * entry is in a connected state and a header has been cached, otherwise
 * fall back to the entry's output function.
 */
static inline int neigh_output(struct neighbour *n, struct sk_buff *skb)
{
	if (!(n->nud_state & NUD_CONNECTED) || !n->hh.hh_len)
		return n->output(n, skb);

	return neigh_hh_output(&n->hh, skb);
}


前面的这几个函数显然需要neighbour数据结构中有正确的MAC值,即neigh_event_send返回0

/*
 * Note that @neigh is being used and decide whether @skb can be sent now.
 *
 * Returns 0 when the entry is in NUD_CONNECTED, NUD_DELAY or NUD_PROBE;
 * otherwise defers to __neigh_event_send() and returns its result.
 */
static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	const unsigned long stamp = jiffies;

	/* Only write when the value changes. */
	if (neigh->used != stamp)
		neigh->used = stamp;

	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		return 0;

	return __neigh_event_send(neigh, skb);
}
/*
 * Slow path of neigh_event_send(): advance the state of @neigh because a
 * packet wants to use it, and decide what happens to @skb.
 *
 * - CONNECTED / DELAY / PROBE: nothing to do.
 * - STALE: switch to NUD_DELAY and arm the timer for delay_probe_time.
 * - neither STALE nor INCOMPLETE: start resolution (NUD_INCOMPLETE, timer
 *   one jiffy from now) if any multicast or application probes are
 *   configured; otherwise mark the entry NUD_FAILED and free @skb.
 * - INCOMPLETE: queue @skb on arp_queue, dropping the oldest queued skb
 *   when the queue already holds queue_len packets.
 *
 * Returns 0 if the caller may transmit @skb right away, or 1 if @skb was
 * queued or freed here and must not be touched by the caller.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	unsigned long now;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Re-check under the lock; the state may have changed meanwhile. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	now = jiffies;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = jiffies;
			neigh_add_timer(neigh, now + 1); /* runs neigh_timer_handler */
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Queue full: make room by dropping the oldest skb. */
			if (skb_queue_len(&neigh->arp_queue) >=
			    neigh->parms->queue_len) {
				struct sk_buff *buff;
				buff = __skb_dequeue(&neigh->arp_queue);
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
		}
		rc = 1;
	}
out_unlock_bh:
	write_unlock_bh(&neigh->lock);
	return rc;
}
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
		printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
		goto out;
	}

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
		/* keep skb alive even if arp_queue overflows */
		if (skb)
			skb = skb_copy(skb, GFP_ATOMIC);
		write_unlock(&neigh->lock);
		neigh->ops->solicit(neigh, skb); \\发送arp请求
		atomic_inc(&neigh->probes);
		kfree_skb(skb);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}


我们以一个刚创建的neighbour为例:初始化的时候状态为NUD_NONE,因此在函数__neigh_event_send中会把它的状态设成NUD_INCOMPLETE,同时触发定时器,并把skb放入arp_queue队列中。定时器处理函数neigh_timer_handler调用arp_solicit发送arp请求。在arp接收处理函数中,会通过neigh_update函数发送挂在arp_queue上的skb。《Understanding Linux Networking Internals》一书描述了neighbour状态之间的转变关系。

neigh_alloc中把初始状态统一设成NUD_NONE;
arp_constructor会根据ip地址的类型,net_device的类型对一些特殊的情况进行处理,设成NUD_NOARP
__neigh_event_send根据之前的状态作进一步处理;
如果是NUD_CONNECTED | NUD_DELAY | NUD_PROBE,不处理  
如果是NUD_NONE,则设成NUD_INCOMPLETE,启动1个jiffies后的定时器,然后把skb放入arp_queue中
如果已经是NUD_INCOMPLETE,把skb放入arp_queue中
如果是NUD_STALE,则设成NUD_DELAY,并启动delay_probe_time后的定时器

定时器函数neigh_timer_handler处理NUD_IN_TIMER状态,即NUD_INCOMPLETE|NUD_REACHABLE|NUD_DELAY|NUD_PROBE
如果是NUD_INCOMPLETE或者NUD_PROBE,且probe次数还没到最大值,则重新启动定时器(间隔为retrans_time,且不早于HZ/2之后),并发送arp请求
如果probe次数已达到或超过最大值,则设成NUD_FAILED

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
		/* keep skb alive even if arp_queue overflows */
		if (skb)
			skb = skb_copy(skb, GFP_ATOMIC);
		write_unlock(&neigh->lock);
		neigh->ops->solicit(neigh, skb);
		atomic_inc(&neigh->probes);
		kfree_skb(skb);
	}



如果是NUD_REACHABLE,根据使用情况,不改变状态,启动定时器;设成NUD_DELAY,启动定时器;设成NUD_STALE

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	}
如果是NUD_DELAY,则根据情况设成NUD_REACHABLE或者NUD_PROBE

	if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	}








































  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值