arp2-初始化

arp作为Neighboring Subsystem 模块在IPv4上的一种实现,符合Neighboring Subsystem的设计:
ARP实现会创建一个表(neigh_table),除了保存一些统计信息和参数之外,主要用于缓存ARP解析得到的地址。表的定义如下:
[ include/net/neighbour.h ]
/* Per-protocol neighbour table; the ARP instance in this file is arp_tbl. */
struct neigh_table {
	struct neigh_table	*next;	// links every protocol's table into one list (neigh_tables)
	int			family;	// protocol family, e.g. AF_INET
	int			entry_size;	// size of one neighbour object (computed at init when left 0)
	int			key_len;	// length in bytes of the lookup key
	__u32			(*hash)(const void *pkey,
					const struct net_device *dev,
					__u32 *hash_rnd);
	int			(*constructor)(struct neighbour *);
	int			(*pconstructor)(struct pneigh_entry *);
	void			(*pdestructor)(struct pneigh_entry *);
	void			(*proxy_redo)(struct sk_buff *skb);
	char			*id;	// name identifying the protocol; also used as the proc stat entry name at init
	struct neigh_parms	parms;	// parameters that tune the neighbour protocol's behaviour
	/* HACK. gc_* should follow parms without a gap! */
	int			gc_interval;	// garbage collection: how often it runs
	int			gc_thresh1;	// cache usage threshold (level 1)
	int			gc_thresh2;	// cache usage threshold (level 2)
	int			gc_thresh3;	// cache usage threshold (level 3)
	unsigned long		last_flush;	// timestamp of the most recent garbage collection
	struct delayed_work	gc_work;
	struct timer_list 	proxy_timer;	// runs when proxy_queue holds packets
	struct sk_buff_head	proxy_queue;	// queue of received requests (ARPOP_REQUEST) that are to be proxied
	atomic_t		entries;	// number of neighbour objects in the cache
	rwlock_t		lock;
	unsigned long		last_rand;	// update timestamp, tied to neigh_parms->reachable_time
	struct neigh_statistics	__percpu *stats;	// per-CPU statistics counters for this cache
	struct neigh_hash_table __rcu *nht;	// hash table of neighbour objects
	struct pneigh_entry	**phash_buckets;	// hash buckets holding the IP addresses being proxied
};
其中的neigh_parms结构为属性,定义如下:
[ include/net/neighbour.h ]
/* Tunable parameters of a neighbour protocol (embedded in neigh_table, referenced by neighbours). */
struct neigh_parms {
#ifdef CONFIG_NET_NS
	struct net *net;
#endif
	struct net_device *dev;
	struct neigh_parms *next;	// chains parms that belong to the same protocol family
	int	(*neigh_setup)(struct neighbour *);
	void	(*neigh_cleanup)(struct neighbour *);
	struct neigh_table *tbl;

	void	*sysctl_table;	// opaque sysctl state; NOTE(review): presumably filled by neigh_sysctl_register() - confirm

	int dead;	// set to 1 when the parms may be deleted
	atomic_t refcnt;	// reference count
	struct rcu_head rcu_head;

	int	reachable_time;	// randomised from NEIGH_VAR_BASE_REACHABLE_TIME via neigh_rand_reach_time()
	int	data[NEIGH_VAR_DATA_MAX];	// parameter values, indexed by the NEIGH_VAR_* enumerators
	DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX);
};

/* Indices into neigh_parms.data[]; entries after NEIGH_VAR_DATA_MAX are aliases or table-wide settings. */
enum {
	NEIGH_VAR_MCAST_PROBES,	// number of multicast solicitations that can be sent to resolve a neighbor's address
	NEIGH_VAR_UCAST_PROBES,	// number of unicast solicitations that can be sent to confirm the reachability of an address.
	NEIGH_VAR_APP_PROBES,	// number of solicitations that can be sent by a user-space application when resolving an address
	NEIGH_VAR_RETRANS_TIME,	// retransmission time
	NEIGH_VAR_BASE_REACHABLE_TIME,	// interval of time since the most recent proof of reachability was received.
	NEIGH_VAR_DELAY_PROBE_TIME,	// how long a neighbor in the NUD_DELAY state waits before entering the NUD_PROBE state.
	NEIGH_VAR_GC_STALETIME,	// A neighbour structure is removed if it has not been used for gc_staletime time and no one holds a reference to it
	NEIGH_VAR_QUEUE_LEN_BYTES,	// maximum length of arp_queue
	NEIGH_VAR_PROXY_QLEN,	// maximum length of proxy_queue
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,	// Amount of time that neighboring protocol packets handled by a proxy should be kept in a queue before being processed.
	NEIGH_VAR_LOCKTIME,	// Minimum time that has to pass between two updates of the fields of a neighbour entry
#define NEIGH_VAR_DATA_MAX (NEIGH_VAR_LOCKTIME + 1)
	/* Following are used as a second way to access one of the above */
	NEIGH_VAR_QUEUE_LEN, /* same data as NEIGH_VAR_QUEUE_LEN_BYTES */
	NEIGH_VAR_RETRANS_TIME_MS, /* same data as NEIGH_VAR_RETRANS_TIME */
	NEIGH_VAR_BASE_REACHABLE_TIME_MS, /* same data as NEIGH_VAR_BASE_REACHABLE_TIME */
	/* Following are used by "default" only */
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
其中的neigh_statistics结构保存统计信息,定义如下:
[  include/net/neighbour.h ]
/* Counters kept per CPU for one neighbour table (see neigh_table.stats). */
struct neigh_statistics {
	unsigned long allocs;		/* number of allocated neighs */
	unsigned long destroys;		/* number of destroyed neighs */
	unsigned long hash_grows;	/* number of hash resizes */

	unsigned long res_failed;	/* number of failed resolutions */

	unsigned long lookups;		/* number of lookups */
	unsigned long hits;		/* number of hits (among lookups) */

	unsigned long rcv_probes_mcast;	/* number of received mcast ipv6 */
	unsigned long rcv_probes_ucast; /* number of received ucast ipv6 */

	unsigned long periodic_gc_runs;	/* number of periodic GC runs */
	unsigned long forced_gc_runs;	/* number of forced GC runs */

	unsigned long unres_discards;	/* number of unresolved drops */
};
哈希表的结构:
[  include/net/neighbour.h ]
/* Number of random words handed to the table's hash function. */
#define NEIGH_NUM_HASH_RND	4

/* RCU-managed hash table of neighbour entries. */
struct neigh_hash_table {
	struct neighbour __rcu	**hash_buckets;	// bucket heads; entries chain through neighbour->next
	unsigned int		hash_shift;	// bucket index is hash >> (32 - hash_shift)
	__u32			hash_rnd[NEIGH_NUM_HASH_RND];	// passed as hash_rnd to neigh_table->hash()
	struct rcu_head		rcu;
};
每个neighbour由下面的结构表示:
[  include/net/neighbour.h ]
/* One cached neighbour (L3 address to L2 address mapping) on a given device. */
struct neighbour {
	struct neighbour __rcu	*next;	// every neighbour sits in the hash table; next chains entries of one bucket
	struct neigh_table	*tbl;	// owning table; arp_tbl for IPv4
	struct neigh_parms	*parms;	// parameters that tune the neighbour protocol's behaviour
	unsigned long		confirmed;	// timestamp: last time the address was confirmed reachable
	unsigned long		updated;	// timestamp: last update
	rwlock_t		lock;
	atomic_t		refcnt;	// reference count
	struct sk_buff_head	arp_queue;	// packets waiting here until the hardware address is resolved
	unsigned int		arp_queue_len_bytes;
	struct timer_list	timer;
	unsigned long		used;	// timestamp: last use
	atomic_t		probes;	// number of probes sent for the target address
	__u8			flags;
	__u8			nud_state;
	__u8			type;
	__u8			dead;	// set to 1 to mark the object as deletable
	seqlock_t		ha_lock;
	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];	// hardware address matching the IP address
	struct hh_cache		hh;	// L2 header caching
	int			(*output)(struct neighbour *, struct sk_buff *);
	const struct neigh_ops	*ops;	// table of virtual functions
	struct rcu_head		rcu;
	struct net_device	*dev;	// device through which the neighbour is reachable
	u8			primary_key[0];	// IP address, used as the key for cache lookups
};
在缓冲中的neighbour的状态值如下:
[ include/uapi/linux/neighbour.h ]
/*
 *	Neighbor Cache Entry States.
 */

/* A solicitation has been sent, but no reply has been received yet. 
 * In this state, there is no hardware address to use (not even an old one, as there is with NUD_STALE).
 */
#define NUD_INCOMPLETE	0x01
/* The address of the neighbor is cached and the latter is known 
 * to be reachable (there has been a proof of reachability).
 */
#define NUD_REACHABLE	0x02	// reachable (connected) state
#define NUD_STALE	0x04	// stale state
#define NUD_DELAY	0x08	// delay state
#define NUD_PROBE	0x10	// probe state
#define NUD_FAILED	0x20	// Marks a neighbor as unreachable

/* Dummy states */
/* This state is used to mark neighbors that do not need any protocol to resolve the L3-to-L2 mapping
 */
#define NUD_NOARP	0x40
/* The L2 address of the neighbor has been statically configured (i.e., with userspace commands) 
 * and therefore there is no need to use any neighboring protocol to take care of it
 */
#define NUD_PERMANENT	0x80	
#define NUD_NONE	0x00

/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
   and make no address resolution or NUD.
   NUD_PERMANENT also cannot be deleted by garbage collectors.
 */
对ARP,当然要创建一个neigh_table表:
[ net/ipv4/arp.c ]
// The ARP neighbour table (IPv4 instance of struct neigh_table)
struct neigh_table arp_tbl = {
	.family		= AF_INET,	// protocol family
	.key_len	= 4,	// lookup key length in bytes
	.hash		= arp_hash,
	.constructor	= arp_constructor,
	.proxy_redo	= parp_redo,
	.id		= "arp_cache",	// name identifying the protocol
	.parms		= {
		.tbl			= &arp_tbl,	// back-pointer to this table
		.reachable_time		= 30 * HZ,	// initial value; re-randomised by neigh_table_init_no_netlink()
		.data	= {
			[NEIGH_VAR_MCAST_PROBES] = 3,	// number of multicast solicitations that can be sent to resolve a neighbor's address
			[NEIGH_VAR_UCAST_PROBES] = 3,	// number of unicast solicitations that can be sent to confirm the reachability of an address.
			[NEIGH_VAR_RETRANS_TIME] = 1 * HZ,	// retransmission time
			[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,	// interval of time since the most recent proof of reachability was received.
			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,	// how long a neighbor in the NUD_DELAY state waits before entering the NUD_PROBE state.
			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,	// A neighbour structure is removed if it has not been used for gc_staletime 
								// time and no one holds a reference to it
			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,	// maximum length of arp_queue
			[NEIGH_VAR_PROXY_QLEN] = 64,	// maximum length of proxy_queue
			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
			[NEIGH_VAR_PROXY_DELAY]	= (8 * HZ) / 10,	// Amount of time that neighboring protocol packets handled by a 
									// proxy should be kept in a queue before being processed.
			[NEIGH_VAR_LOCKTIME] = 1 * HZ,	// Minimum time that has to pass between two updates of the fields of a neighbour entry
		},
	},
	.gc_interval	= 30 * HZ,	// garbage collection: how often it runs
	.gc_thresh1	= 128,	// cache usage threshold (level 1)
	.gc_thresh2	= 512,	// cache usage threshold (level 2)
	.gc_thresh3	= 1024,	// cache usage threshold (level 3)
};
EXPORT_SYMBOL(arp_tbl);
对于设备,它可以对ARP进行配置,配置的属性如下:
arp_announce : 本地发出的ARP请求包,当源地址使用本机的IP地址时,提供一个等级限制:
0:(默认)使用任何接口上的任何IP地址
1:不使用不在目标子网内的本机地址
2:只使用最适合目标地址的本机地址

arp_ignore:当收到ARP请求包,当目标地址是本机地址时的回应模式
0:(默认)响应任何接口上的任何IP地址
1:只响应目标地址和接收数据的设备的地址相同的请求
2:只响应目标地址和接收数据的设备的地址相同的请求,并且发送请求的IP和目标地址在同一个子网内
3:不响应scope为host的本机地址,只响应scope为global和link的地址的解析请求
4-7: -保留
8:全部不响应

ARP协议的头部定义如下:
[ include/uapi/linux/if_arp.h ]
/*
 *	This structure defines an ethernet arp header.
 *
 *	Only the fixed-size leading fields are real members; the address
 *	fields below are compiled out (#if 0) and serve as documentation of
 *	the Ethernet/IPv4 layout.
 */

struct arphdr {
	__be16		ar_hrd;		/* format of hardware address	*/
	__be16		ar_pro;		/* format of protocol address	*/
	unsigned char	ar_hln;		/* length of hardware address	*/
	unsigned char	ar_pln;		/* length of protocol address	*/
	__be16		ar_op;		/* ARP opcode (command)		*/

#if 0
	 /*
	  *	 Ethernet looks like this : This bit is variable sized however...
	  */
	unsigned char		ar_sha[ETH_ALEN];	/* sender hardware address	*/
	unsigned char		ar_sip[4];		/* sender IP address		*/
	unsigned char		ar_tha[ETH_ALEN];	/* target hardware address	*/
	unsigned char		ar_tip[4];		/* target IP address		*/
#endif

};
在初始化IP模块时,会对ARP模块进行初始化,调用的函数为:
[ net/ipv4/arp.c ]
/* Boot-time ARP setup: table, packet handler, proc entry, sysctl and netdevice notifier. */
void __init arp_init(void)
{
	neigh_table_init(&arp_tbl);	// initialise arp_tbl and register it

	dev_add_pack(&arp_packet_type);	// register the handler for received ARP packets
	arp_proc_init();	// create the arp file under proc
#ifdef CONFIG_SYSCTL
	neigh_sysctl_register(NULL, &arp_tbl.parms, NULL);	// sysctl entries for the table's default parms
#endif
	register_netdevice_notifier(&arp_netdev_notifier);	// subscribe to netdevice notifications
}
初始化arp_tbl:
[ net/core/neighbour.c ]
/*
 * Initialise @tbl and push it onto the front of the global neigh_tables
 * list.  If a table of the same family was already on the list, the new
 * table is still registered, but an error is logged and the stack dumped.
 */
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *dup;

	neigh_table_init_no_netlink(tbl);

	write_lock(&neigh_tbl_lock);

	/* Search the existing tables for one with the same family. */
	dup = neigh_tables;
	while (dup && dup->family != tbl->family)
		dup = dup->next;

	/* Link the new table in at the head, duplicate or not. */
	tbl->next = neigh_tables;
	neigh_tables = tbl;

	write_unlock(&neigh_tbl_lock);

	if (unlikely(dup)) {
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);

/*
 * Set up the run-time state of @tbl: default parms, per-CPU statistics,
 * proc stat entry, both hash tables, entry size, lock, periodic work,
 * proxy timer and proxy queue.  Any allocation failure panics.
 */
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
    unsigned long now = jiffies;    // current time
    unsigned long phsize;

    write_pnet(&tbl->parms.net, &init_net);    // default parms belong to init_net
    atomic_set(&tbl->parms.refcnt, 1);    // reference count starts at 1
    // reachable_time is randomised around BASE_REACHABLE_TIME
    tbl->parms.reachable_time =
              neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));


    tbl->stats = alloc_percpu(struct neigh_statistics);    // statistics counters, one copy per CPU
    if (!tbl->stats)
        panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
    // proc stat entry named after tbl->id
    if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
                  &neigh_stat_seq_fops, tbl))
        panic("cannot create neighbour proc dir entry");
#endif

    // NOTE(review): the argument 3 is presumably the initial hash_shift - confirm in neigh_hash_alloc()
    RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));    // allocate the neighbour hash table

    phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
    tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);    // zeroed bucket array for proxied addresses

    if (!tbl->nht || !tbl->phash_buckets)
        panic("cannot allocate neighbour cache hashes");


    // size of one neighbour object: header up to primary_key plus the key, aligned;
    // a caller-supplied size is only checked for alignment
    if (!tbl->entry_size)
        tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
                    tbl->key_len, NEIGH_PRIV_ALIGN);
    else
        WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

    rwlock_init(&tbl->lock);
    INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);    // periodic (garbage collection) work item
    // first run is scheduled reachable_time jiffies from now
    queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
            tbl->parms.reachable_time);
    setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);    // proxy timer, handler gets tbl
    // initialise the queue of packets to be proxied
    skb_queue_head_init_class(&tbl->proxy_queue,
            &neigh_table_proxy_queue_class);

    tbl->last_flush = now;    // time of the most recent garbage collection
    tbl->last_rand    = now + tbl->parms.reachable_time * 20;    // update timestamp, tied to parms.reachable_time
}

/*
 * Pick a random reachable time from the half-open range
 * [base / 2, base / 2 + base), i.e. roughly (1/2)*base ... (3/2)*base.
 * A base of 0 yields 0, which also keeps the modulo below well defined.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
    if (!base)
        return 0;

    return (base >> 1) + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);

为处理类型为ARP的包,要提供一个packet_type类型,并注册到内核中:
[ net/ipv4/arp.c ]
/* Packet handler registered by arp_init(): frames of type ETH_P_ARP are passed to arp_rcv(). */
static struct packet_type arp_packet_type __read_mostly = {
	.type =	cpu_to_be16(ETH_P_ARP),
	.func =	arp_rcv,
};
其中arp_rcv就是用来接收ARP包的函数。
当创建neighbour对象时(见后面的__neigh_create),要调用表的构造函数,在arp_tbl中它被设置为arp_constructor:
[ net/ipv4/arp.c ]
/*
 * ARP-specific constructor, installed as arp_tbl.constructor.
 *
 * Classifies the key address, switches neigh->parms to the device's ARP
 * parms, and selects neigh->ops / neigh->output (plus a fixed hardware
 * address where no resolution is needed).
 *
 * Returns 0 on success, -EINVAL when the device has no IPv4 in_device.
 */
static int arp_constructor(struct neighbour *neigh)
{
	__be32 addr = *(__be32 *)neigh->primary_key;	// target IPv4 address (the lookup key)
	struct net_device *dev = neigh->dev;	// network device
	struct in_device *in_dev;
	struct neigh_parms *parms;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);	// IPv4 specific data
	if (in_dev == NULL) {
		rcu_read_unlock();
		return -EINVAL;
	}

	neigh->type = inet_addr_type(dev_net(dev), addr);	// address type: RTN_UNICAST, RTN_MULTICAST, ...

	parms = in_dev->arp_parms;	// the device's ARP parameters
	__neigh_parms_put(neigh->parms);	// put the parms the neighbour pointed to so far
	neigh->parms = neigh_parms_clone(parms);	// and point it at the device's parms instead
	rcu_read_unlock();

	if (!dev->header_ops) {	// device has no header_ops: ARP is not used
		neigh->nud_state = NUD_NOARP; // do not need any protocol to resolve the L3-to-L2 mapping
		neigh->ops = &arp_direct_ops;
		neigh->output = neigh_direct_output;
	} else {
		/* Good devices (checked by reading texts, but only Ethernet is
		   tested)

		   ARPHRD_ETHER: (ethernet, apfddi)
		   ARPHRD_FDDI: (fddi)
		   ARPHRD_IEEE802: (tr)
		   ARPHRD_METRICOM: (strip)
		   ARPHRD_ARCNET:
		   etc. etc. etc.

		   ARPHRD_IPDDP will also work, if author repairs it.
		   I did not it, because this driver does not work even
		   in old paradigm.
		 */

#if 1
		/* So... these "amateur" devices are hopeless.
		   The only thing, that I can say now:
		   It is very sad that we need to keep ugly obsolete
		   code to make them happy.

		   They should be moved to more reasonable state, now
		   they use rebuild_header INSTEAD OF hard_start_xmit!!!
		   Besides that, they are sort of out of date
		   (a lot of redundant clones/copies, useless in 2.1),
		   I wonder why people believe that they work.
		 */
		switch (dev->type) {
		default:
			break;
		case ARPHRD_ROSE:
#if IS_ENABLED(CONFIG_AX25)
		case ARPHRD_AX25:/* CCITT X.25			*/
#if IS_ENABLED(CONFIG_NETROM)
		case ARPHRD_NETROM:/* from KA9Q: NET/ROM pseudo	*/
#endif
			neigh->ops = &arp_broken_ops;
			neigh->output = neigh->ops->output;
			return 0;
#else
			break;
#endif
		}
#endif
		if (neigh->type == RTN_MULTICAST) {	// multicast address
			neigh->nud_state = NUD_NOARP;	// do not need any protocol to resolve the L3-to-L2 mapping
			arp_mc_map(addr, neigh->ha, dev, 1);	// fill in the neighbour's hardware address
		} else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {	// device does no ARP, or is the loopback interface
			neigh->nud_state = NUD_NOARP;	// do not need any protocol to resolve the L3-to-L2 mapping
			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);	// use the device's own hardware address
		} else if (neigh->type == RTN_BROADCAST ||
			   (dev->flags & IFF_POINTOPOINT)) {	// broadcast address, or a point-to-point interface
			neigh->nud_state = NUD_NOARP;	// do not need any protocol to resolve the L3-to-L2 mapping
			memcpy(neigh->ha, dev->broadcast, dev->addr_len);	// use the device's broadcast hardware address
		}

		if (dev->header_ops->cache)	// device provides a header cache hook
			neigh->ops = &arp_hh_ops;
		else
			neigh->ops = &arp_generic_ops;

		// NOTE(review): NUD_VALID is defined outside this excerpt; presumably it
		// includes the NUD_NOARP state set above, selecting connected_output - confirm
		if (neigh->nud_state & NUD_VALID)
			neigh->output = neigh->ops->connected_output;
		else
			neigh->output = neigh->ops->output;
	}
	return 0;
}

/*
 * Fill @haddr with the hardware address to use for multicast IPv4
 * address @addr on @dev.
 *
 * Ethernet, FDDI and IEEE 802 devices, InfiniBand and IP-GRE each use
 * their own mapping helper.  Any other device type gets the device
 * broadcast address when @dir is non-zero, and -EINVAL otherwise.
 *
 * Returns 0 when @haddr was filled in, -EINVAL when it was not.
 */
int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
{
	int rc = 0;

	switch (dev->type) {
	case ARPHRD_ETHER:
	case ARPHRD_FDDI:
	case ARPHRD_IEEE802:
		/* Map a multicast IP onto a multicast MAC for type ethernet. */
		ip_eth_mc_map(addr, haddr);
		break;
	case ARPHRD_INFINIBAND:
		ip_ib_mc_map(addr, dev->broadcast, haddr);
		break;
	case ARPHRD_IPGRE:
		ip_ipgre_mc_map(addr, dev->broadcast, haddr);
		break;
	default:
		if (dir)
			memcpy(haddr, dev->broadcast, dev->addr_len);
		else
			rc = -EINVAL;
		break;
	}

	return rc;
}

/*
 * Resolve the hardware address for the next hop of @skb into @haddr.
 *
 * Returns 0 when @haddr has been filled in, 1 when it has not.  On the
 * paths where no dst is attached or no neighbour entry could be obtained,
 * @skb is freed here.
 */
int arp_find(unsigned char *haddr, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct neighbour *neigh;
	__be32 nexthop;
	int resolved;

	/* Without an attached dst there is no route to take a next hop from. */
	if (!skb_dst(skb)) {
		pr_debug("arp_find is called with dst==NULL\n");
		kfree_skb(skb);
		return 1;
	}

	/* Next-hop address: the gateway, or the destination in the IP header. */
	nexthop = rt_nexthop(skb_rtable(skb), ip_hdr(skb)->daddr);

	/*
	 * Addresses that need no lookup (per the original note: local,
	 * multicast and broadcast addresses) are answered directly.
	 */
	if (arp_set_predefined(inet_addr_type(dev_net(dev), nexthop), haddr,
			       nexthop, dev))
		return 0;

	/* Find the entry in arp_tbl, creating it if it does not exist. */
	neigh = __neigh_lookup(&arp_tbl, &nexthop, dev, 1);
	if (!neigh) {
		kfree_skb(skb);
		return 1;
	}

	neigh->used = jiffies;	/* record the time of use */

	/*
	 * The address is usable if the state is already one of NUD_VALID;
	 * otherwise neigh_event_send() is asked, and only a 0 return counts.
	 */
	resolved = (neigh->nud_state & NUD_VALID) ||
		   neigh_event_send(neigh, skb) == 0;
	if (resolved)
		neigh_ha_snapshot(haddr, neigh, dev);	/* copy neigh's hardware address out */

	neigh_release(neigh);
	return resolved ? 0 : 1;
}
EXPORT_SYMBOL(arp_find);
缓冲中查找
[ include/net/neighbour.h ]
/*
 * Look @pkey/@dev up in @tbl; when nothing is found and @creat is
 * non-zero, create the entry instead.  Returns NULL when the entry does
 * not exist (and @creat is 0) or when creation failed.
 */
static inline struct neighbour *
__neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat)
{
	struct neighbour *found = neigh_lookup(tbl, pkey, dev);

	if (!found && creat) {
		found = neigh_create(tbl, pkey, dev);
		/* Callers of this helper see NULL, not an ERR_PTR. */
		if (IS_ERR(found))
			found = NULL;
	}

	return found;
}
其中调用:
[ net/core/neighbour.c ]
/*
 * Find the neighbour for key @pkey (the target address) on @dev in @tbl.
 *
 * Returns the entry with its reference count raised, or NULL when there
 * is no match or the matching entry's reference count was already zero.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;	// key (address) length
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);	// hash table of neighbour objects
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);	// bucket index: top hash_shift bits of the hash

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);	// walk the bucket's chain
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {	// same device and same key
			if (!atomic_inc_not_zero(&n->refcnt))	// take a reference unless the count is already zero
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);	// counted as a hit even when the reference could not be taken
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
如果没找到,创建一个:
[ include/net/neighbour.h ]
/* Create (or find) the entry for @pkey/@dev in @tbl; always asks __neigh_create() for a reference (want_ref = true). */
static inline struct neighbour *neigh_create(struct neigh_table *tbl,
					     const void *pkey,
					     struct net_device *dev)
{
	return __neigh_create(tbl, pkey, dev, true);
}
[ net/core/neighbour.c ]
/*
 * Allocate a neighbour for @pkey on @dev, run the protocol and device
 * setup hooks, and insert it into @tbl's hash table.
 *
 * If an entry with the same device and key is already in the table, that
 * entry is returned and the freshly allocated one is released.  With
 * @want_ref the returned entry carries an extra reference for the caller.
 *
 * Returns the entry, or an ERR_PTR: -ENOBUFS when allocation fails, the
 * negative code from a failing setup hook, or -EINVAL when the entry's
 * parms are marked dead.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;// lookup key length, e.g. 4 for ARP
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);	// allocate a neighbour
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);// store the address that serves as the lookup key
	n->dev = dev;	// device through which the neighbour is reachable
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);	// back-date "confirmed" by twice BASE_REACHABLE_TIME

	write_lock_bh(&tbl->lock);
	// hash table of neighbour objects
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	// grow the hash table (shift + 1) once the entry count exceeds 1 << hash_shift
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	// bucket index: top hash_shift bits of the hash
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {	// parms are marked dead: refuse to insert
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	// look for an existing entry with the same device and key in this bucket
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;	// return the existing entry; the new one is released below
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;	// clear the "deletable" mark: the entry is now live
	if (want_ref)
		neigh_hold(n);
	// insert at the head of the bucket's chain
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);






  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值