一:邻居子系统概述
邻居,从物理上来说是指在同一个局域网内的终端;从网络拓扑的结构来说,是指它们之间相隔的距离仅为一跳,它们属于同一个冲突域。
邻居子系统的作用:
它为第三层协议与第二层协议提供地址映射关系。
提供邻居头部缓存,加速发包的速度
二:邻居子系统在整个协议栈的地位
发送数据的时候,要在本机进行路由查找。如果有到目的地址的路径,则查看arp缓存中是否存在相应的映射关系;如果没有,则新建邻居项。然后判断邻居项是否为可用状态:如果不可用,把skb存至邻居发送队列中,然后发送arp请求。
如果接收到arp应答,则将对应邻居项置为可用。如果在指定时间内未收到响应包,则将对应邻居项置为无效状态。
如果邻居更改为可用状态,则把邻居项对应的skb队列中的数据包发送出去。
三:流程概述;
发包流程。
下面以发送udp数据为例,看看是怎么与邻居子系统相关联的
Sendmsg() -> ip_route_output()(到路由缓存中查找目的出口)-> ip_route_output_slow()(如果缓存中不存在目的项,则到路由表中查找)-> ip_build_xmit() -> output_maybe_reroute() -> skb->dst->output()
如果此时找到了路由,则根据路由信息分配一个dst_entry,并调用arp_bind_neighbour为之绑定邻居,output指针赋值为ip_output。
转到执行ip_output
ip_output -> __ip_finish_output() -> ip_finish_output2() -> dst->neighbour->output()
现在就转至邻居项的出口函数了。关于上述详细流程,将在以后的学习中继续给出
Neighbour->output怎么处理呢?我们从初始化时看起
四:邻居子系统初始化
跟以前一样,linux源代码版本为2.6.21
/*
 * arp_init - set up the ARP side of the neighbour subsystem.
 * Defined in net/ipv4/arp.c; marked __init.
 *
 * Initialises arp_tbl, registers the ARP packet handler, creates the proc
 * entries, registers the sysctl entries when CONFIG_SYSCTL is set, and
 * subscribes to net-device notifications.
 */
void __init arp_init(void)
{
	/* Initialise the neighbour table used by ARP. */
	neigh_table_init(&arp_tbl);

	/* Register the ARP protocol handler (arp_packet_type). */
	dev_add_pack(&arp_packet_type);

	/* Create the proc objects. */
	arp_proc_init();

#ifdef CONFIG_SYSCTL
	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
			      NET_IPV4_NEIGH, "ipv4", NULL);
#endif

	/* Hook into the net-device event notifier chain. */
	register_netdevice_notifier(&arp_netdev_notifier);
}
在neigh_table_init(&arp_tbl);中,对邻居表进行了相应的初始化,特别的,初始化了一个垃圾回收定时器。后面再给出讨论
arp_packet_type的内容为:
static struct packet_type arp_packet_type = {
.type = __constant_htons(ETH_P_ARP), (链路层对应的协议号)
.func = arp_rcv, 《数据包的处理函数》
}
从上面可以看出,当接收到arp数据包时,将用arp_rcv()处理
五:邻居系统数据结构分析
neigh_table结构:
/*
 * struct neigh_table - descriptor of one neighbour table.
 * arp_tbl is the instance used by ARP.
 */
struct neigh_table
{
	/* Next neighbour table. */
	struct neigh_table	*next;
	/* Protocol family. */
	int			family;
	/*
	 * Size of one neighbour entry. Per the article's notes, for ARP this
	 * is sizeof(struct neighbour) + 4, the 4 being the IPv4 key length.
	 */
	int			entry_size;
	/* Length of the hash key; 4 (one IPv4 address) for ARP. */
	int			key_len;
	/* Hash function; computed from the key and the net_device. */
	__u32			(*hash)(const void *pkey, const struct net_device *);
	/* Protocol-specific neighbour initialisation hook. */
	int			(*constructor)(struct neighbour *);
	int			(*pconstructor)(struct pneigh_entry *);
	void			(*pdestructor)(struct pneigh_entry *);
	void			(*proxy_redo)(struct sk_buff *skb);
	/* Name of the neighbour table. */
	char			*id;
	struct neigh_parms	parms;
	/* HACK. gc_* should follow parms without a gap! */
	/* Interval of the periodic garbage collection. */
	int			gc_interval;
	int			gc_thresh1;
	/*
	 * Second threshold. Per the article's notes: when the table holds
	 * more entries than this and a new neighbour is created, a forced
	 * garbage collection runs if the table has not been flushed for
	 * more than five seconds.
	 */
	int			gc_thresh2;
	/* Upper limit on the number of neighbours. */
	int			gc_thresh3;
	/* Time of the most recent flush. */
	unsigned long		last_flush;
	/* Timer driving the periodic garbage collection. */
	struct timer_list	gc_timer;
	struct timer_list	proxy_timer;
	struct sk_buff_head	proxy_queue;
	/* Number of neighbours in the whole table. */
	int			entries;
	rwlock_t		lock;
	unsigned long		last_rand;
	struct neigh_parms	*parms_list;
	kmem_cache_t		*kmem_cachep;
	struct neigh_statistics	*stats;
	/* Hash bucket array holding the neighbour entries. */
	struct neighbour	**hash_buckets;
	/* Mask applied to hash values; the bucket count is hash_mask + 1. */
	unsigned int		hash_mask;
	__u32			hash_rnd;
	unsigned int		hash_chain_gc;
	/* Buckets for proxy-ARP entries. */
	struct pneigh_entry	**phash_buckets;
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry	*pde;
#endif
};
Neighbour结构:
/* struct neighbour - one entry of a neighbour table (one next-hop on one device). */
struct neighbour
{
/* Next neighbour in the same hash chain. */
struct neighbour *next;
/* Neighbour table this entry belongs to. */
struct neigh_table *tbl;
/* ARP transmission parameters. */
struct neigh_parms *parms;
/* Network device this entry is bound to. */
struct net_device *dev;
/* Time of last use. */
unsigned long used;
/* Confirmation timestamp; neigh_create() backdates it relative to jiffies. */
unsigned long confirmed;
/* Time of last update. */
unsigned long updated;
__u8 flags;
/* State of the entry (NUD_* values). */
__u8 nud_state;
__u8 type;
/* Liveness flag; per the article's notes, garbage collection removes the entry when dead is 1. */
__u8 dead;
/* Number of ARP request retries sent. */
atomic_t probes;
rwlock_t lock;
/* Hardware address buffer: MAX_ADDR_LEN rounded up to a multiple of sizeof(unsigned long). */
unsigned char ha[(MAX_ADDR_LEN+sizeof(unsigned long)-1)&~(sizeof(unsigned long)-1)];
/* Presumably the cached link-layer header(s) for this neighbour. */
struct hh_cache *hh;
/* Reference count. */
atomic_t refcnt;
/* Transmit function used for packets sent to this neighbour. */
int (*output)(struct sk_buff *skb);
/* Queue of skbs waiting to be sent to this neighbour. */
struct sk_buff_head arp_queue;
/* Per-entry timer. */
struct timer_list timer;
struct neigh_ops *ops;
/* Hash key, stored inline after the structure (tbl->key_len bytes are copied in by neigh_create()). */
u8 primary_key[0];
};
在前面已经分析过,查找到路由后,会调用arp_bind_neighbour绑定一个邻居项
/*
 * arp_bind_neighbour - make sure a route's dst entry has a neighbour attached.
 *
 * Returns 0 when dst->neighbour was already set or has just been bound,
 * -EINVAL when the dst has no device, or the error carried by the pointer
 * that __neigh_lookup_errno() returned.
 */
int arp_bind_neighbour(struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	struct neighbour *n = dst->neighbour;
	u32 nexthop;

	if (dev == NULL)
		return -EINVAL;

	/* A neighbour is already attached: nothing left to do. */
	if (n != NULL)
		return 0;

	/* Loopback and point-to-point devices use key 0 rather than the gateway. */
	if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
		nexthop = 0;
	else
		nexthop = ((struct rtable *)dst)->rt_gateway;

	/* Look the entry up (creating it if missing) in the CLIP or ARP table. */
	n = __neigh_lookup_errno(
#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
		dev->type == ARPHRD_ATM ? clip_tbl_hook :
#endif
		&arp_tbl, &nexthop, dev);
	if (IS_ERR(n))
		return PTR_ERR(n);

	dst->neighbour = n;
	return 0;
}
如果邻居项不存在,则执行__neigh_lookup_errno()
/*
 * __neigh_lookup_errno - find a neighbour entry, creating it when absent.
 *
 * Returns whatever neigh_lookup() found, otherwise the result of
 * neigh_create() (which may be an ERR_PTR-encoded error).
 */
static inline struct neighbour *
__neigh_lookup_errno(struct neigh_table *tbl, const void *pkey,
		     struct net_device *dev)
{
	/* Search the neighbour table first. */
	struct neighbour *n = neigh_lookup(tbl, pkey, dev);

	if (n)
		return n;

	/* Not found: create a new entry. */
	return neigh_create(tbl, pkey, dev);
}
从上面可以看到,它会先到邻居表中寻找对应的邻居项,如果不存在,则新建一项。继续跟进
/*
 * neigh_create - allocate a neighbour entry for (pkey, dev) and insert it
 * into tbl's hash table.
 *
 * Returns the inserted entry, or an already-present entry with the same key
 * and device (in which case the fresh allocation is released), or an
 * ERR_PTR: -ENOBUFS when allocation fails, -EINVAL when the entry's parms
 * are marked dead, or the negative value returned by the constructor /
 * neigh_setup hooks. A reference is taken on the entry that is returned.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev)
{
u32 hash_val;
int key_len = tbl->key_len;
int error;
struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
if (!n) {
rc = ERR_PTR(-ENOBUFS);
goto out;
}
/* The hash key is the caller's key (the destination IP for ARP). */
memcpy(n->primary_key, pkey, key_len);
n->dev = dev;
/* Take a reference on the device now that the entry points at it. */
dev_hold(dev);
/* Protocol specific setup. */
/* Run the table's constructor hook, if one is set. */
if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
rc = ERR_PTR(error);
goto out_neigh_release;
}
/* Device specific setup. */
if (n->parms->neigh_setup &&
(error = n->parms->neigh_setup(n)) < 0) {
rc = ERR_PTR(error);
goto out_neigh_release;
}
/* Backdate the confirmation time by twice base_reachable_time. */
n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
write_lock_bh(&tbl->lock);
/* Double the hash table when the entry count exceeds hash_mask + 1. */
if (tbl->entries > (tbl->hash_mask + 1))
neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
/* Refuse to insert when the entry's parms are marked dead. */
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
goto out_tbl_unlock;
}
/*
 * Walk the bucket's chain. If an entry with the same device and key
 * already exists, take a reference on it and return it instead; the
 * unwind path below releases the newly allocated entry.
 */
for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
neigh_hold(n1);
rc = n1;
goto out_tbl_unlock;
}
}
/* No existing entry: link the new one at the head of the bucket. */
n->next = tbl->hash_buckets[hash_val];
tbl->hash_buckets[hash_val] = n;
n->dead = 0;
neigh_hold(n);
write_unlock_bh(&tbl->lock);
NEIGH_PRINTK2("neigh %p is created.\n", n);
rc = n;
out:
return rc;
/* Unwind labels: drop the table lock, then release the new entry. */
out_tbl_unlock:
write_unlock_bh(&tbl->lock);
out_neigh_release:
neigh_release(n);
goto out;
}
在函数里,会调用tbl->constructor()进行初始化。在arp_tbl结构中,为constructor赋值为arp_constructor。
static int arp_constructor(struct neighbour *neigh)
{
u32 addr = *(u32*)neigh->primary_key;
struct net_device *dev = neigh->dev;
struct in_device *in_dev;
struct neigh_parms *parms;
neigh->type = inet_addr_type(addr);
rcu_read_lock();
in_dev = rcu_dereference(__in_dev_get(dev));
if (in_dev == NULL) {
rcu_read_unlock();
return -EINVAL;
}
parms = in_dev->arp_parms;
__neigh_parms_put(neigh->parms);
neigh->parms = neigh_parms_clone(parms);
rcu_read_unlock();
//dev->hard_header.是为被赋值勤
if (dev->hard_header == NULL) {
neigh->nud_state = NUD_NOARP;
neigh->ops = &arp_direct_ops;
neigh->output = neigh->ops->queue_xmit;
} else {
#if 1
switch (dev->type) {
default:
break;
case ARPHRD_ROSE:
neigh->ops = &arp_broken_ops;
neigh->output = neigh->ops->output;
return 0;
#endif
;}
#endif
if (neigh->type == RTN_MULTICAST) {
neigh->nud_state = NUD_NOARP;
arp_mc_map(addr, neigh->ha, dev, 1);
} else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
neigh->nud_state = NUD_NOARP;
memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
} else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) {
neigh->nud_state = NUD_NOARP;
memcpy(neigh->ha, dev->broadcast, dev->addr_len);
}
if (dev->hard_header_cache)
neigh->ops = &arp_hh_ops;
else
neigh->ops = &arp_generic_ops;
//如果邻居项是可用状态,则调用connected_output里的函数
if (neigh->nud_state&NUD_VALID)
neigh->output = neigh->ops->connected_output;
else
//如果邻居项不可用
neigh->output = neigh->ops->output;
}
return 0;
}
在网卡驱动那一章,我们是调用alloc_etherdev()来构建网卡的net_device结构的,在alloc_etherdev()中会调用ether_setup()对各函数指针赋值
void ether_setup(struct net_device *dev) (drivers/net/net_init.c)
{
/* Fill in the fields of the device structure with ethernet-generic values.
This should be in a common file instead of per-driver. */
dev->change_mtu = eth_change_mtu;
dev->hard_header = eth_header;
dev->rebuild_header = eth_rebuild_header;
dev->set_mac_address = eth_mac_addr;
dev->hard_header_cache = eth_header_cache;
dev->header_cache_update= eth_header_cache_update;
dev->hard_header_parse = eth_header_parse;
dev->type = ARPHRD_ETHER;
dev->hard_header_len = ETH_HLEN;
dev->mtu = 1500; /* eth_mtu */
dev->addr_len = ETH_ALEN;
dev->tx_queue_len = 1000; /* Ethernet wants good queues */
memset(dev->broadcast,0xFF, ETH_ALEN);
/* New-style flags. */
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
}
所以,neigh->output 就指向了arp_hh_ops->output
Arp_hh_ops的结构如下:
/*
 * Neighbour operations that arp_constructor() selects for devices with a
 * hard_header_cache callback. Both output and connected_output point at
 * neigh_resolve_output.
 */
static struct neigh_ops arp_hh_ops = {
	.family			= AF_INET,

	/* Transmit paths. */
	.output			= neigh_resolve_output,
	.connected_output	= neigh_resolve_output,
	.hh_output		= dev_queue_xmit,
	.queue_xmit		= dev_queue_xmit,

	/* Resolution callbacks. */
	.solicit		= arp_solicit,
	.error_report		= arp_error_report,
};
由此可以看到,最终的数据都会流到neigh_resolve_output
http://www.360doc.com/content/10/1026/23/706976_64322489.shtml