1) 在网络接口设备的硬件层之间能够直接进行包交换的设备构成了一个局域网, 局域网中的每一设备具有唯一的硬件地址. 对TCP/IP协议来说, 局域网中的每一设备又具有唯一的IP地址. 当IP包要从某一设备发向局域网中具有另一IP地址的设备时, 信源设备必须获得信宿设备的硬件地址, 这就需要硬件地址解析. ARP协议是根据设备的IP地址获取其硬件地址的方法. 信源设备向局域网广播自己的地址解析请求, 局域网中其余设备都收到该请求, 具有所请求IP地址的设备向信源设备作出应答, 提供它的硬件地址. 由于ARP请求的广播特性, 某一设备可以对不是自己IP地址的请求作出应答, 这就是ARP代理.
2) 在Linux内核中, 将能与自己在硬件层直接通信的其他网络接口设备称为"邻居", 用neighbour结构描述, 它包含设备的硬件地址信息. 系统中所有的IP包都通过路由所绑定的邻居发送到接口设备上. 邻居由邻居表(arp_tbl)来索引, 用邻居的IP地址可查询邻居表中设备的某个邻居.
3) 当要发送IP包时, 如果邻居的硬件地址(MAC)还未解析, 则将发送包暂时缓冲在arp_queue队列中, 然后发送【地址解析】请求, 这时邻居的状态为未完成状态(NUD_INCOMPLETE). 如果1秒内没收到ARP应答, 将重发ARP请求, 如果重发达到3次, 则解析失败, 邻居为失败状态(NUD_FAILED). 当收到正确应答, 邻居进入连接状态(NUD_REACHABLE), 这时arp_queue中的发送包将在创建帧头后发送到设备上.
4) 邻居的IP地址与硬件地址的关系并不是固定的, 系统在接收来自邻居的IP包时, 会及时地证实(confirm)邻居的IP地址与硬件地址的映射关系. 同时, 邻居表每60秒周期性地扫描邻居(neigh_periodic_timer), 一方面从表中清除那些解析失败和长时间(60秒)未被使用的邻居, 另一方面识别那些证实时间已超时的邻居,将它们从连接状态变为过期状态(NUD_STALE).
当邻居在过期状态发送IP包时, 就进入延迟状态(NUD_DELAY), 如果在延迟状态5秒后邻居的硬件地址还是未被证实, 邻居则发送arp请求, 这时进入探测状态(NUD_PROBE). 在探测状态, IP包并不进行缓冲, 仍旧使用过期的邻居地址发送, 如果探测失败, 邻居进入失败状态.
5) 为了缩短IP包到设备的传输路径, 在邻居结构上还引入了帧头缓冲结构(hh_cache). 如果邻居建立了帧头缓冲, IP包将通过帧头缓冲的输出发送出去. 当邻居处于连接状态时, 帧头缓冲输出直接指向 dev_queue_xmit(), 当处于过期状态时, 帧头缓冲输出切换为邻居的输出口, 对以太网设备来说, 邻居的输出口指向neigh_resolve_output(), neigh_connect()和neigh_suspect()两个函数用来进行这种切换.
6) 当系统对外部设备的ARP请求进行应答时, 系统将在arp_tbl中创建该外部设备的邻居, 刷新为过期状态. 当收到对其它设备的地址解析请求时, 系统将源设备的邻居刷新为过期状态. 当收到单发给自己, 但目的IP地址不是自己主机地址的ARP请求时, 如果设备允许转发并且该IP在代理表有定义, 则将它们缓冲到proxy_queue队列, 等待一段随机的时间作出应答, 防止目标设备拥塞, 向对方提供的是自己的设备地址.
这一部分有点复杂, 还有很多细节尚未搞清, 只能写这么多, 欢迎大家指点. 现在自己的问题是: 在什么情况下必须使用ARP代理?
; net/ipv4/ip_output.c:
static inline int ip_finish_output2(struct sk_buff *skb) IP包的发送出口 {
struct dst_entry *dst = skb->dst; 取IP包的路由结构 struct hh_cache *hh = dst->hh; 取路由的帧头缓冲
#ifdef CONFIG_NETFILTER_DEBUG nf_debug_ip_finish_output2(skb); #endif /*CONFIG_NETFILTER_DEBUG*/
if (hh) { 如果路由帧头缓冲非空 read_lock_bh(&hh->hh_lock); memcpy(skb->data - 16, hh->hh_data, 16); 创建IP包的硬件帧头 read_unlock_bh(&hh->hh_lock); skb_push(skb, hh->hh_len); return hh->hh_output(skb); 通过帧头缓冲发出 } else if (dst->neighbour) return dst->neighbour->output(skb); 通过邻居出口发出
printk(KERN_DEBUG "khm/n"); kfree_skb(skb); return -EINVAL; }
; net/ipv4/arp.c, core/neighbour.c:
/* Neighbour state bits; each state is a distinct bit so sets can be OR-ed. */
#define NUD_INCOMPLETE	0x01
#define NUD_REACHABLE	0x02
#define NUD_STALE	0x04
#define NUD_DELAY	0x08
#define NUD_PROBE	0x10
#define NUD_FAILED	0x20

/* Dummy states */
#define NUD_NOARP	0x40
#define NUD_PERMANENT	0x80
#define NUD_NONE	0x00

/*
 * NUD_NOARP and NUD_PERMANENT are pseudostates: they never change and
 * perform no address resolution or NUD.  NUD_PERMANENT entries also
 * cannot be deleted by the garbage collectors.
 */

/* States in which the per-neighbour timer is in use. */
#define NUD_IN_TIMER	(NUD_INCOMPLETE|NUD_DELAY|NUD_PROBE)
/* States in which the neighbour holds a usable hardware address. */
#define NUD_VALID	(NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
/* States in which the neighbour is treated as connected. */
#define NUD_CONNECTED	(NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE)

/* Index masks for the neighbour and proxy-neighbour hash bucket arrays. */
#define NEIGH_HASHMASK	0x1F
#define PNEIGH_HASHMASK	0xF
struct neigh_table 网络邻居表 {
struct neigh_table *next; int family; int entry_size; int key_len; __u32 (*hash)(const void *pkey, const struct net_device *); int (*constructor)(struct neighbour *); int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); void (*proxy_redo)(struct sk_buff *skb); char *id; struct neigh_parms parms; /* HACK. gc_* shoul follow parms without a gap! */ int gc_interval; (60秒) int gc_thresh1; (128) int gc_thresh2; (512) int gc_thresh3; (1024) unsigned long last_flush; struct timer_list gc_timer; struct timer_list proxy_timer; struct sk_buff_head proxy_queue; int entries; rwlock_t lock; unsigned long last_rand; struct neigh_parms *parms_list; kmem_cache_t *kmem_cachep; struct tasklet_struct gc_task; struct neigh_statistics stats; struct neighbour *hash_buckets[NEIGH_HASHMASK+1]; struct pneigh_entry *phash_buckets[PNEIGH_HASHMASK+1]; }; struct neigh_parms 邻居参数 {
struct neigh_parms *next; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; int entries; void *priv;
void *sysctl_table;
int base_reachable_time; (30秒) int retrans_time; (1秒) int gc_staletime; (60秒) int reachable_time; (30秒左右) int delay_probe_time; (5秒)
int queue_len; (3) int ucast_probes; (3) int app_probes; (0) int mcast_probes; (3) int anycast_delay; (1秒) int proxy_delay; (0.8秒) int proxy_qlen; (64) int locktime; (1秒) }; struct neighbour 网络邻居结构 {
struct neighbour *next; struct neigh_table *tbl; struct neigh_parms *parms; struct net_device *dev; unsigned long used; unsigned long confirmed; unsigned long updated; __u8 flags; __u8 nud_state; __u8 type; __u8 dead; atomic_t probes; rwlock_t lock; unsigned char ha[(MAX_ADDR_LEN+sizeof(unsigned long)-1)&~(sizeof(unsigned long)-1)]; struct hh_cache *hh; atomic_t refcnt; int (*output)(struct sk_buff *skb); struct sk_buff_head arp_queue; struct timer_list timer; struct neigh_ops *ops; u8 primary_key[0]; }; struct hh_cache 帧头缓冲结构 {
struct hh_cache *hh_next; /* Next entry */ atomic_t hh_refcnt; /* number of users */ unsigned short hh_type; /* protocol identifier, f.e ETH_P_IP */ int hh_len; /* length of header */ int (*hh_output)(struct sk_buff *skb); rwlock_t hh_lock; /* cached hardware header; allow for machine alignment needs. */ unsigned long hh_data[16/sizeof(unsigned long)]; };
struct neigh_table arp_tbl = //网络邻居表, 用于TCPIP协议中的网络邻居信息索引 {
NULL, AF_INET, sizeof(struct neighbour) + 4, 4, arp_hash, arp_constructor, NULL, NULL, parp_redo, "arp_cache", { NULL, NULL, &arp_tbl, 0, NULL, NULL, 30*HZ, 1*HZ, 60*HZ, 30*HZ, 5*HZ, 3, 3, 0, 3, 1*HZ, (8*HZ)/10, 64, 1*HZ },30*HZ, 128, 512, 1024, };
/*
 * arp_init - initialise the address-resolution module.
 *
 * Sets up the ARP neighbour table, registers the ARP packet handler,
 * creates the "arp" proc entry and, when CONFIG_SYSCTL is enabled,
 * registers the table's parameters under "ipv4".
 */
void __init arp_init (void)
{
	/* Initialise the ARP neighbour table. */
	neigh_table_init(&arp_tbl);

	/* Register the receiver for address-resolution packets. */
	dev_add_pack(&arp_packet_type);

	proc_net_create ("arp", 0, arp_get_info);

#ifdef CONFIG_SYSCTL
	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4");
#endif
}
/* Head of the linked list of all neighbour tables (chained through ->next). */
static struct neigh_table *neigh_tables;
void neigh_table_init(struct neigh_table *tbl) {
unsigned long now = jiffies;
tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); if (tbl->kmem_cachep == NULL) tbl->kmem_cachep = kmem_cache_create(tbl->id, (tbl->entry_size+15)&~15, 0, SLAB_HWCACHE_ALIGN, NULL, NULL); 建立网络邻居信息结构内存分配器
#ifdef CONFIG_SMP tasklet_init(&tbl->gc_task, SMP_TIMER_NAME(neigh_periodic_timer), (unsigned long)tbl); #endif init_timer(&tbl->gc_timer); tbl->lock = RW_LOCK_UNLOCKED; tbl->gc_timer.data = (unsigned long)tbl; tbl->gc_timer.function = neigh_periodic_timer; tbl->gc_timer.expires = now + tbl->gc_interval + tbl->parms.reachable_time; add_timer(&tbl->gc_timer);
init_timer(&tbl->proxy_timer); tbl->proxy_timer.data = (unsigned long)tbl; tbl->proxy_timer.function = neigh_proxy_process; skb_queue_head_init(&tbl->proxy_queue);
tbl->last_flush = now; tbl->last_rand = now + tbl->parms.reachable_time*20; write_lock(&neigh_tbl_lock); tbl->next = neigh_tables; neigh_tables = tbl; write_unlock(&neigh_tbl_lock); } unsigned long neigh_rand_reach_time(unsigned long base) 取在(base/2)左右的随机数 {
return (net_random() % base) + (base>>1); }
static void SMP_TIMER_NAME(neigh_periodic_timer)(unsigned long arg) {
struct neigh_table *tbl = (struct neigh_table*)arg; unsigned long now = jiffies; int i;
write_lock(&tbl->lock);
/* * periodicly recompute ReachableTime from random function */ if (now - tbl->last_rand > 300*HZ) { struct neigh_parms *p; tbl->last_rand = now; for (p=&tbl->parms; p; p = p->next) p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); }
for (i=0; i <= NEIGH_HASHMASK; i++) { 扫描所有的网络邻居 struct neighbour *n, **np;
np = &tbl->hash_buckets; while ((n = *np) != NULL) {
unsigned state;
write_lock(&n->lock);
state = n->nud_state; if (state&(NUD_PERMANENT|NUD_IN_TIMER)) { 对于不可释放或正在解析的邻居 write_unlock(&n->lock); goto next_elt; }
if ((long)(n->used - n->confirmed) < 0) n->used = n->confirmed;
if (atomic_read(&n->refcnt) == 1 && (state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) {
*np = n->next; 释放那些解析失败和引用时间超过(60)秒的邻居 n->dead = 1; write_unlock(&n->lock); neigh_release(n); continue; }
if (n->nud_state&NUD_REACHABLE && now - n->confirmed > n->parms->reachable_time) {
n->nud_state = NUD_STALE; 如果解析时间大于可达超时, 则邻居的地址失效 neigh_suspect(n); } write_unlock(&n->lock);
next_elt: np = &n->next; }
|