无端ARP
一般来说,发出一个ARPOP_REQUEST是由于发送方想和一个IP地址通信,需要找到其对应的L2地址。但有时候,发送方生成一个ARPOP_REQUEST是为了通知接收方一些信息,而不是请求信息。这种封包就称为无端ARP,它通常用在下面列出的情况中:
- L2地址发生变化,改变地址的节点通过无端ARP来触发L2和L3的关联更新
- 重复地址检测,同一个局域网中不应该出现两台主机有相同的L3地址,但这种情况总会发生。主机可以使用无端ARP来检测重复地址的存在。如果你发出一个目的地是你自己地址的ARP请求,那么只有当存在一台与你相同ip配置的主机时,才会收到应答
- 虚拟IP,故障迁移。通常为了使一个站点有冗余度,该站点除了有一台活跃服务器外,还有一定数量的配置类似的主机处于备用模式。当活跃服务器由于某些原因宕机后,与heartbeat定时器相关的一个机制就会检测到这个故障,并启动新活跃服务器的选择。这个服务器会生成一个无端ARP包来更新网络中的所有其他主机上的ARP缓存。因为新服务器继承了旧服务器的ip地址,发出的ARPOP_REQUEST不会被应答,但所有的接收者会更新它们的缓存。
调整ARP选项
- 编译选项
- ARPD(CONFIG_ARPD),这个选项允许用户空间的守护进程处理ARP包,这样做在超大型且流量比较大的网络中,可以提高传输性能。
- /proc选项
- 这个选项中的大部分特性可以配置为全局的,也可以给每个设备单独配置。
初始化ARP协议
/*
 * arp_init - boot-time initialisation of the ARP protocol.
 *
 * Performs, in order: neigh_table_init() on arp_tbl, dev_add_pack() of
 * arp_packet_type, arp_proc_init(), neigh_sysctl_register() for the
 * table's default parms (compiled in only when CONFIG_SYSCTL is defined),
 * and register_netdevice_notifier() for arp_netdev_notifier.
 *
 * Takes no arguments and returns nothing; no result of the called
 * helpers is checked here.
 */
1: void __init arp_init(void)
2: {
3: neigh_table_init(&arp_tbl);
4:
5: dev_add_pack(&arp_packet_type); /* hook up the ARP packet_type */
6: arp_proc_init();
7: #ifdef CONFIG_SYSCTL
8: neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
9: NET_IPV4_NEIGH, "ipv4", NULL, NULL);
10: #endif
11: register_netdevice_notifier(&arp_netdev_notifier);
12: }
初始化函数首先是注册一个虚函数表和ARP协议使用的其他常用参数;这个工作由neigh_table_init函数完成。dev_add_pack主要用于注册接收封包函数,arp_proc_init函数会建立/proc/net/arp文件,读取该文件就可以看到ARP缓存的内容。若内核支持sysctl,就能依靠neigh_sysctl_register创建目录/proc/sys/net/ipv4/neigh,用于输出neigh_parms结构的默认调节参数。register_netdevice_notifier向内核注册一个回调函数,用于接收设备状态和配置变化的通知。
- arp_tbl表
/*
 * arp_tbl - the neighbour table instance used for IPv4/ARP.
 *
 * Supplies the address family, the per-entry size and key length, the
 * hash/constructor/proxy callbacks, the default neigh_parms (.parms) and
 * the garbage-collection interval and thresholds.  All time values are
 * written as multiples of HZ.
 */
1: struct neigh_table arp_tbl = {
2: .family = AF_INET,
3: .entry_size = sizeof(struct neighbour) + 4, /* neighbour plus 4 extra bytes (same size as key_len) */
4: .key_len = 4, /* arp_constructor reads the key as one __be32 */
5: .hash = arp_hash,
6: .constructor = arp_constructor,
7: .proxy_redo = parp_redo,
8: .id = "arp_cache",
9: .parms = {
10: .tbl = &arp_tbl, /* default parms point back at their owning table */
11: .base_reachable_time = 30 * HZ,
12: .retrans_time = 1 * HZ,
13: .gc_staletime = 60 * HZ,
14: .reachable_time = 30 * HZ,
15: .delay_probe_time = 5 * HZ,
16: .queue_len = 3,
17: .ucast_probes = 3, /* compared against neigh->probes in arp_solicit */
18: .mcast_probes = 3,
19: .anycast_delay = 1 * HZ,
20: .proxy_delay = (8 * HZ) / 10, /* 0.8 * HZ; arp_process replies at once when this is 0 */
21: .proxy_qlen = 64,
22: .locktime = 1 * HZ, /* arp_process will not override an entry updated more recently than this */
23: },
24: .gc_interval = 30 * HZ,
25: .gc_thresh1 = 128,
26: .gc_thresh2 = 512,
27: .gc_thresh3 = 1024,
28: };
初始化neighbour结构
/*
 * arp_constructor - ARP-specific initialisation of a new neighbour entry.
 *
 * @neigh: entry to initialise; its primary_key is read as a __be32 IPv4
 *         address and its dev must already be set.
 *
 * Steps:
 *  - under rcu_read_lock(), fetch the device's in_device, classify the
 *    address with inet_addr_type() and replace neigh->parms with a clone
 *    of the device's arp_parms (dropping the previous parms reference);
 *  - a device without header_ops gets NUD_NOARP and arp_direct_ops;
 *  - ARPHRD_ROSE (plus AX25/NETROM when those options are configured)
 *    gets arp_broken_ops and returns immediately;
 *  - multicast, NOARP/loopback and broadcast/point-to-point neighbours
 *    are marked NUD_NOARP with a hardware address filled in directly;
 *  - the ops table is arp_hh_ops when header_ops->cache is set, otherwise
 *    arp_generic_ops, and neigh->output is chosen from it depending on
 *    whether the state is already NUD_VALID.
 *
 * Returns 0 on success, -EINVAL when the device has no in_device.
 */
1: static int arp_constructor(struct neighbour *neigh)
2: {
3: __be32 addr = *(__be32*)neigh->primary_key;
4: struct net_device *dev = neigh->dev;
5: struct in_device *in_dev;
6: struct neigh_parms *parms;
7:
8: rcu_read_lock();
9: in_dev = __in_dev_get_rcu(dev);
10: if (in_dev == NULL) {
11: rcu_read_unlock();
12: return -EINVAL;
13: }
14:
15: neigh->type = inet_addr_type(dev_net(dev), addr);
16:
17: parms = in_dev->arp_parms;
18: __neigh_parms_put(neigh->parms); /* drop the parms the entry held before */
19: neigh->parms = neigh_parms_clone(parms); /* switch to the device's ARP parms */
20: rcu_read_unlock();
21:
22: if (!dev->header_ops) { /* no header_ops: nothing to resolve */
23: neigh->nud_state = NUD_NOARP;
24: neigh->ops = &arp_direct_ops;
25: neigh->output = neigh->ops->queue_xmit;
26: } else {
27: /* Good devices (checked by reading texts, but only Ethernet is
28: tested)
29:
30: ARPHRD_ETHER: (ethernet, apfddi)
31: ARPHRD_FDDI: (fddi)
32: ARPHRD_IEEE802: (tr)
33: ARPHRD_METRICOM: (strip)
34: ARPHRD_ARCNET:
35: etc. etc. etc.
36:
37: ARPHRD_IPDDP will also work, if author repairs it.
38: I did not it, because this driver does not work even
39: in old paradigm.
40: */
41:
42: #if 1
43: /* So... these "amateur" devices are hopeless.
44: The only thing, that I can say now:
45: It is very sad that we need to keep ugly obsolete
46: code to make them happy.
47:
48: They should be moved to more reasonable state, now
49: they use rebuild_header INSTEAD OF hard_start_xmit!!!
50: Besides that, they are sort of out of date
51: (a lot of redundant clones/copies, useless in 2.1),
52: I wonder why people believe that they work.
53: */
54: switch (dev->type) {
55: default:
56: break;
57: case ARPHRD_ROSE:
58: #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
59: case ARPHRD_AX25:
60: #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
61: case ARPHRD_NETROM:
62: #endif
63: neigh->ops = &arp_broken_ops;
64: neigh->output = neigh->ops->output;
65: return 0;
66: #endif
67: ;} /* empty statement: with AX25 compiled out, "case ARPHRD_ROSE:" labels only this and falls through to the code below */
68: #endif
69: if (neigh->type == RTN_MULTICAST) {
70: neigh->nud_state = NUD_NOARP;
71: arp_mc_map(addr, neigh->ha, dev, 1); /* fills neigh->ha from the multicast IP */
72: } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
73: neigh->nud_state = NUD_NOARP;
74: memcpy(neigh->ha, dev->dev_addr, dev->addr_len); /* use the device's own address */
75: } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) {
76: neigh->nud_state = NUD_NOARP;
77: memcpy(neigh->ha, dev->broadcast, dev->addr_len); /* use the device broadcast address */
78: }
79:
80: if (dev->header_ops->cache)
81: neigh->ops = &arp_hh_ops;
82: else
83: neigh->ops = &arp_generic_ops;
84:
85: if (neigh->nud_state&NUD_VALID)
86: neigh->output = neigh->ops->connected_output;
87: else
88: neigh->output = neigh->ops->output;
89: }
90: return 0;
91: }
/*
 * arp_generic_ops - neigh_ops chosen by arp_constructor for devices that
 * have header_ops but no header_ops->cache.  Uses neigh_resolve_output
 * for .output and neigh_connected_output for .connected_output.
 */
1: static const struct neigh_ops arp_generic_ops = {
2: .family = AF_INET,
3: .solicit = arp_solicit,
4: .error_report = arp_error_report,
5: .output = neigh_resolve_output,
6: .connected_output = neigh_connected_output,
7: .hh_output = dev_queue_xmit,
8: .queue_xmit = dev_queue_xmit,
9: };
10:
/*
 * arp_hh_ops - neigh_ops chosen by arp_constructor when the device's
 * header_ops provides a ->cache hook.  Identical to arp_generic_ops
 * except that .connected_output is also neigh_resolve_output.
 */
11: static const struct neigh_ops arp_hh_ops = {
12: .family = AF_INET,
13: .solicit = arp_solicit,
14: .error_report = arp_error_report,
15: .output = neigh_resolve_output,
16: .connected_output = neigh_resolve_output,
17: .hh_output = dev_queue_xmit,
18: .queue_xmit = dev_queue_xmit,
19: };
20:
/*
 * arp_direct_ops - neigh_ops chosen by arp_constructor for devices with
 * no header_ops.  It sets no .solicit or .error_report, and every output
 * hook is dev_queue_xmit.
 */
21: static const struct neigh_ops arp_direct_ops = {
22: .family = AF_INET,
23: .output = dev_queue_xmit,
24: .connected_output = dev_queue_xmit,
25: .hh_output = dev_queue_xmit,
26: .queue_xmit = dev_queue_xmit,
27: };
28:
/*
 * arp_broken_ops - neigh_ops chosen by arp_constructor for ARPHRD_ROSE
 * (and AX25/NETROM when configured).  Both .output and .connected_output
 * are neigh_compat_output.  Unlike the other ops tables here it is not
 * declared static.
 */
29: const struct neigh_ops arp_broken_ops = {
30: .family = AF_INET,
31: .solicit = arp_solicit,
32: .error_report = arp_error_report,
33: .output = neigh_compat_output,
34: .connected_output = neigh_compat_output,
35: .hh_output = dev_queue_xmit,
36: .queue_xmit = dev_queue_xmit,
37: };
传输和接收ARP包
arp_solicit(最终通过arp_send发送请求):
/*
 * arp_solicit - send an ARP request for a neighbour's address.
 *
 * @neigh: neighbour whose IPv4 address (primary_key) is to be resolved.
 * @skb:   packet whose IP header may supply the source address; may be
 *         NULL, in which case no address is taken from it.
 *
 * Source address selection follows IN_DEV_ARP_ANNOUNCE(in_dev):
 *   0/default - use the skb's IP source if it is a local address;
 *   1         - use it only if it is local AND inet_addr_onlink() accepts
 *               it for the target, otherwise leave it unset;
 *   2         - never take it from the skb.
 * When no address was chosen, inet_select_addr(dev, target,
 * RT_SCOPE_LINK) picks one.
 *
 * Destination selection uses the probe counter: while fewer than
 * parms->ucast_probes probes have been sent the request goes to the
 * cached neigh->ha; within the following parms->app_probes the request is
 * handed to neigh_app_ns() (only under CONFIG_ARPD) and nothing is sent
 * from here; otherwise arp_send() is called with a NULL dst_ha.
 *
 * Returns nothing.  Returns early without sending when the device has no
 * in_device.
 */
1: static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
2: {
3: __be32 saddr = 0;
4: u8 *dst_ha = NULL;
5: struct net_device *dev = neigh->dev;
6: __be32 target = *(__be32*)neigh->primary_key;
7: int probes = atomic_read(&neigh->probes);
8: struct in_device *in_dev = in_dev_get(dev); /* takes a reference; dropped at line 35 */
9:
10: if (!in_dev)
11: return;
12:
13: switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
14: default:
15: case 0: /* By default announce any local IP */
16: if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL)
17: saddr = ip_hdr(skb)->saddr;
18: break;
19: case 1: /* Restrict announcements of saddr in same subnet */
20: if (!skb)
21: break;
22: saddr = ip_hdr(skb)->saddr;
23: if (inet_addr_type(dev_net(dev), saddr) == RTN_LOCAL) {
24: /* saddr should be known to target */
25: if (inet_addr_onlink(in_dev, target, saddr))
26: break;
27: }
28: saddr = 0; /* not usable: fall back to inet_select_addr() below */
29: break;
30: case 2: /* Avoid secondary IPs, get a primary/preferred one */
31: break;
32: }
33:
34: if (in_dev) /* NOTE(review): always true here, NULL already returned at line 11 */
35: in_dev_put(in_dev);
36: if (!saddr)
37: saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
38:
39: if ((probes -= neigh->parms->ucast_probes) < 0) {
40: if (!(neigh->nud_state&NUD_VALID))
41: printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n");
42: dst_ha = neigh->ha; /* only the pointer is taken here */
43: read_lock_bh(&neigh->lock); /* held across arp_send(); released at line 54 */
44: } else if ((probes -= neigh->parms->app_probes) < 0) {
45: #ifdef CONFIG_ARPD
46: neigh_app_ns(neigh);
47: #endif
48: return;
49: }
50:
51: arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
52: dst_ha, dev->dev_addr, NULL);
53: if (dst_ha) /* non-NULL only on the unicast path, which took the lock */
54: read_unlock_bh(&neigh->lock);
55: }
neigh,L3地址需要被解析的邻居;skb,保存数据封包的缓冲区,正是该封包的传输触发了solicitation请求。
- 需要区分skb缓冲区中IP头的源IP地址,和arp_solicit选择的要放到ARP头中的源IP地址:如果封包是本机产生的,IP包头中源IP地址就是本机的地址;若封包是要转发的,源IP地址就是最初发送方的地址
- 同样需要区分skb缓冲区IP包头中的目的地址,和arp_solicit请求解析的目标IP地址
arp_solicit的主要任务:
- 选择要放在ARP头中的源IP地址,区分内核生成的请求和用户空间产生的请求
- 对于内核生成的请求,使用arp_send传输solicitation请求
- 对于用户空间请求,arp_solicit调用neigh_app_ns来通知相应的用户空间程序:需要生成一个solicitation请求。如果内核不支持ARPD,arp_solicit就不做solicitation请求,直接返回。
- 更新生成的solicitation请求数目
- 使用arp_send函数传输请求
ARP_ANNOUNCE与源IP地址的选择:
大部分主机只有一个IP地址,因此将其直接拷贝到ARP头中。若一个主机有多个IP地址,ARP_ANNOUNCE的设置就会影响IP的选择。主要用到三个函数:
- inet_addr_type,在该函数的输入参数中给一个IP地址,它就返回该地址的类型。这里关心的返回值是RTN_LOCAL,表示该地址属于本机
- inet_addr_onlink,输入一个设备和两个IP地址,该函数会检查这两个地址是否属于同一个子网
- inet_select_addr,输入一个设备,一个IP地址(通常不是本地主机的地址)和一个scope,该函数会在设备上配置的地址中查找一个IP地址,该地址应该和输入的IP地址位于同一个子网中,并且其scope小于或等于输入的scope。
/*
 * arp_rcv - receive handler for ARP packets.
 *
 * @skb:      received packet.
 * @dev:      device the packet arrived on.
 * @pt:       packet_type (not used in this function).
 * @orig_dev: original device (not used in this function).
 *
 * Makes sure a full ARP header for this device can be pulled, then drops
 * the packet if the hardware address length does not match the device,
 * the device is IFF_NOARP, the packet is PACKET_OTHERHOST or
 * PACKET_LOOPBACK, or the protocol address length is not 4.  Accepted
 * packets are unshared, get a zeroed neighbour control block, and are
 * passed through the NF_ARP_IN netfilter hook to arp_process().
 *
 * Returns the NF_HOOK result for accepted packets and 0 on every drop
 * path.
 */
1: static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
2: struct packet_type *pt, struct net_device *orig_dev)
3: {
4: struct arphdr *arp;
5:
6: /* ARP header, plus 2 device addresses, plus 2 IP addresses. */
7: if (!pskb_may_pull(skb, arp_hdr_len(dev)))
8: goto freeskb;
9:
10: arp = arp_hdr(skb);
11: if (arp->ar_hln != dev->addr_len ||
12: dev->flags & IFF_NOARP ||
13: skb->pkt_type == PACKET_OTHERHOST ||
14: skb->pkt_type == PACKET_LOOPBACK ||
15: arp->ar_pln != 4)
16: goto freeskb;
17:
18: if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
19: goto out_of_mem; /* skb is NULL here, so the kfree_skb() at freeskb is skipped */
20:
21: memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
22:
23: return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
24:
25: freeskb:
26: kfree_skb(skb);
27: out_of_mem:
28: return 0;
29: }
/*
 * arp_process - handle one validated ARP packet (called via the
 * NF_ARP_IN hook from arp_rcv).
 *
 * @skb: the ARP packet; skb->dev is the receiving device.
 *
 * Outline:
 *  1. Take a reference on the device's in_device; give up if there is
 *     none.
 *  2. Check ar_hrd/ar_pro against what the device type allows, and accept
 *     only ARPOP_REQUEST and ARPOP_REPLY.
 *  3. Extract sender hardware address (sha), sender IP (sip) and target
 *     IP (tip); the target hardware address is skipped.
 *  4. Discard packets whose target IP is loopback or multicast.
 *  5. sip == 0 (duplicate address detection): reply only to a request
 *     for a local address that arp_ignore() does not suppress.
 *  6. Requests that route successfully: for a local target, reply unless
 *     arp_ignore()/arp_filter() object; when forwarding is enabled and
 *     the proxy conditions hold, reply at once or queue the skb with
 *     pneigh_enqueue() for a delayed reply.
 *  7. Everything else updates the neighbour cache for sip: an existing
 *     entry is refreshed, and with ARP_ACCEPT set a new entry may be
 *     created for an unsolicited reply from a unicast address.
 *
 * Always returns 0.  The skb is consumed on every path except the
 * pneigh_enqueue() one, which returns without calling consume_skb().
 */
1: static int arp_process(struct sk_buff *skb)
2: {
3: struct net_device *dev = skb->dev;
4: struct in_device *in_dev = in_dev_get(dev);
5: struct arphdr *arp;
6: unsigned char *arp_ptr;
7: struct rtable *rt;
8: unsigned char *sha;
9: __be32 sip, tip;
10: u16 dev_type = dev->type;
11: int addr_type;
12: struct neighbour *n;
13: struct net *net = dev_net(dev);
14:
15: /* arp_rcv below verifies the ARP header and verifies the device
16: * is ARP'able.
17: */
18:
19: if (in_dev == NULL)
20: goto out;
21:
22: arp = arp_hdr(skb);
23:
24: switch (dev_type) {
25: default:
26: if (arp->ar_pro != htons(ETH_P_IP) ||
27: htons(dev_type) != arp->ar_hrd)
28: goto out;
29: break;
30: case ARPHRD_ETHER:
31: case ARPHRD_IEEE802_TR:
32: case ARPHRD_FDDI:
33: case ARPHRD_IEEE802:
34: /*
35: * ETHERNET, Token Ring and Fibre Channel (which are IEEE 802
36: * devices, according to RFC 2625) devices will accept ARP
37: * hardware types of either 1 (Ethernet) or 6 (IEEE 802.2).
38: * This is the case also of FDDI, where the RFC 1390 says that
39: * FDDI devices should accept ARP hardware of (1) Ethernet,
40: * however, to be more robust, we'll accept both 1 (Ethernet)
41: * or 6 (IEEE 802.2)
42: */
43: if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
44: arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
45: arp->ar_pro != htons(ETH_P_IP))
46: goto out;
47: break;
48: case ARPHRD_AX25:
49: if (arp->ar_pro != htons(AX25_P_IP) ||
50: arp->ar_hrd != htons(ARPHRD_AX25))
51: goto out;
52: break;
53: case ARPHRD_NETROM:
54: if (arp->ar_pro != htons(AX25_P_IP) ||
55: arp->ar_hrd != htons(ARPHRD_NETROM))
56: goto out;
57: break;
58: }
59:
60: /* Understand only these message types */
61:
62: if (arp->ar_op != htons(ARPOP_REPLY) &&
63: arp->ar_op != htons(ARPOP_REQUEST))
64: goto out;
65:
66: /*
67: * Extract fields
68: */
69: arp_ptr= (unsigned char *)(arp+1); /* first byte after the fixed ARP header */
70: sha = arp_ptr;
71: arp_ptr += dev->addr_len;
72: memcpy(&sip, arp_ptr, 4);
73: arp_ptr += 4;
74: arp_ptr += dev->addr_len; /* skip the target hardware address */
75: memcpy(&tip, arp_ptr, 4);
76: /*
77: * Check for bad requests for 127.x.x.x and requests for multicast
78: * addresses. If this is one such, delete it.
79: */
80: if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
81: goto out;
82:
83: /*
84: * Special case: We must set Frame Relay source Q.922 address
85: */
86: if (dev_type == ARPHRD_DLCI)
87: sha = dev->broadcast;
88:
89: /*
90: * Process entry. The idea here is we want to send a reply if it is a
91: * request for us or if it is a request for someone else that we hold
92: * a proxy for. We want to add an entry to our cache if it is a reply
93: * to us or if it is a request for our address.
94: * (The assumption for this last is that if someone is requesting our
95: * address, they are probably intending to talk to us, so it saves time
96: * if we cache their address. Their address is also probably not in
97: * our cache, since ours is not in their cache.)
98: *
99: * Putting this another way, we only care about replies if they are to
100: * us, in which case we add them to the cache. For requests, we care
101: * about those for us and those for our proxies. We reply to both,
102: * and in the case of requests for us we add the requester to the arp
103: * cache.
104: */
105:
106: /* Special case: IPv4 duplicate address detection packet (RFC2131) */
107: if (sip == 0) {
108: if (arp->ar_op == htons(ARPOP_REQUEST) &&
109: inet_addr_type(net, tip) == RTN_LOCAL &&
110: !arp_ignore(in_dev, sip, tip))
111: arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
112: dev->dev_addr, sha);
113: goto out; /* a zero sip never reaches the cache update below */
114: }
115:
116: if (arp->ar_op == htons(ARPOP_REQUEST) &&
117: ip_route_input(skb, tip, sip, 0, dev) == 0) {
118:
119: rt = skb_rtable(skb);
120: addr_type = rt->rt_type;
121:
122: if (addr_type == RTN_LOCAL) {
123: int dont_send = 0;
124:
125: if (!dont_send)
126: dont_send |= arp_ignore(in_dev,sip,tip);
127: if (!dont_send && IN_DEV_ARPFILTER(in_dev))
128: dont_send |= arp_filter(sip,tip,dev);
129: if (!dont_send) {
130: n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
131: if (n) { /* reply only if an entry for the requester was obtained */
132: arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
133: neigh_release(n);
134: }
135: }
136: goto out;
137: } else if (IN_DEV_FORWARD(in_dev)) {
138: if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
139: (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
140: n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
141: if (n)
142: neigh_release(n);
143:
144: if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
145: skb->pkt_type == PACKET_HOST ||
146: in_dev->arp_parms->proxy_delay == 0) {
147: arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
148: } else {
149: pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
150: in_dev_put(in_dev); /* drop our reference; skb is not consumed on this path */
151: return 0;
152: }
153: goto out;
154: }
155: }
156: }
157:
158: /* Update our ARP tables */
159:
160: n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
161:
162: if (IPV4_DEVCONF_ALL(dev_net(dev), ARP_ACCEPT)) {
163: /* Unsolicited ARP is not accepted by default.
164: It is possible, that this option should be enabled for some
165: devices (strip is candidate)
166: */
167: if (n == NULL &&
168: arp->ar_op == htons(ARPOP_REPLY) &&
169: inet_addr_type(net, sip) == RTN_UNICAST)
170: n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
171: }
172:
173: if (n) {
174: int state = NUD_REACHABLE;
175: int override;
176:
177: /* If several different ARP replies follows back-to-back,
178: use the FIRST one. It is possible, if several proxy
179: agents are active. Taking the first reply prevents
180: arp trashing and chooses the fastest router.
181: */
182: override = time_after(jiffies, n->updated + n->parms->locktime);
183:
184: /* Broadcast replies and request packets
185: do not assert neighbour reachability.
186: */
187: if (arp->ar_op != htons(ARPOP_REPLY) ||
188: skb->pkt_type != PACKET_HOST)
189: state = NUD_STALE;
190: neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0);
191: neigh_release(n);
192: }
193:
194: out:
195: if (in_dev)
196: in_dev_put(in_dev);
197: consume_skb(skb);
198: return 0;
199: }