mcast_solicit和mcast_resolicit控制使用多播发送邻居地址探测报文的次数,对于arp协议,内核默认的mcast_solicit为3。
通过PROC文件mcast_solicit可查看和修改其值。
$ cat /proc/sys/net/ipv4/neigh/ens33/mcast_solicit
3
$ cat /proc/sys/net/ipv4/neigh/ens33/mcast_resolicit
0
在arp邻居表arp_tbl中将NEIGH_VAR_MCAST_PROBES索引所对应的表项初始化为3。
struct neigh_table arp_tbl = {
.family = AF_INET,
.key_len = 4,
.protocol = cpu_to_be16(ETH_P_IP),
.hash = arp_hash,
.key_eq = arp_key_eq,
.constructor = arp_constructor,
.proxy_redo = parp_redo,
.id = "arp_cache",
.parms = {
.tbl = &arp_tbl,
.reachable_time = 30 * HZ,
.data = {
[NEIGH_VAR_MCAST_PROBES] = 3,
内核中静态变量neigh_sysctl_template(类型为struct neigh_sysctl_table)定义了mcast_solicit和mcast_resolicit的PROC文件信息。
static struct neigh_sysctl_table {
struct ctl_table_header *sysctl_header;
struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
.neigh_vars = {
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
...
NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
netlink接口
除了以上的PROC文件外,还可使用ip ntable命令查看和修改设备的邻居表参数mcast_probes。
$ ip ntable show dev ens33
inet arp_cache
dev ens33
refcnt 1 reachable 37268 base_reachable 30000 retrans 1000
gc_stale 60000 delay_probe 5000 queue 101
app_probes 0 ucast_probes 3 mcast_probes 3
anycast_delay 1000 proxy_delay 800 proxy_queue 64 locktime 1000
inet6 ndisc_cache
dev ens33
refcnt 1 reachable 31516 base_reachable 30000 retrans 1000
gc_stale 60000 delay_probe 5000 queue 101
app_probes 0 ucast_probes 3 mcast_probes 3
anycast_delay 1000 proxy_delay 800 proxy_queue 64 locktime 0
与PROC文件不同,这里使用的名称为mcast_probes,其值等于3。如下将设备ens33的邻居表参数mcast_probes修改为2。
# ip ntable change name arp_cache dev ens33 mcast_probes 2
内核函数neightbl_set负责以上ip ntable change命令的处理。函数nla_get_u32读取ip命令行设置的mcast_probes的值。对于arp协议,宏NEIGH_VAR_SET将修改全局变量arp_tbl的成员parms的data数组,具体为以NEIGH_VAR_MCAST_PROBES为索引所对应的成员的值。
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack)
{
struct neigh_table *tbl;
struct nlattr *tb[NDTA_MAX+1];
if (tb[NDTA_PARMS]) {
struct neigh_parms *p;
p = lookup_neigh_parms(tbl, net, ifindex);
...
for (i = 1; i <= NDTPA_MAX; i++) {
if (tbp[i] == NULL) continue;
switch (i) {
...
case NDTPA_MCAST_PROBES:
NEIGH_VAR_SET(p, MCAST_PROBES,
nla_get_u32(tbp[i]));
break;
case NDTPA_MCAST_REPROBES:
NEIGH_VAR_SET(p, MCAST_REPROBES,
nla_get_u32(tbp[i]));
break;
显示命令ip ntable show由内核中的函数neightbl_fill_parms处理,该函数负责填充邻居表的各项参数值。对于mcast_probes的值,由NEIGH_VAR宏从邻居表参数中取出,再由nla_put_u32函数填充到netlink消息中。
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
...
if ((parms->dev &&
...
nla_put_u32(skb, NDTPA_MCAST_PROBES,
NEIGH_VAR(parms, MCAST_PROBES)) ||
nla_put_u32(skb, NDTPA_MCAST_REPROBES,
NEIGH_VAR(parms, MCAST_REPROBES)) ||
mcast_solicit处理
在邻居表项定时处理函数中,如果表项的状态不是NUD_REACHABLE,但是设置了NUD_DELAY状态位,并且,当前时刻已经超出了发送邻居探测报文的时间(DELAY_PROBE_TIME),即邻居地址已经探测过了,但是状态还没改变,这里清除其中的NUD_DELAY状态位,设置唯一的状态位NUD_PROBE,将probes计数清零,在以下的probes计数是否超限的判断中,确保通过。
static void neigh_timer_handler(struct timer_list *t)
{
...
if (state & NUD_REACHABLE) {
...
} else if (state & NUD_DELAY) {
if (time_before_eq(now,
neigh->confirmed + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
...
} else {
neigh_dbg(2, "neigh %p is probed\n", neigh);
neigh->nud_state = NUD_PROBE;
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
notify = 1;
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
}
} else {
/* NUD_PROBE|NUD_INCOMPLETE */
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
}
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
neigh->nud_state = NUD_FAILED;
notify = 1;
neigh_invalidate(neigh);
goto out;
}
...
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
neigh_probe(neigh);
函数neigh_max_probes判断最多可发送的探测报文数量,由三部分组成,前两部分为单播和应用层探测数量,即UCAST_PROBES和APP_PROBES的值。最后一部分为多播类型的探测数量,根据当前邻居表项的NUD_PROBE状态位有所不同,如果没有设置NUD_PROBE标志位,即表项状态为NUD_INCOMPLETE时的探测,使用MCAST_PROBES定义的数量限制;反之,如果表项设置了NUD_PROBE状态位(例如以上函数中由NUD_DELAY状态转换而来),使用MCAST_REPROBES定义的数量限制。
/*
 * Return the maximum number of probes allowed for neighbour entry n.
 *
 * The limit is UCAST_PROBES + APP_PROBES plus a multicast quota taken
 * from the entry's neigh_parms: MCAST_REPROBES when n->nud_state has
 * the NUD_PROBE bit set, MCAST_PROBES otherwise.
 */
static __inline__ int neigh_max_probes(struct neighbour *n)
{
struct neigh_parms *p = n->parms;
return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
(n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
NEIGH_VAR(p, MCAST_PROBES));
}
对于arp协议,最终将调用arp_solicit函数处理邻居地址探测,如果在neigh_timer_handler函数中将邻居表项的状态设置为了NUD_PROBE,并将其probes计数清零,这里首先尝试单播地址探测方式。最终由arp_send_dst发送请求报文。
如果单播类型UCAST_PROBES探测数量已经用完,函数中的变量dst_hw为空,据此arp_send_dst执行多播发送。
static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
u8 dst_ha[MAX_ADDR_LEN], *dst_hw = NULL;
int probes = atomic_read(&neigh->probes);
...
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
if (probes < 0) {
if (!(neigh->nud_state & NUD_VALID))
pr_debug("trying to ucast probe in NUD_INVALID\n");
neigh_ha_snapshot(dst_ha, neigh, dev);
dst_hw = dst_ha;
} else {
probes -= NEIGH_VAR(neigh->parms, APP_PROBES);
if (probes < 0) {
neigh_app_ns(neigh);
return;
}
}
if (skb && !(dev->priv_flags & IFF_XMIT_DST_RELEASE))
dst = skb_dst(skb);
arp_send_dst(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
dst_hw, dev->dev_addr, NULL, dst);
不同于以上的邻居表项定时处理,在数据处理流程中,如果邻居表项的状态为NUD_FAILED或者NUD_NONE等不可用状态,例如表项刚刚创建,还没有学习到对应的邻居地址,并且多播类型(MCAST_PROBES)和APP_PROBES类型的probes数量之和不等于零,由于报文发送需要邻居地址,需要立即发送probe报文(设置immediate_probe为真)。这里不确定邻居的地址,使用多播或者APP方式获取地址。
注意这里在调用neigh_probe之前,将表项中成员probes设置为UCAST_PROBES对应的值,这样可直接跳过单播发送,即在以上函数arp_solicit中不会给dst_hw变量赋值,arp_send_dst执行多播类型的地址探测。
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
bool immediate_probe = false;
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
goto out_unlock_bh;
if (neigh->dead)
goto out_dead;
...
if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
NEIGH_VAR(neigh->parms, APP_PROBES)) {
unsigned long next, now = jiffies;
atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES));
neigh->nud_state = NUD_INCOMPLETE;
neigh->updated = now;
next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/2);
neigh_add_timer(neigh, next);
immediate_probe = true;
} else {
...
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
...
out_unlock_bh:
if (immediate_probe)
neigh_probe(neigh);
内核版本 5.0