FIB配置过程续
当给设备设置IP地址的时候,内核给inetaddr_chain通知链发送了一个NETDEV_UP事件。FIB系统正好对这个事件感兴趣,所以事先把下面这个结构注册到了inetaddr_chain上:
/*
 * Notifier block for the FIB subsystem: notifications delivered through
 * this block are handled by fib_inetaddr_event().
 */
static struct notifier_block fib_inetaddr_notifier = {
.notifier_call = fib_inetaddr_event,
};
为什么FIB系统会对这个事情感兴趣?因为给设备分配IP地址相当于给系统增加一条type为RTN_LOCAL的路由,其路由范围是RT_SCOPE_HOST,没错吧?给系统增加IP地址,其实也就和路由软件(OSPF、RIP等)往内核里增加路由表项一样,基本的流程相同,只是个别参数不同而已(我甚至觉得,路由可以理解为地址,区别在于这是别人的地址;以后如果实在不理解路由的本质,你就把它认为是别人的地址就得了)。这里的重点是让你了解FIB系统是如何增删路由表项的。
其回调函数就是fib_inetaddr_event。此函数如果收到NETDEV_DOWN消息,它就删除FIB中存在的地址;之后如果ifa_dev->ifa_list是空,则disable这个设备,否则就刷新一下路由cache(注意,这里是路由cache,不是FIB表)。
很明显,目前收到的是NETDEV_UP事件,流程走向了左边的分支,即调用下面的fib_add_ifaddr。其参数就是inet_insert_ifa中传入的ifa。我们只是访问其成员变量,而不再更改。
void fib_add_ifaddr(struct in_ifaddr *ifa)
{
struct in_device *in_dev = ifa->ifa_dev;
struct net_device *dev = in_dev->dev;
struct in_ifaddr *prim = ifa;
__be32 mask = ifa->ifa_mask;
__be32 addr = ifa->ifa_local;
__be32 prefix = ifa->ifa_address & mask;
if (ifa->ifa_flags & IFA_F_SECONDARY) {
prim = inet_ifa_byprefix(in_dev, prefix, mask);
if (!prim) {
pr_warn("%s: bug: prim == NULL\n", __func__);
return;
}
}
//如果是loopback接口配置,addr是127.0.0.1,而配置例子中192.168.18.2
fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
if (!(dev->flags & IFF_UP))
return;
/* Add broadcast address, if it is explicitly assigned. */
if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
//ifa->ifa_broadcast是192.168.18.255,而对于loopback接口是不会进入这一行
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
//第二个参数loopback接口是RTN_LOCAL, prefix是127,而配置例子对应的是RTN_UNICAST,prefix是192.168.18,这导致它们存取的FIB表不一样。
if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
fib_magic(RTM_NEWROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
prefix, ifa->ifa_prefixlen, prim);
/* Add network specific broadcasts, when it takes a sense */
if (ifa->ifa_prefixlen < 31) {
//loopback接口prefix是127, 而配置例子是192.168.18
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
//loopback接口的prefix|~mask是255.255.255.127,而配置例子是192.168.18.255
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
32, prim);
}
}
}
我们已经在注释中给出了函数中各次调用fib_magic时的第三个参数。根据这些参数,我们看看它的执行结果:
/*
 * Build an in-kernel routing request and apply it directly to a FIB table.
 *
 * The request mirrors what a netlink route message would carry (the original
 * note points to inet_rtm_newaddr for comparison), but it is assembled on the
 * stack and handed straight to the table's insert/delete handler. Per the
 * original author, the netlink code cannot be reused directly because passing
 * the arguments in is awkward, and netlink is already locked while the FIB
 * engine is being handled.
 *
 * cmd:     RTM_NEWROUTE calls tb->insert; any other value calls tb->delete.
 * type:    route type; RTN_UNICAST selects the RT_MIN_TABLE table, anything
 *          else selects RT_TABLE_LOCAL. RTN_LOCAL gets RT_SCOPE_HOST, every
 *          other type RT_SCOPE_LINK.
 * dst:     destination address, carried in the RTA_DST attribute.
 * dst_len: destination prefix length (rtm_dst_len).
 * ifa:     supplies the preferred source (ifa_local) and the output
 *          interface index (ifa_dev->dev->ifindex).
 *
 * NOTE(review): this listing uses DECnet-style types (__le16, struct
 * dn_ifaddr, struct dn_fib_table, dn_fib_get_table), whereas the callers
 * shown in fib_add_ifaddr pass a __be32 address and a struct in_ifaddr *.
 * It looks like the DECnet variant of fib_magic was pasted in place of the
 * IPv4 one - confirm against the kernel tree being described.
 */
static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
{
struct dn_fib_table *tb;
/* Netlink header plus route message, built only to be passed by pointer. */
struct {
struct nlmsghdr nlh;
struct rtmsg rtm;
} req;
/* Attribute carrying the destination address. */
struct {
struct nlattr hdr;
__le16 dst;
} dst_attr = {
.dst = dst,
};
/* Attribute carrying the preferred source address. */
struct {
struct nlattr hdr;
__le16 prefsrc;
} prefsrc_attr = {
.prefsrc = ifa->ifa_local,
};
/* Attribute carrying the output interface index. */
struct {
struct nlattr hdr;
u32 oif;
} oif_attr = {
.oif = ifa->ifa_dev->dev->ifindex,
};
/* Attribute table indexed by RTA_*; entries not listed stay NULL. */
struct nlattr *attrs[RTA_MAX+1] = {
[RTA_DST] = (struct nlattr *) &dst_attr,
[RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr,
[RTA_OIF] = (struct nlattr *) &oif_attr,
};
memset(&req.rtm, 0, sizeof(req.rtm));
/*
 * Original author's note: this obtains a pointer to the existing main or
 * local table (ip_fib_main_table / ip_fib_local_table) rather than creating
 * a new FIB table.
 * NOTE(review): the meaning of the second argument (1) is not shown here -
 * confirm.
 */
if (type == RTN_UNICAST)
tb = dn_fib_get_table(RT_MIN_TABLE, 1);
else
tb = dn_fib_get_table(RT_TABLE_LOCAL, 1);
if (tb == NULL)
return;
/*
 * Fill in the nlmsghdr and rtmsg. They are only passed to the table's
 * insert/delete handler below; nothing is actually sent out.
 */
req.nlh.nlmsg_len = sizeof(req);
req.nlh.nlmsg_type = cmd;
req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
req.nlh.nlmsg_pid = 0;
req.nlh.nlmsg_seq = 0;
req.rtm.rtm_dst_len = dst_len;
req.rtm.rtm_table = tb->n;
/*
 * Keep in mind: although the user ran ifconfig, the route itself is
 * generated by the kernel, hence RTPROT_KERNEL.
 */
req.rtm.rtm_protocol = RTPROT_KERNEL;
req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
req.rtm.rtm_type = type;
/*
 * Configuring an address is essentially adding a slightly special route to
 * the routing system, so cmd here is NEWROUTE rather than NEWADDR.
 */
if (cmd == RTM_NEWROUTE)
tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL);
else
tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL);
}
tb->insert在此指向了fn_hash_insert。
static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
{
struct fn_hash *table = (struct fn_hash *) tb->tb_data;
struct fib_node *new_f, *f;
struct fib_alias *fa, *new_fa;
struct fn_zone *fz;
struct fib_info *fi;
u8 tos = cfg->fc_tos;
__be32 key;
int err;
/*cfg->fc_dst_len网络掩码长度*/
if (cfg->fc_dst_len >