参考资料
<<linux内核网络栈源代码情景分析>>
route路由表概述
在IP协议的实现中,每次发送数据包都要查询路由表,选择合适的路由表项,确定下一跳的地址,并据此构造MAC地址,进而将数据包交给链路层处理。本文继续学习route相关的内容。
route概述
route数据结构
路由表项的数据结构如下:
/*
 * This is an entry in the IP routing table.
 *
 * Entries are chained through rt_next into a singly linked list.
 */
struct rtable
{
struct rtable *rt_next; /* next entry in the routing list, NULL at the tail */
unsigned long rt_dst; /* destination address; lookups compare it under rt_mask */
unsigned long rt_mask; /* netmask selecting which bits of rt_dst are significant */
unsigned long rt_gateway; /* gateway address; ip_rt_add() stores 0 for direct routes */
unsigned char rt_flags; /* RTF_* flag bits (RTF_UP, RTF_GATEWAY, RTF_MSS, ...) */
unsigned char rt_metric; /* route metric (not referenced by the code shown here) */
short rt_refcnt; /* reference count (not referenced by the code shown here) */
unsigned long rt_use; /* usage counter, incremented each time a lookup returns this entry */
unsigned short rt_mss; /* MSS to use on this route */
unsigned long rt_window; /* window clamp; 0 means no clamping */
struct device *rt_dev; /* interface this route is bound to */
};
rtable结构表示一个路由表项,系统路由表即是由许多rtable结构构成的一个链表。
rt_del函数
该函数用于从路由表中删除与给定目的地址(以及可选的设备名)相匹配的所有rtable表项:
/*
 * The routing table list
 */
static struct rtable *rt_base = NULL; /* head of the linked list of routing entries */
/*
 * Pointer to the loopback route
 */
static struct rtable *rt_loopback = NULL; /* cached loopback route entry, NULL when none is installed */
/*
 * Remove routing table entries for a destination.
 *
 * dst     - destination address an entry must carry to be removed
 * devname - when non-NULL, only entries whose bound device has exactly
 *           this name are removed; when NULL the device is not compared
 *
 * Every matching entry on the list is unlinked and freed, not only the
 * first one.  If the cached loopback route is among them, rt_loopback is
 * reset to NULL.
 */
static void rt_del(unsigned long dst, char *devname)
{
	struct rtable **link = &rt_base;
	struct rtable *victim;
	unsigned long irq_state;

	/*
	 * This must be done with interrupts off because we could take
	 * an ICMP_REDIRECT.
	 */
	save_flags(irq_state);
	cli();

	while ((victim = *link) != NULL) {
		/* Both the destination and (if requested) the device must match. */
		if (victim->rt_dst == dst &&
		    (devname == NULL ||
		     strcmp(victim->rt_dev->name, devname) == 0)) {
			/* Unlink; 'link' stays put so the successor is examined next. */
			*link = victim->rt_next;

			/* If we delete the loopback route update its pointer. */
			if (victim == rt_loopback)
				rt_loopback = NULL;

			kfree_s(victim, sizeof(struct rtable));
		} else {
			link = &victim->rt_next;
		}
	}

	restore_flags(irq_state);
}
该函数的主要工作就是循环表项链表,依次比较查找对应的表项,然后删除,功能较为简单。
ip_rt_flush函数
删除与一个设备相绑定的所有路由表项
/*
 * Remove all routing table entries for a device.  This is called when
 * a device is downed.
 *
 * dev - interface whose routes are to be dropped; entries are matched by
 *       comparing their rt_dev pointer with this one
 *
 * The list is edited with interrupts disabled.  If the cached loopback
 * route belongs to the device, rt_loopback is reset to NULL.
 */
void ip_rt_flush(struct device *dev)
{
	struct rtable **link = &rt_base;
	struct rtable *cur;
	unsigned long irq_state;

	save_flags(irq_state);
	cli();

	for (cur = *link; cur != NULL; cur = *link) {
		if (cur->rt_dev == dev) {
			/* Unlink and free; 'link' now refers to the successor. */
			*link = cur->rt_next;
			if (cur == rt_loopback)
				rt_loopback = NULL;
			kfree_s(cur, sizeof(struct rtable));
		} else {
			/* Bound to another device: keep it and move on. */
			link = &cur->rt_next;
		}
	}

	restore_flags(irq_state);
}
掩码的判断函数
/*
 * Used by 'rt_add()' when we can't get the netmask any other way..
 *
 * Returns the classful netmask for a destination address.
 *
 * dst - destination address in network byte order
 *
 * The result is in network byte order: the class A mask for class A
 * addresses, the class B mask for class B addresses, and the class C
 * mask for everything else.
 */
static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host_order = ntohl(dst);
	unsigned long net_bits;

	if (IN_CLASSA(host_order))
		net_bits = IN_CLASSA_NET;
	else if (IN_CLASSB(host_order))
		net_bits = IN_CLASSB_NET;
	else
		net_bits = IN_CLASSC_NET;

	return htonl(net_bits);
}
/*
 * If no mask is specified then generate a default entry.
 *
 * dst - destination address (0 yields a mask of 0)
 * dev - interface the route will use; its pa_addr / pa_mask are consulted
 *
 * The classful mask of dst is computed first.  If dst and the interface
 * address agree on every bit of that mask, the interface's own pa_mask is
 * returned; otherwise the classful mask is returned.
 */
static unsigned long guess_mask(unsigned long dst, struct device * dev)
{
	unsigned long class_mask;

	if (dst == 0)
		return 0;

	class_mask = default_mask(dst);

	/* Same classful network as the interface: trust its configured mask. */
	if (((dst ^ dev->pa_addr) & class_mask) == 0)
		return dev->pa_mask;

	/* Different network: the classful mask is the best guess we have. */
	return class_mask;
}
这两个函数用于在添加路由时没有指定子网掩码的情况下,根据目的地址的类别(A/B/C类)以及本地接口的地址和掩码,推算出一个默认的子网掩码。
ip_rt_add函数
/*
 * Find the route entry through which our gateway will be reached.
 *
 * gw - address of the gateway
 *
 * Walks the routing list and stops at the first entry whose destination
 * matches gw under that entry's mask.  Returns the device bound to that
 * entry, or NULL when no entry matches or when the matching entry is
 * itself a gateway route.
 */
static inline struct device * get_gw_dev(unsigned long gw)
{
	struct rtable *entry = rt_base;

	while (entry != NULL) {
		if (((gw ^ entry->rt_dst) & entry->rt_mask) == 0) {
			/*
			 * Gateways behind gateways are a no-no
			 */
			return (entry->rt_flags & RTF_GATEWAY) ? NULL : entry->rt_dev;
		}
		/* This entry does not cover the gateway address; try the next. */
		entry = entry->rt_next;
	}

	return NULL;
}
/*
 * Rewrote rt_add(), as the old one was weird - Linus
 *
 * This routine is used to update the IP routing table, either
 * from the kernel (ICMP_REDIRECT) or via an ioctl call issued
 * by the superuser.
 *
 * flags  - RTF_* flags describing the route
 * dst    - destination address
 * mask   - netmask; 0 means "work one out" (ignored for RTF_HOST routes)
 * gw     - gateway address (forced to 0 when the route is not a gateway route)
 * dev    - interface the route is bound to
 * mtu    - stored as the route MSS only when RTF_MSS is set
 * window - stored as the window clamp only when RTF_WINDOW is set
 *
 * Nothing is reported to the caller: the function returns silently when a
 * dynamic route to the local net is requested, when the gateway is not
 * reachable through dev, or when memory allocation fails.
 */
void ip_rt_add(short flags, unsigned long dst, unsigned long mask,
	unsigned long gw, struct device *dev, unsigned short mtu, unsigned long window)
{
	struct rtable *entry, *cur;
	struct rtable **link;
	unsigned long irq_state;

	if (flags & RTF_HOST) {
		/* A host is a unique machine and has no network bits. */
		mask = 0xffffffff;
	} else if (mask == 0) {
		/* No mask supplied: calculate the network mask. */
		if (((dst ^ dev->pa_addr) & dev->pa_mask) != 0) {
			/* Destination is outside the interface's subnet: guess. */
			mask = guess_mask(dst, dev);
		} else {
			/* Destination is on the interface's own subnet. */
			mask = dev->pa_mask;
			flags &= ~RTF_GATEWAY;
			if (flags & RTF_DYNAMIC) {
				/*printk("Dynamic route to my own net rejected\n");*/
				return;
			}
		}
		dst &= mask;
	}

	/*
	 * A gateway must be reachable and not a local address
	 */
	if (gw == dev->pa_addr)
		flags &= ~RTF_GATEWAY;

	if (!(flags & RTF_GATEWAY)) {
		/* Directly reachable destination: no gateway is recorded. */
		gw = 0;
	} else if (get_gw_dev(gw) != dev) {
		/* Don't try to add a gateway we can't reach.. */
		return;
	}

	/*
	 * Allocate an entry and fill it in.
	 */
	entry = (struct rtable *) kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry == NULL)
		return;
	memset(entry, 0, sizeof(*entry));

	entry->rt_dst = dst;
	entry->rt_mask = mask;
	entry->rt_gateway = gw;
	entry->rt_dev = dev;
	entry->rt_flags = flags | RTF_UP;

	/* Are the MSS/Window valid ?  The tests use the flags as stored. */
	if (entry->rt_flags & RTF_MSS)
		entry->rt_mss = mtu;
	else
		entry->rt_mss = dev->mtu - HEADER_SIZE;

	if (entry->rt_flags & RTF_WINDOW)
		entry->rt_window = window;
	else
		entry->rt_window = 0;	/* Default is no clamping */

	/*
	 * The list is edited below; the interrupts must be off for this
	 * process.
	 */
	save_flags(irq_state);
	cli();

	/*
	 * Remove old route if we are getting a duplicate
	 * (same destination and same mask).
	 */
	link = &rt_base;
	while ((cur = *link) != NULL) {
		if (cur->rt_dst == dst && cur->rt_mask == mask) {
			*link = cur->rt_next;
			if (cur == rt_loopback)
				rt_loopback = NULL;
			kfree_s(cur, sizeof(*cur));
		} else {
			link = &cur->rt_next;
		}
	}

	/*
	 * Add the new route.  Skip past every entry whose mask contains all
	 * bits of the new mask, and insert in front of the first entry that
	 * does not (or at the tail if there is none).
	 */
	for (link = &rt_base; (cur = *link) != NULL; link = &cur->rt_next) {
		if ((cur->rt_mask & mask) != mask)
			break;
	}
	entry->rt_next = cur;
	*link = entry;

	/*
	 * Update the loopback route: remember the first route installed on a
	 * loopback device while none is cached.
	 */
	if (rt_loopback == NULL && (entry->rt_dev->flags & IFF_LOOPBACK))
		rt_loopback = entry;

	/*
	 * Restore the interrupts and return
	 */
	restore_flags(irq_state);
}
ip_rt_add函数主要用来添加一个新的路由表项:首先对子网掩码、网关地址以及路由标志位进行校正,然后创建一个新的rtable结构并对其进行初始化,最后将初始化完成的表项添加到系统路由表中。
rt_new函数
/*
 * Check if a mask is acceptable.
 *
 * mask - netmask, network byte order
 * addr - address the mask is to be applied to, network byte order
 *
 * Returns 1 (bad) when addr has any bit set outside the mask, or when the
 * mask's one-bits are not a single contiguous run starting at the most
 * significant bit.  Returns 0 when the mask is acceptable.
 */
static inline int bad_mask(unsigned long mask, unsigned long addr)
{
	unsigned long host_bits = ~mask;

	/* The address must not have bits in the host part. */
	if ((addr & host_bits) != 0)
		return 1;

	/*
	 * In host byte order a valid inverted mask looks like 0...01...1,
	 * so adding one must leave no bit in common with it.
	 */
	host_bits = ntohl(host_bits);
	return (host_bits & (host_bits + 1)) != 0;
}
/*
 * Process a route add request from the user.
 *
 * r - route description supplied by the caller
 *
 * Returns 0 once the request has been handed to ip_rt_add(), or a
 * negative error code:
 *   the error from getname()  if the device name could not be fetched
 *   -EINVAL                   if the named device does not exist
 *   -EAFNOSUPPORT             if the destination, a non-zero genmask, or
 *                             the gateway of a gateway route is not AF_INET
 *   -ENETUNREACH              if no device could be determined
 */
static int rt_new(struct rtentry *r)
{
	struct device *ifp = NULL;
	struct device *cand;
	unsigned long flags, daddr, mask, gw;
	char *name;
	int rc;

	/*
	 * If a device is specified find it.
	 */
	name = r->rt_dev;
	if (name != NULL) {
		rc = getname(name, &name);
		if (rc != 0)
			return rc;
		ifp = dev_get(name);
		putname(name);
		if (ifp == NULL)
			return -EINVAL;
	}

	/*
	 * If the device isn't INET, don't allow it
	 */
	if (r->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Make local copies of the important bits
	 */
	flags = r->rt_flags;
	daddr = ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
	mask = ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
	gw = ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;

	/*
	 * BSD emulation: Permits route add someroute gw one-of-my-addresses
	 * to indicate which iface. Not as clean as the nice Linux dev technique
	 * but people keep using it...
	 *
	 * If the "gateway" is the address of one of our own interfaces that
	 * is up, bind the route to that interface and drop the gateway flag.
	 */
	if (ifp == NULL && (flags & RTF_GATEWAY)) {
		cand = dev_base;
		while (cand != NULL) {
			if ((cand->flags & IFF_UP) && cand->pa_addr == gw) {
				flags &= ~RTF_GATEWAY;
				ifp = cand;
				break;
			}
			cand = cand->next;
		}
	}

	/*
	 * Ignore faulty masks
	 */
	if (bad_mask(mask, daddr))
		mask = 0;

	/*
	 * Host routes always get an all-ones mask; any other non-zero mask
	 * must have been supplied as an AF_INET address.
	 */
	if (flags & RTF_HOST)
		mask = 0xffffffff;
	else if (mask != 0 && r->rt_genmask.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	if (!(flags & RTF_GATEWAY)) {
		/* Direct route: pick the device from the destination address. */
		if (ifp == NULL)
			ifp = ip_dev_check(daddr);
	} else {
		/*
		 * You can only gateway IP via IP..
		 */
		if (r->rt_gateway.sa_family != AF_INET)
			return -EAFNOSUPPORT;
		if (ifp == NULL)
			ifp = get_gw_dev(gw);
	}

	/*
	 * Unknown device.
	 */
	if (ifp == NULL)
		return -ENETUNREACH;

	/*
	 * Add the route
	 */
	ip_rt_add(flags, daddr, mask, gw, ifp, r->rt_mss, r->rt_window);
	return 0;
}
该函数主要是根据上层传入的rtentry参数添加一个新的路由表项:首先检查rtentry结构中各个字段的合法性,再根据rtentry结构的字段值生成路由表项所需要的各个参数,最后调用ip_rt_add函数完成新路由表项的添加。
路由查找函数ip_rt_route和ip_rt_local函数
/*
 * This is hackish, but results in better code. Use "-S" to see why.
 *
 * early_out is a statement expression that jumps to a label named
 * `no_route`, so it may only be used inside a function that defines that
 * label.  The trailing `1` only gives the expression a value so that it can
 * appear as an operand of `||` in a loop condition; the goto is taken before
 * that value is ever used.
 */
#define early_out ({ goto no_route; 1; })
/*
 * Route a packet. This needs to be fairly quick. Florian & Co.
 * suggested a unified ARP and IP routing cache. Done right its
 * probably a brilliant idea. I'd actually suggest a unified
 * ARP/IP routing/Socket pointer cache. Volunteers welcome
 *
 * daddr    - destination address to route to
 * opt      - not used by this function
 * src_addr - when non-NULL, receives the address (pa_addr) of the device
 *            of the entry that matched
 *
 * The list is searched in order.  An entry matches when daddr equals its
 * destination under its mask, or - for non-gateway entries only - when its
 * device has IFF_BROADCAST set and daddr is that device's broadcast address.
 * If daddr is the matched device's own address, the loopback route is
 * returned instead.  The returned entry's rt_use counter is incremented.
 *
 * Returns NULL when nothing matches, or when the loopback route is needed
 * but none is installed.
 */
struct rtable * ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr)
{
	struct rtable *hit = rt_base;

	while (1) {
		if (hit == NULL)
			return NULL;

		/* Ordinary match on the masked destination. */
		if (((hit->rt_dst ^ daddr) & hit->rt_mask) == 0)
			break;

		/*
		 * broadcast addresses can be special cases..
		 * (never tried for gateway routes)
		 */
		if (!(hit->rt_flags & RTF_GATEWAY) &&
		    (hit->rt_dev->flags & IFF_BROADCAST) &&
		    hit->rt_dev->pa_brdaddr == daddr)
			break;

		hit = hit->rt_next;
	}

	if (src_addr != NULL)
		*src_addr = hit->rt_dev->pa_addr;

	/* A packet addressed to ourselves goes via the loopback route. */
	if (daddr == hit->rt_dev->pa_addr) {
		hit = rt_loopback;
		if (hit == NULL)
			return NULL;
	}

	hit->rt_use++;
	return hit;
}
/*
 * Route lookup restricted to directly reachable destinations.
 *
 * daddr    - destination address to look up
 * opt      - not used by this function
 * src_addr - when non-NULL, receives the address (pa_addr) of the device
 *            of the entry that matched
 *
 * Gateway routes are skipped entirely.  Among the remaining entries the
 * first one matches whose masked destination equals daddr, or whose device
 * has IFF_BROADCAST set and daddr as its broadcast address.  If daddr is
 * the matched device's own address, the loopback route is returned instead.
 * The returned entry's rt_use counter is incremented.
 *
 * Returns NULL when nothing matches, or when the loopback route is needed
 * but none is installed.
 */
struct rtable * ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr)
{
	struct rtable *cur;

	for (cur = rt_base; ; cur = cur->rt_next) {
		if (cur == NULL)
			return NULL;

		/*
		 * No routed addressing.
		 */
		if (cur->rt_flags & RTF_GATEWAY)
			continue;

		if (((cur->rt_dst ^ daddr) & cur->rt_mask) == 0)
			break;

		/*
		 * broadcast addresses can be special cases..
		 */
		if ((cur->rt_dev->flags & IFF_BROADCAST) &&
		    cur->rt_dev->pa_brdaddr == daddr)
			break;
	}

	if (src_addr != NULL)
		*src_addr = cur->rt_dev->pa_addr;

	/* A packet addressed to ourselves goes via the loopback route. */
	if (daddr == cur->rt_dev->pa_addr) {
		cur = rt_loopback;
		if (cur == NULL)
			return NULL;
	}

	cur->rt_use++;
	return cur;
}
ip_rt_route函数主要是在路由表中查找一个合适的路由表项;ip_rt_local函数则只在直达(非网关)路由中查找,完成本地链路上主机或者网络地址的路由查询工作。两者最终都是为了确定发送数据包所使用的路由表项及其绑定的网络设备。
总结
route.c主要就是路由表的相关操作,路由表本质上是通过一个链表来维护的,查询路由本质上就是在链表上查找每个元素进行检查并比较表项中目的地址和实际要发送数据包中的目的地址进行网络号的比较,进而达到查找的目的。由于本人才疏学浅,如有错误请批评指正。