linux路由选择流程

49 篇文章 11 订阅
16 篇文章 0 订阅

1 ip_rcv_finish

ip_rcv这个函数主要是对数据包做各种正确性验证,然后调用网络过滤子系统在PRE_ROUTING链上的回调函数,经过网络过滤子系统的处理后再调用ip_rcv_finish。ip_rcv_finish主要的工作:确定数据包是前送(转发)还是上传给本机协议栈,如果是前送,要确定输出网络设备和下一个接收站的地址。

我们看一下ip_rcv_finish的源码,首先判断skb->dst是否为NULL,如果是表明数据包的去向还没有定,那么需要调用ip_route_input决定数据包的去向

/*
 * ip_rcv_finish - abridged excerpt of the IPv4 receive continuation.
 *
 * Only the routing step is shown: when the skb carries no dst entry,
 * ip_route_input() is called with the destination address, source address
 * and TOS taken from the IP header, plus the receiving device. The rest of
 * the function is elided (the "....." line below).
 */
static int ip_rcv_finish(struct sk_buff *skb)
{
    const struct iphdr *iph = ip_hdr(skb);
    struct rtable *rt;
    /*
     *    Initialise the virtual path cache for the packet. It describes
     *    how the packet travels inside Linux networking.
     */
    if (skb->dst == NULL) {      // no dst entry attached to the skb yet
        // NOTE(review): err is not examined in this excerpt; the elided
        // code presumably handles a failed lookup -- confirm upstream.
        int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
                     skb->dev);
    }
 .....
}

ip_route_input函数主要是调用rt_hash根据目的地址得到路由哈希值,然后调用ip_route_input_slow继续进一步处理

/*
 * ip_route_input - abridged excerpt of the input route lookup entry point.
 *
 * The middle of the function is elided (the "....." line below). What
 * remains shows that the call ends by passing skb, daddr, saddr, tos and
 * dev to ip_route_input_slow() and returning its result.
 *
 * NOTE(review): rth and hash are presumably used by the elided route-cache
 * lookup -- confirm against the kernel source.
 */
int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
		   u8 tos, struct net_device *dev)
{
	struct rtable * rth;
	unsigned	hash;
.....
	return ip_route_input_slow(skb, daddr, saddr, tos, dev);	// next step: the full routing decision
}

2 ip_route_input_slow

ip_route_input_slow最重要的是调用fib_lookup根据目的地址查找路由表,得到路由结果struct fib_result res,决定数据包去向:如果res.type == RTN_LOCAL,说明是本地接收的数据,那么设置数据包下一步处理函数为ip_local_deliver;如果是转发数据包,那么进一步调用ip_mkroute_input

/*
 * ip_route_input_slow - abridged excerpt of the slow-path input route lookup.
 *
 * Sections marked "......" are elided; they include the declarations of
 * several locals and every error label targeted by the gotos below.
 *
 * Visible flow:
 *   - fib_lookup() fills res. On failure the code jumps to e_hostunreach
 *     when !IN_DEV_FORWARD(in_dev), otherwise to no_route.
 *   - res.type == RTN_LOCAL: the source is validated with
 *     fib_validate_source() and control jumps to local_input, where a
 *     route entry whose input handler is ip_local_deliver is set up.
 *   - otherwise ip_mkroute_input() is called to build the route entry.
 *   - brd_input marks the result as broadcast and then continues into
 *     local_input.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;

......

	/*
	 * Key step: query the routing tables. The outcome stored in res
	 * decides where the packet goes.
	 */
	if ((err = fib_lookup(net, &fl, &res)) != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			goto e_hostunreach;
		goto no_route;
	}

......
	if (res.type == RTN_LOCAL) {			/* packet is addressed to this host */
		int result;
		result = fib_validate_source(saddr, daddr, tos,
					     net->loopback_dev->ifindex,
					     dev, &spec_dst, &itag);
		if (result < 0)
			goto martian_source;
		if (result)
			flags |= RTCF_DIRECTSRC;
		spec_dst = daddr;
		goto local_input;
	}
......

	err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);	/* forwarding path */
	/*
	 * NOTE(review): as shown, control would fall through into brd_input
	 * after the call above. The upstream function presumably releases its
	 * references and returns err here; that code is missing from this
	 * excerpt -- confirm against the kernel source.
	 */

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ipv4_is_zeronet(saddr))
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
	else {
		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
					  &itag);
		if (err < 0)
			goto martian_source;
		if (err)
			flags |= RTCF_DIRECTSRC;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:                /* local delivery: allocate and initialise the route entry */
	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth)
		goto e_nobufs;	/* allocation failed; the label is in the elided tail */

	rth->rt_iif	=
	rth->fl.iif	= dev->ifindex;
	rth->u.dst.dev	= net->loopback_dev;
	dev_hold(rth->u.dst.dev);
	rth->idev	= in_dev_get(rth->u.dst.dev);
	rth->rt_gateway	= daddr;
	rth->rt_spec_dst= spec_dst;
	rth->u.dst.input= ip_local_deliver;		/* input handler for locally delivered packets */
	rth->rt_flags 	= flags|RTCF_LOCAL;

......

}

2.1 fib_lookup

fib_lookup查询路由表,将路由结果保存在struct fib_result *res, 决定数据包的走向,先查找本地路由表,再查找main路由表

/*
 * fib_lookup - resolve a flow against the local table, then the main table.
 *
 * @net: network namespace passed to fib_get_table()
 * @flp: flow key handed to each table's tb_lookup()
 * @res: filled in by the table lookup
 *
 * RT_TABLE_LOCAL is consulted first; RT_TABLE_MAIN is consulted only when
 * the local lookup returns non-zero. Returns 0 as soon as one lookup
 * returns 0, or -ENETUNREACH when both return non-zero.
 */
static inline int fib_lookup(struct net *net, const struct flowi *flp,
			     struct fib_result *res)
{
	struct fib_table *tb = fib_get_table(net, RT_TABLE_LOCAL);

	if (tb->tb_lookup(tb, flp, res) != 0) {
		/* Nothing in the local table: fall back to the main table. */
		tb = fib_get_table(net, RT_TABLE_MAIN);
		if (tb->tb_lookup(tb, flp, res) != 0)
			return -ENETUNREACH;
	}

	return 0;
}

3 ip_mkroute_input

ip_mkroute_input实际调用的是__mkroute_input,__mkroute_input根据路由结果确定数据包的发包网卡,设置数据包转发函数ip_forward。设置出口处理函数ip_output

/*
 * __mkroute_input - build the route entry for a packet that will be forwarded.
 *
 * @skb:    packet being routed (its protocol and mark are read)
 * @res:    FIB lookup result; supplies the output device, oif and gateway
 * @in_dev: in_device the packet arrived on
 * @daddr:  destination address
 * @saddr:  source address
 * @tos:    type of service
 * @result: receives the newly allocated entry on success
 *
 * Returns 0 on success; -EINVAL when the output device cannot be obtained,
 * when fib_validate_source() reports an error, or when a non-IP packet
 * would leave through the device it arrived on; -ENOBUFS when dst_alloc()
 * fails.
 */
static int __mkroute_input(struct sk_buff *skb,
			   struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos,
			   struct rtable **result)
{
	struct in_device *odev;
	struct rtable *rt;
	unsigned rtflags = 0;
	__be32 specific_dst;
	u32 tag;
	int rc;

	/*
	 * Working reference on the output device chosen by the FIB result;
	 * it is dropped again at the "release" label.
	 */
	odev = in_dev_get(FIB_RES_DEV(*res));
	if (!odev) {
		if (net_ratelimit())
			printk(KERN_CRIT "Bug in ip_route_input" \
			       "_slow(). Please, report\n");
		return -EINVAL;
	}

	/* Validate the source/destination pair for this forwarding path. */
	rc = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res),
				 in_dev->dev, &specific_dst, &tag);
	if (rc < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);
		rc = -EINVAL;
		goto release;
	}

	if (rc) {
		/* Non-zero validation result: flag the source as direct. */
		rtflags |= RTCF_DIRECTSRC;

		/* Same device in and out: a redirect may be requested. */
		if (odev == in_dev &&
		    (IN_DEV_SHARED_MEDIA(odev) ||
		     inet_addr_onlink(odev, saddr, FIB_RES_GW(*res))))
			rtflags |= RTCF_DOREDIRECT;
	}

	/*
	 * Not IP (i.e. ARP). Do not create route, if it is
	 * invalid for proxy arp. DNAT routes are always valid.
	 */
	if (skb->protocol != htons(ETH_P_IP) && odev == in_dev) {
		rc = -EINVAL;
		goto release;
	}

	rt = dst_alloc(&ipv4_dst_ops);
	if (!rt) {
		rc = -ENOBUFS;
		goto release;
	}

	atomic_set(&rt->u.dst.__refcnt, 1);
	rt->u.dst.flags = DST_HOST;
	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
		rt->u.dst.flags |= DST_NOPOLICY;
	if (IN_DEV_CONF_GET(odev, NOXFRM))
		rt->u.dst.flags |= DST_NOXFRM;

	/* Flow key describing the packet. */
	rt->fl.fl4_dst = daddr;
	rt->fl.fl4_src = saddr;
	rt->fl.fl4_tos = tos;
	rt->fl.mark = skb->mark;
	rt->fl.iif = in_dev->dev->ifindex;
	rt->fl.oif = 0;

	/* Addressing fields of the route entry itself. */
	rt->rt_dst = daddr;
	rt->rt_src = saddr;
	rt->rt_gateway = daddr;
	rt->rt_iif = rt->fl.iif;
	rt->rt_spec_dst = specific_dst;

	/* Output device: hold it and take its in_device for the entry. */
	rt->u.dst.dev = odev->dev;
	dev_hold(rt->u.dst.dev);
	rt->idev = in_dev_get(rt->u.dst.dev);

	/* Forwarded packets enter via ip_forward and leave via ip_output. */
	rt->u.dst.input = ip_forward;
	rt->u.dst.output = ip_output;
	rt->rt_genid = rt_genid(dev_net(rt->u.dst.dev));

	rt_set_nexthop(rt, res, tag);

	rt->rt_flags = rtflags;

	*result = rt;
	rc = 0;
 release:
	/* release the working reference to the output device */
	in_dev_put(odev);
	return rc;
}

4 整体函数流程

 

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值