如果是需要转发的包,呼叫net/ipv4/ip_forward.c:ip_forward();
此函数需要做的工作如下:
1) check ttl ,如果ttl <= 1 ,丢弃该包,发送ICMP time exceeded
消息给发送方;
2) check 是否skb 的headroom 足够大来容纳输出设备的链路层头部(dev2->hard_header_len),如果必要,可以通过skb_cow() 来expand skb;
3) ttl--(调用ip_decrease_ttl());
4) 如果我们的包的长度大于目的设备的MTU,并且IP 头部的don’t fragment bit 被置位,则向发送方发送ICMP
frag needed(ICMP_FRAG_NEEDED)消息,并丢弃该包;
5) call NF_IP_FORWARD hook
6) 如果NF_IP_FORWARD hook 返回NF_ACCEPT
,则呼叫net/ipv4/ip_forward.c:ip_forward_finish();
在ip_forward_finish() 函数中检测是否需要在ip
头设置其它的一些选项,然后会呼叫include/net/ip.h:ip_send() 。
在ip_send()中,会检测我们是否需要对此ip包进行分片(skb->len 大于skb->dst->pmtu),如果需要,则调用ip_fragment(),否则调用net/ipv4/ip_output.c:ip_finish_output()
。
在ip_finish_output() 中,先设置skb->dev 和skb->protocol,然后调用另一个NETFILTER hook
:NF_IP_POST_ROUTING ,并把ip_finish_output2() 函数作为该hook 的后续处理函数。
ip_finish_output2() 函数中,如果存在缓存的硬件头(dst->hh),则呼叫skb
对应的硬件头的发送函数hh->hh_output(skb);否则,如果存在dst->neighbour,则呼叫dst->neighbour->output(skb)。
Linux 2.4.0 IP 层转发(ip_forward)流程
/*
 * ip_forward - run the forwarding checks on a packet and hand it to the
 * NF_IP_FORWARD netfilter hook.
 *
 * @skb: packet to forward. Its route is read from skb->dst (cast to
 *       struct rtable) and its IP header from skb->nh.iph.
 *
 * Return:
 *   NET_RX_SUCCESS - the router-alert option is set and
 *                    ip_call_ra_chain() returned non-zero;
 *   NET_RX_DROP    - the packet was freed on one of the drop paths, or
 *                    skb_cow() returned NULL (nothing is freed here in
 *                    that case, since skb is then NULL);
 *   NET_RX_BAD     - ip_do_nat() returned non-zero
 *                    (CONFIG_IP_ROUTE_NAT builds only);
 *   otherwise the value of NF_HOOK(), which is given ip_forward_finish()
 *   as its last argument.
 */
int ip_forward(struct sk_buff *skb)
{
/* Output device taken from the route. */
struct net_device *dev2;
/* IP header of the packet. */
struct iphdr *iph;
/* Route attached to the packet. */
struct rtable *rt;
struct ip_options * opt =
&(IPCB(skb)->opt);
/* Path MTU copied from the route (rt->u.dst.pmtu). */
unsigned short mtu;
/*
 * Packets with the router-alert option are offered to
 * ip_call_ra_chain() first; a non-zero result ends processing here.
 */
if (IPCB(skb)->opt.router_alert
&& ip_call_ra_chain(skb))
return NET_RX_SUCCESS;
/* Only packets whose pkt_type is PACKET_HOST are forwarded. */
if (skb->pkt_type != PACKET_HOST)
goto drop;
iph = skb->nh.iph;
rt = (struct rtable*)skb->dst;
/* A TTL of 1 or less cannot be forwarded: reply with time exceeded. */
if (iph->ttl <= 1)
goto too_many_hops;
/*
 * With the strict-route flag set, the route's destination must equal
 * its gateway; otherwise report a source-route failure.
 */
if (opt->is_strictroute
&& rt->rt_dst !=
rt->rt_gateway)
goto sr_failed;
/* Derive the queueing priority from the IP TOS field. */
skb->priority =
rt_tos2priority(iph->tos);
dev2 = rt->u.dst.dev;
mtu = rt->u.dst.pmtu;
/*
 * Send a redirect when the route is flagged RTCF_DOREDIRECT and
 * opt->srr is not set.
 */
if (rt->rt_flags&RTCF_DOREDIRECT
&& !opt->srr)
ip_rt_send_redirect(skb);
/*
 * skb_cow() is asked for dev2->hard_header_len bytes (presumably
 * headroom for the link-layer header - confirm against skb_cow()).
 * Its return value replaces skb, so pointers into the packet are
 * re-read right after this call.
 */
if ((skb = skb_cow(skb, dev2->hard_header_len))
== NULL)
return NET_RX_DROP;
iph = skb->nh.iph;
/* NOTE(review): opt is refreshed here but not read again below. */
opt = &(IPCB(skb)->opt);
/* The TTL is decremented only after skb_cow() has returned the skb. */
ip_decrease_ttl(iph);
/* Longer than the path MTU with DF set: cannot fragment, tell sender. */
if (skb->len > mtu
&&
(ntohs(iph->frag_off) & IP_DF))
goto frag_needed;
#ifdef CONFIG_IP_ROUTE_NAT
/* Routes flagged RTCF_NAT go through ip_do_nat(); failure frees the skb. */
if (rt->rt_flags & RTCF_NAT)
{
if (ip_do_nat(skb)) {
kfree_skb(skb);
return NET_RX_BAD;
}
}
#endif
/*
 * Hand the packet to the NF_IP_FORWARD hook with skb->dev as input
 * device, dev2 as output device and ip_forward_finish() as the
 * function passed for continuing.
 */
return NF_HOOK(PF_INET, NF_IP_FORWARD, skb,
skb->dev, dev2,
ip_forward_finish);
/* Error paths: each sends an ICMP message (if any) and ends in drop. */
frag_needed:
IP_INC_STATS_BH(IpFragFails);
/* The MTU is passed to icmp_send() converted with htonl(). */
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
goto drop;
sr_failed:
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0);
goto drop;
too_many_hops:
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
/* Falls through into drop. */
drop:
kfree_skb(skb);
return NET_RX_DROP;
}
/*
 * ip_forward_finish - function that ip_forward() passes to the
 * NF_IP_FORWARD hook.
 *
 * @skb: packet being forwarded.
 *
 * Counts the datagram in IpForwDatagrams. If the packet carries IP
 * options (optlen != 0) they are processed by ip_forward_options();
 * otherwise, on CONFIG_NET_FASTROUTE builds, the route may be stored in
 * the input device's fastpath table. In both cases the packet is then
 * passed to ip_send(), whose result is returned.
 */
static inline int ip_forward_finish(struct sk_buff *skb)
{
	struct ip_options *ip_opts = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(IpForwDatagrams);

	/* Packets with options: handle the options, then transmit. */
	if (ip_opts->optlen != 0) {
		ip_forward_options(skb);
		return ip_send(skb);
	}

#ifdef CONFIG_NET_FASTROUTE
	{
		struct rtable *route = (struct rtable *)skb->dst;

		if ((route->rt_flags & RTCF_FAST) && !netdev_fastroute_obstacles) {
			struct dst_entry *stale;
			/* Slot index: XOR of the first byte of each key address, masked. */
			u8 dst_byte = *(u8 *)&route->key.dst;
			u8 src_byte = *(u8 *)&route->key.src;
			unsigned slot = (dst_byte ^ src_byte) & NETDEV_FASTROUTE_HMASK;

			/* Swap the new route into the slot under the write lock. */
			write_lock_irq(&skb->dev->fastpath_lock);
			stale = skb->dev->fastpath[slot];
			skb->dev->fastpath[slot] = dst_clone(&route->u.dst);
			write_unlock_irq(&skb->dev->fastpath_lock);

			/* The previous entry is released after the lock is dropped. */
			dst_release(stale);
		}
	}
#endif

	return ip_send(skb);
}
/*
 * ip_send - transmit a packet, fragmenting it first when required.
 *
 * @skb: packet to send; its route is read from skb->dst.
 *
 * A packet no longer than skb->dst->pmtu goes straight to
 * ip_finish_output(). A longer one is given to ip_fragment(), with
 * ip_finish_output passed as its second argument. Returns the result of
 * whichever function was called.
 */
static inline int ip_send(struct sk_buff *skb)
{
	/* Fits within the path MTU: no fragmentation needed. */
	if (skb->len <= skb->dst->pmtu)
		return ip_finish_output(skb);

	return ip_fragment(skb, ip_finish_output);
}
/*
 * ip_finish_output - prepare the skb for its output device and run the
 * NF_IP_POST_ROUTING netfilter hook.
 *
 * @skb: packet to send; the output device is read from skb->dst->dev.
 *
 * Sets skb->dev to the output device and skb->protocol to ETH_P_IP (in
 * network byte order), then returns the result of NF_HOOK(), which is
 * given no input device (NULL), the output device, and
 * ip_finish_output2() as its last argument.
 */
__inline__ int ip_finish_output(struct sk_buff *skb)
{
	struct net_device *out_dev = skb->dst->dev;

	skb->protocol = __constant_htons(ETH_P_IP);
	skb->dev = out_dev;

	return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, out_dev,
		       ip_finish_output2);
}
static inline int ip_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
struct hh_cache *hh = dst->hh;
#ifdef CONFIG_NETFILTER_DEBUG
nf_debug_ip_finish_output2(skb);
#endif
if (hh) {
read_lock_bh(&hh->hh_lock);
memcpy(skb->data - 16,
hh->hh_data, 16);
read_unlock_bh(&hh->hh_lock);
skb_push(skb, hh->hh_len);
return hh->hh_output(skb);
} else if (dst->neighbour)
return
dst->neighbour->output(skb);
printk(KERN_DEBUG "khm\n");
kfree_skb(skb);
return -EINVAL;
}