ip_rcv_finish

最新推荐文章于 2022-05-29 19:59:59 发布

cft56200_ln

最新推荐文章于 2022-05-29 19:59:59 发布

阅读量584

点赞数

分类专栏：网络编程

网络编程专栏收录该内容

32 篇文章 2 订阅

订阅专栏

ip_rcv_finish

2013-04-20 14:37:14| 分类： linux-NET |举报 |字号订阅

下载LOFTER 我的照片书 |

执行完钩子函数后，IP数据包被传送到ip_rcv_finish做进一步处理，这个函数主要功能是做路由选择（kernel/net/ipv4/ip_input.c）
/*
 * Continue IPv4 receive processing for @skb: make sure a route is
 * attached, update statistics, process IP options and pass the packet
 * on through dst_input().
 *
 * Returns the result of dst_input(), or NET_RX_DROP after freeing the
 * skb when the route lookup or the option processing fails.
 */
static int ip_rcv_finish(struct sk_buff *skb)
{
    const struct iphdr *iph = ip_hdr(skb);
    struct rtable *rt;

    /* No dst attached yet: look up the input route for this packet. */
    if (skb_dst(skb) == NULL) {
        int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
                           iph->tos, skb->dev);

        if (unlikely(err)) {
            /* Count the failure under the matching MIB entry, if any. */
            switch (err) {
            case -EHOSTUNREACH:
                IP_INC_STATS_BH(dev_net(skb->dev),
                        IPSTATS_MIB_INADDRERRORS);
                break;
            case -ENETUNREACH:
                IP_INC_STATS_BH(dev_net(skb->dev),
                        IPSTATS_MIB_INNOROUTES);
                break;
            case -EXDEV:
                NET_INC_STATS_BH(dev_net(skb->dev),
                        LINUX_MIB_IPRPFILTER);
                break;
            default:
                break;
            }
            goto drop;
        }
    }

#ifdef CONFIG_IP_ROUTE_CLASSID
    if (unlikely(skb_dst(skb)->tclassid)) {
        struct ip_rt_acct *acct = this_cpu_ptr(ip_rt_acct);
        u32 classid = skb_dst(skb)->tclassid;
        /* Low byte selects the "out" slot, bits 16..23 the "in" slot. */
        u32 out_slot = classid & 0xFF;
        u32 in_slot = (classid >> 16) & 0xFF;

        acct[out_slot].o_packets++;
        acct[out_slot].o_bytes += skb->len;
        acct[in_slot].i_packets++;
        acct[in_slot].i_bytes += skb->len;
    }
#endif

    /* A header longer than 5 words carries IP options. */
    if (iph->ihl > 5 && ip_rcv_options(skb))
        goto drop;

    /* Per-type input accounting for multicast and broadcast routes. */
    rt = skb_rtable(skb);
    switch (rt->rt_type) {
    case RTN_MULTICAST:
        IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST,
                skb->len);
        break;
    case RTN_BROADCAST:
        IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST,
                skb->len);
        break;
    default:
        break;
    }

    return dst_input(skb);

drop:
    kfree_skb(skb);
    return NET_RX_DROP;
}
我们先看 ip_route_input_noref()（kernel/include/net/route.h），它会调用 ip_route_input_common()（kernel/net/ipv4/route.c）：

/*
 * Find a route for the incoming packet @skb (excerpt).
 *
 * With route caching enabled, the hash chain selected by
 * (daddr, saddr, iif, genid) is walked looking for an entry whose key,
 * mark and namespace match and which has not expired.  What happens on
 * a cache hit, and the skip_cache label targeted below, are elided
 * (".....") in this excerpt.  The visible tail calls
 * ip_route_input_slow() and returns its result.
 *
 * The lookup runs between rcu_read_lock() and rcu_read_unlock().
 */
int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
               u8 tos, struct net_device *dev, bool noref)
{
    struct rtable * rth;
    unsigned    hash;
    int iif = dev->ifindex;
    struct net *net;
    int res;

    net = dev_net(dev);

    rcu_read_lock();

    /* Route caching disabled for this namespace: bypass the hash lookup. */
    if (!rt_caching(net))
        goto skip_cache;

    tos &= IPTOS_RT_MASK;
    hash = rt_hash(daddr, saddr, iif, rt_genid(net));
    /*
     * Look up a route for the incoming skb: search the route cache hash
     * table first; if nothing is found there, call ip_route_input_slow()
     * to search the routing table.
     */
    for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
         rth = rcu_dereference(rth->dst.rt_next)) {
        /* XOR/OR of all key fields is zero only when every field matches. */
        if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
             ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
             (rth->rt_route_iif ^ iif) |
             (rth->rt_key_tos ^ tos)) == 0 &&
            rth->rt_mark == skb->mark &&
            net_eq(dev_net(rth->dst.dev), net) &&
            !rt_is_expired(rth)) {
            ipv4_validate_peer(rth);
           .....

    }
    res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
    rcu_read_unlock();
    return res;
}
/*
 * Input route resolution used when the route cache lookup does not
 * produce a result (excerpt; sections marked "...." are elided).
 *
 * Visible behaviour:
 *  - if forwarding is not enabled on the input device, jump to
 *    e_hostunreach;
 *  - if the lookup result is not RTN_UNICAST, jump to
 *    martian_destination;
 *  - otherwise build a forwarding route via ip_mkroute_input() and
 *    return its result;
 *  - at local_input, allocate a route on the loopback device whose
 *    input handler is ip_local_deliver.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                   u8 tos, struct net_device *dev)
{
    struct fib_result res;
    struct in_device *in_dev = __in_dev_get_rcu(dev);
    struct flowi4    fl4;
    unsigned    flags = 0;
    u32        itag = 0;
    struct rtable * rth;
    unsigned    hash;
    __be32        spec_dst;
    int        err = -EINVAL;
    struct net    * net = dev_net(dev);
....

    if (!IN_DEV_FORWARD(in_dev))
        goto e_hostunreach;
    if (res.type != RTN_UNICAST)
        goto martian_destination;

    err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);// not addressed to this host: set up the route for forwarding
out:    return err;
......
local_input:
    rth = rt_dst_alloc(net->loopback_dev,
               IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
    if (!rth)
        goto e_nobufs;

    rth->dst.input= ip_local_deliver;// the packet is addressed to this host and can be passed up to the transport layer
    rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
    rth->dst.tclassid = itag;
#endif
....
}
/*
 * Create an input route for @skb with __mkroute_input() and insert it
 * into the route cache.
 *
 * Returns 0 on success, otherwise the error reported by
 * __mkroute_input() or encoded in the pointer returned by
 * rt_intern_hash().
 */
static int ip_mkroute_input(struct sk_buff *skb,
                struct fib_result *res,
                const struct flowi4 *fl4,
                struct in_device *in_dev,
                __be32 daddr, __be32 saddr, u32 tos)
{
    struct rtable *rt = NULL;
    unsigned hash;
    int rc;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
    /* More than one next hop available: let the FIB pick one. */
    if (res->fi && res->fi->fib_nhs > 1)
        fib_select_multipath(res);
#endif

    /* Build the routing cache entry. */
    rc = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rt);
    if (rc)
        return rc;

    /* Insert it into the cache under the hash of its lookup key. */
    hash = rt_hash(daddr, saddr, fl4->flowi4_iif,
               rt_genid(dev_net(rt->dst.dev)));
    rt = rt_intern_hash(hash, rt, skb, fl4->flowi4_iif);

    return IS_ERR(rt) ? PTR_ERR(rt) : 0;
}
/*
 * Allocate and fill in a route cache entry for a packet that is to be
 * forwarded, and hand it back through @result.
 *
 * Returns 0 on success.  On failure returns -EINVAL (no output device,
 * or a non-IP packet routed back out of its input device without
 * proxy-ARP PVLAN), -ENOBUFS (allocation failure) or the negative
 * result of fib_validate_source(); *result is left untouched then.
 */
static int __mkroute_input(struct sk_buff *skb,
               const struct fib_result *res,
               struct in_device *in_dev,
               __be32 daddr, __be32 saddr, u32 tos,
               struct rtable **result)
{
    struct in_device *out_dev;
    struct rtable *rt;
    unsigned int flags = 0;
    __be32 spec_dst;
    u32 itag;
    int rc;

    /* get a working reference to the output device */
    out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
    if (!out_dev) {
        if (net_ratelimit())
            pr_crit("Bug in ip_route_input_slow(). Please report.\n");
        return -EINVAL;
    }

    rc = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
                in_dev->dev, &spec_dst, &itag);
    if (rc < 0) {
        ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
                    saddr);
        return rc;
    }

    if (rc) {
        flags |= RTCF_DIRECTSRC;

        /* Packet would leave through the device it arrived on. */
        if (out_dev == in_dev &&
            (IN_DEV_SHARED_MEDIA(out_dev) ||
             inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
            flags |= RTCF_DOREDIRECT;
    }

    /*
     * Not IP (i.e. ARP). Do not create route, if it is invalid for
     * proxy arp. DNAT routes are always valid.
     *
     * Proxy arp feature have been extended to allow, ARP replies back
     * to the same interface, to support Private VLAN switch
     * technologies. See arp.c.
     */
    if (skb->protocol != htons(ETH_P_IP) &&
        out_dev == in_dev &&
        IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0)
        return -EINVAL;

    rt = rt_dst_alloc(out_dev->dev,
              IN_DEV_CONF_GET(in_dev, NOPOLICY),
              IN_DEV_CONF_GET(out_dev, NOXFRM));
    if (!rt)
        return -ENOBUFS;

    /* Lookup key. */
    rt->rt_key_dst = daddr;
    rt->rt_key_src = saddr;
    rt->rt_genid = rt_genid(dev_net(rt->dst.dev));
    rt->rt_flags = flags;
    rt->rt_type = res->type;
    rt->rt_key_tos = tos;

    /* Addresses and interfaces. */
    rt->rt_dst = daddr;
    rt->rt_src = saddr;
    rt->rt_route_iif = in_dev->dev->ifindex;
    rt->rt_iif = in_dev->dev->ifindex;
    rt->rt_oif = 0;
    rt->rt_mark = skb->mark;
    rt->rt_gateway = daddr;
    rt->rt_spec_dst = spec_dst;
    rt->rt_peer_genid = 0;
    rt->peer = NULL;
    rt->fi = NULL;

    /*
     * dst.input is set here; the dst_input(skb) call at the end of
     * ip_rcv_finish() therefore ends up invoking ip_forward().
     */
    rt->dst.input = ip_forward;
    rt->dst.output = ip_output;

    rt_set_nexthop(rt, NULL, res, res->fi, res->type, itag);

    *result = rt;
    return 0;
}
我们先分析发给本机数据包的情况，即 rth->dst.input= ip_local_deliver
/*
 * Deliver an IP packet addressed to this host: reassemble fragments
 * first, then run the NF_INET_LOCAL_IN netfilter hook with
 * ip_local_deliver_finish() as its continuation.
 */
int ip_local_deliver(struct sk_buff *skb)
{
    /*
     * A fragment goes to ip_defrag(); a non-zero result from it ends
     * processing of this skb here with a return value of 0.
     */
    if (ip_is_fragment(ip_hdr(skb)) &&
        ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
        return 0;

    return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
               ip_local_deliver_finish);
}
static int ip_local_deliver_finish(struct sk_buff *skb)
{
    struct net *net = dev_net(skb->dev);

    __skb_pull(skb, ip_hdrlen(skb)); //(1) 剥离IP头

    /* Point into the IP datagram, just past the header. */
    skb_reset_transport_header(skb);

    rcu_read_lock();
    {
        int protocol = ip_hdr(skb)->protocol;
        int hash, raw;
        const struct net_protocol *ipprot;

    resubmit:
        raw = raw_local_deliver(skb, protocol);
   //inet_protos[]是INETSOCKET层全部协议的接收处理函数集的一个数组，

//每个协议以自己的协议号(比如icmp协议就以IPPROTO_ICMP，也就是数字

//1作为下标，把自己添加到该数组,数组中存储了net_protocol结构，例如针

//对TCP协议，相关的结构定义如下：

/----------------------------------------------------

      static const struct net_protocol tcp_protocol = {
   .handler =   tcp_v4_rcv,
   .err_handler =   tcp_v4_err,
   .gso_send_check = tcp_v4_gso_send_check,
   .gso_segment =   tcp_tso_segment,
   .gro_receive =   tcp4_gro_receive,
   .gro_complete =   tcp4_gro_complete,
   .no_policy =   1,
   .netns_ok =   1,
};

在kernel/net/ipv4/af_inet.c的函数inet_init中已经通过inet_add_protocol将tcp_protocol 协议加入到数组inet_protos中

-------------------------------------------------------/

        hash = protocol & (MAX_INET_PROTOS - 1);
        ipprot = rcu_dereference(inet_protos[hash]);
        if (ipprot != NULL) {
            int ret;

            if (!net_eq(net, &init_net) && !ipprot->netns_ok) {
                if (net_ratelimit())
                    printk("%s: proto %d isn't netns-ready\n",
                        __func__, protocol);
                kfree_skb(skb);
                goto out;
            }

            if (!ipprot->no_policy) {
                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                    kfree_skb(skb);
                    goto out;
                }
                nf_reset(skb);
            }
           ret = ipprot->handler(skb);//此时转到传输层处理数据，若为TCP协议则调用tcp_v4_rcv
            if (ret < 0) {
                protocol = -ret;
                goto resubmit;
            }
            IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
        } else {
            if (!raw) {
                if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                    IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS);
                    icmp_send(skb, ICMP_DEST_UNREACH,
                        ICMP_PROT_UNREACH, 0);
                }
            } else
                IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
            kfree_skb(skb);
        }
    }
out:
    rcu_read_unlock();

    return 0;
}

cft56200_ln

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
ip_rcv_finish

ip_rcv_finish 2013-04-20 14:37:14| 分类：linux-NET |举报|字号订阅下载LOFTER我的照片书 | 执行完钩子函数后，IP数据包被传送到ip_rcv_finish做进一步处理，这个函数主要功能是做路由选择（ker
复制链接

扫一扫

专栏目录