The core LVS processing function in DPVS: dp_vs_in

  • __dp_vs_in

    • Entry point of the LVS module. The code is fairly involved; in short: for a packet that belongs to an existing proxied connection, it determines the direction (client->LB or rs->LB) and forwards the traffic directly; for a brand-new connection, it looks up the virtual server, picks a backend real server according to the LB scheduling algorithm, establishes the connection, and saves the session. (Two sketches after the listing show the per-protocol callbacks this relies on, and how the function is wired into the hook chain.)
    static int __dp_vs_in(void *priv, struct rte_mbuf *mbuf,
                          const struct inet_hook_state *state, int af)
    {
        struct dp_vs_iphdr  iph;
        struct dp_vs_proto *prot;
        struct dp_vs_conn * conn;
        int       dir, verdict, err, related;
        bool      drop = false;
        lcoreid_t cid, peer_cid;
        // get the L2 packet type of the mbuf
        eth_type_t etype = mbuf->packet_type; /* FIXME: use other field ? */
    
        assert(mbuf && state);
        // id of the lcore we are currently running on
        cid = peer_cid = rte_lcore_id();
        // the packet is not addressed to this host: return ACCEPT, ipv4_rcv_fin runs afterwards
        if (unlikely(etype != ETH_PKT_HOST))
        {
            return(INET_ACCEPT);
        }
        // fill the internal dp_vs_iphdr; on error (usually an unsupported address family) return ACCEPT
        if (dp_vs_fill_iphdr(af, mbuf, &iph) != EDPVS_OK)
        {
            return(INET_ACCEPT);
        }
        // handle ICMP messages, similar to the icmp_error logic in the Linux kernel; not expanded here
        if (unlikely(iph.proto == IPPROTO_ICMP ||
                     iph.proto == IPPROTO_ICMPV6))
        {
            /* handle related ICMP error to existing conn */
            verdict = dp_vs_in_icmp(af, mbuf, &related);
            if (related || verdict != INET_ACCEPT)
            {
                return(verdict);
            }
    
            /* let unrelated and valid ICMP go down;
             * may implement ICMP fwd in the future. */
        }
        // look up the L4 protocol handler; tcp, udp and icmp are implemented so far
        prot = dp_vs_proto_lookup(iph.proto);
        // no matching transport-layer proto was found, return ACCEPT
        if (unlikely(!prot))
        {
            return(INET_ACCEPT);
        }
    
        /*
         * Defragmentation for ipvs-forwarded TCP/UDP is not supported,
         * for several reasons:
         *
         * - RSS/flow-director do not support TCP/UDP fragments, so frags
         *   cannot be directed to the same lcore as the original TCP/UDP
         *   packets.
         * - the per-lcore conn table will miss if frags reach the wrong
         *   lcore.
         *
         * Redirecting frags to the "correct" lcore may hurt performance
         * and requires understanding the RSS algorithm. Moreover, when
         * frags of the same flow do not arrive on the same lcore, a
         * global lock would be needed, which is not a good idea.
         */
        // IP fragments are not supported yet; this interacts with flow director
        if (af == AF_INET && ip4_is_frag(ip4_hdr(mbuf)))
        {
            RTE_LOG(DEBUG, IPVS, "%s: frag not support.\\n", __func__);
            return(INET_DROP);
        }
    
        /* packet belongs to existing connection ? */
        // call the proto's conn_lookup (tcp_conn_lookup for TCP) to find the session; the packet
        // may be dropped here. dir is set to the flow direction (client->LB or real server->LB);
        // peer_cid is the id of the lcore that owns this connection
        conn = prot->conn_lookup(prot, &iph, mbuf, &dir, false, &drop, &peer_cid);
        // if the lookup decided to drop, return the INET_DROP verdict right away
        if (unlikely(drop))
        {
            RTE_LOG(DEBUG, IPVS, "%s: deny ip try to visit.\\n", __func__);
            return(INET_DROP);
        }
    
        /*
         * The connection is not locally found, however the redirect is found so
         * forward the packet to the remote redirect owner core.
         */
        // the connection is owned by another lcore: restore mbuf->data_off to the L2 header and
        // forward the packet to that lcore; a successful ring enqueue returns INET_STOLEN,
        // otherwise INET_DROP and the packet is discarded
        if (cid != peer_cid)
        {
            /* recover mbuf.data_off to outer Ether header */
            rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct ether_hdr));
    
            return(dp_vs_redirect_pkt(mbuf, peer_cid));
        }
        // a new connection has no session yet; conn_sched picks a backend real server for the request and creates the connection
        if (unlikely(!conn))
        {
            /* try schedule RS and create new connection */
            // call the proto's conn_sched to choose a backend rs and create the connection; on failure return the verdict
            if (prot->conn_sched(prot, &iph, mbuf, &conn, &verdict) != EDPVS_OK)
            {
                /* RTE_LOG(DEBUG, IPVS, "%s: fail to schedule.\n", __func__); */
                return(verdict);
            }
    
            /* only SNAT triggers connection by inside-outside traffic. */
            // SNAT mode means an internal server is visiting an external service (internal server ---> dpvs ---> external server, e.g. baidu), so set dir = DPVS_CONN_DIR_OUTBOUND
            if (conn->dest->fwdmode == DPVS_FWD_MODE_SNAT)
            {
                dir = DPVS_CONN_DIR_OUTBOUND;
            }
            else
            {
                // every other mode sets dir = DPVS_CONN_DIR_INBOUND
                dir = DPVS_CONN_DIR_INBOUND;
            }
        }
        // special handling for syn-proxy
        if (conn->flags & DPVS_CONN_F_SYNPROXY)
        {
            if (dir == DPVS_CONN_DIR_INBOUND)
            {
                /* Filter out-in ack packet when cp is at SYN_SENT state.
                 * Drop it if not a valid packet, store it otherwise */
                if (0 == dp_vs_synproxy_filter_ack(mbuf, conn, prot,
                                                   &iph, &verdict))
                {
                    dp_vs_stats_in(conn, mbuf);
                    dp_vs_conn_put(conn);
                    return(verdict);
                }
    
                /* "Reuse" synproxy sessions.
                 * "Reuse" means update syn_proxy_seq struct
                 * and clean ack_mbuf etc. */
                if (0 != dp_vs_synproxy_ctrl_conn_reuse)
                {
                    if (0 == dp_vs_synproxy_reuse_conn(af, mbuf, conn, prot,
                                                       &iph, &verdict))
                    {
                        dp_vs_stats_in(conn, mbuf);
                        dp_vs_conn_put(conn);
                        return(verdict);
                    }
                }
            }
            else
            {
                /* Syn-proxy 3 logic: receive syn-ack from rs */
                if (dp_vs_synproxy_synack_rcv(mbuf, conn, prot,
                                              iph.len, &verdict) == 0)
                {
                    dp_vs_stats_out(conn, mbuf);
                    dp_vs_conn_put(conn);
                    return(verdict);
                }
            }
        }
        // protocol state transition, tcp_state_trans for TCP
        if (prot->state_trans)
        {
            err = prot->state_trans(prot, conn, mbuf, dir);
            if (err != EDPVS_OK)
            {
                RTE_LOG(WARNING, IPVS, "%s: fail to trans state.", __func__);
            }
        }
        conn->old_state = conn->state;
    
        /* holding the conn, need a "put" later. */
        // choose the transmit path according to the flow direction dir
        if (dir == DPVS_CONN_DIR_INBOUND)
        {
            return(xmit_inbound(mbuf, prot, conn));
        }
        else
        {
            return(xmit_outbound(mbuf, prot, conn));
        }
    }
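
    • The per-protocol callbacks used above

    The walkthrough leans on three callbacks resolved through dp_vs_proto_lookup(): conn_lookup, conn_sched and state_trans. Their signatures can be read off the call sites in the listing; the sketch below restates them as the ops table a protocol such as TCP would fill in. It is an illustration derived from this listing only: the struct name, the "reverse" parameter name and the comments are assumptions, and the real struct dp_vs_proto in DPVS carries more members (name, timeout tables, checksum helpers, ...).

    /* Sketch only: ops-table shape inferred from the call sites in
     * __dp_vs_in; not the complete struct dp_vs_proto. */
    struct dp_vs_proto_sketch {
        uint8_t proto;   /* IPPROTO_TCP / IPPROTO_UDP / IPPROTO_ICMP */

        /* find the conn this packet belongs to; also reports the flow
         * direction (dir), a drop decision and the owner lcore (peer_cid) */
        struct dp_vs_conn *(*conn_lookup)(struct dp_vs_proto *proto,
                                          const struct dp_vs_iphdr *iph,
                                          struct rte_mbuf *mbuf, int *dir,
                                          bool reverse, bool *drop,
                                          lcoreid_t *peer_cid);

        /* first packet of a flow: pick an RS by the scheduling algorithm
         * and create the conn; on failure *verdict is what the caller
         * should return */
        int (*conn_sched)(struct dp_vs_proto *proto,
                          const struct dp_vs_iphdr *iph,
                          struct rte_mbuf *mbuf,
                          struct dp_vs_conn **conn, int *verdict);

        /* advance the protocol state machine, e.g. tcp_state_trans */
        int (*state_trans)(struct dp_vs_proto *proto,
                           struct dp_vs_conn *conn,
                           struct rte_mbuf *mbuf, int dir);
    };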
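
    • How __dp_vs_in is wired into the packet path

    __dp_vs_in itself is static; what the inet layer calls are thin per-address-family wrappers registered on the hook chain, which is why the af parameter is passed in explicitly. The sketch below shows that wiring as I read it in dpvs/src/ipvs/ip_vs_core.c; the hook point, the priority value and the registration helper are assumptions that may differ between DPVS versions.

    /* Per-af adapters: the hook chain does not know about af, so each
     * address family gets its own entry around the common __dp_vs_in. */
    static int dp_vs_in(void *priv, struct rte_mbuf *mbuf,
                        const struct inet_hook_state *state)
    {
        return __dp_vs_in(priv, mbuf, state, AF_INET);
    }

    static int dp_vs_in6(void *priv, struct rte_mbuf *mbuf,
                         const struct inet_hook_state *state)
    {
        return __dp_vs_in(priv, mbuf, state, AF_INET6);
    }

    /* Registered at PRE_ROUTING, so every packet that passes ipv4_rcv's
     * sanity checks runs through dp_vs_in before routing; the priority
     * value is an assumption. */
    static struct inet_hook_ops dp_vs_ops[] = {
        {
            .hook     = dp_vs_in,
            .hooknum  = INET_HOOK_PRE_ROUTING,
            .priority = 100,
        },
    };
    /* at module init, something like:
     * ipv4_register_hooks(dp_vs_ops, NELEMS(dp_vs_ops)); */

    The verdicts in the listing follow the Netfilter convention: INET_ACCEPT hands the mbuf back to the stack (ipv4_rcv_fin runs next), INET_DROP frees it, and INET_STOLEN means the hook took ownership of the mbuf, e.g. after dp_vs_redirect_pkt enqueued it to the owner lcore's ring.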
    