-
__dp_vs_in
- 开始进入 LVS 模块。这段代码比较复杂,简单来说:对于已存在的连接,判断流量方向(client->LB 还是 rs->LB)后直接转发流量;对于新来的连接,先查找 virtual server,再根据 LB 算法选出对应的后端 real server,建立连接,并保存这个会话。
/*
 * __dp_vs_in - core DPVS (LVS) packet-processing entry, installed as an
 * inet hook.
 *
 * For a packet that belongs to an existing connection, determine its
 * direction (client->LB inbound, or real-server->LB outbound) and hand it
 * to the matching xmit path. For a packet of a new flow, schedule a
 * backend real server via the protocol's conn_sched, create the
 * connection, then forward.
 *
 * @priv:  hook private data (unused here).
 * @mbuf:  received packet.
 * @state: inet hook state (asserted non-NULL together with mbuf).
 * @af:    address family, AF_INET or AF_INET6.
 *
 * Returns an INET_* verdict: INET_ACCEPT to let the stack continue,
 * INET_DROP to discard, or INET_STOLEN when the packet was consumed
 * (e.g. redirected to another lcore).
 */
static int __dp_vs_in(void *priv, struct rte_mbuf *mbuf,
                      const struct inet_hook_state *state, int af)
{
    struct dp_vs_iphdr iph;
    struct dp_vs_proto *prot;
    struct dp_vs_conn *conn;
    int dir, verdict, err, related;
    bool drop = false;
    lcoreid_t cid, peer_cid;
    /* L2 packet type of this mbuf */
    eth_type_t etype = mbuf->packet_type; /* FIXME: use other field ? */

    assert(mbuf && state);

    /* lcore currently processing this packet; peer_cid may be rewritten
     * by conn_lookup below to the lcore that owns the connection */
    cid = peer_cid = rte_lcore_id();

    /* packet is not addressed to this host: accept it unchanged and let
     * the normal receive path (ipv4_rcv_fin) run afterwards */
    if (unlikely(etype != ETH_PKT_HOST)) {
        return(INET_ACCEPT);
    }

    /* fill the internal dp_vs_iphdr; failure (mainly an unsupported
     * address family) means accept and leave the packet to the stack */
    if (dp_vs_fill_iphdr(af, mbuf, &iph) != EDPVS_OK) {
        return(INET_ACCEPT);
    }

    /* ICMP handling, similar in spirit to the Linux kernel icmp_error
     * logic: errors related to an existing conn are handled here */
    if (unlikely(iph.proto == IPPROTO_ICMP || iph.proto == IPPROTO_ICMPV6)) {
        /* handle related ICMP error to existing conn */
        verdict = dp_vs_in_icmp(af, mbuf, &related);
        if (related || verdict != INET_ACCEPT) {
            return(verdict);
        }
        /* let unrelated and valid ICMP goes down,
         * may implement ICMP fwd in the further. */
    }

    /* look up the L4 protocol handler; tcp, udp and icmp exist today */
    prot = dp_vs_proto_lookup(iph.proto);
    if (unlikely(!prot)) {
        /* no transport-layer handler: accept and pass along */
        return(INET_ACCEPT);
    }

    /*
     * Defrag ipvs-forwarding TCP/UDP is not supported for some reasons,
     *
     * - RSS/flow-director do not support TCP/UDP fragments, means it's
     *   not able to direct frags to same lcore as original TCP/UDP packets.
     * - per-lcore conn table will miss if frags reach the wrong lcore.
     *
     * If we redirect frags to "correct" lcore, it may cause performance
     * issue. Also it need to understand RSS algorithm. Moreover, for the
     * case frags in same flow are not occur in same lcore, a global lock is
     * needed, which is not a good idea.
     */
    /* IP fragments are unsupported (interacts badly with flow director) */
    if (af == AF_INET && ip4_is_frag(ip4_hdr(mbuf))) {
        RTE_LOG(DEBUG, IPVS, "%s: frag not support.\\n", __func__);
        return(INET_DROP);
    }

    /* packet belongs to existing connection ? */
    /* call the protocol's conn_lookup (tcp_conn_lookup for TCP). It may
     * decide to drop. 'dir' is set to the flow direction (client->LB or
     * real-server->LB); 'peer_cid' is set to the lcore that owns the
     * connection. */
    conn = prot->conn_lookup(prot, &iph, mbuf, &dir, false, &drop, &peer_cid);

    /* lookup decided to drop: return INET_DROP immediately */
    if (unlikely(drop)) {
        RTE_LOG(DEBUG, IPVS, "%s: deny ip try to visit.\\n", __func__);
        return(INET_DROP);
    }

    /*
     * The connection is not locally found, however the redirect is found so
     * forward the packet to the remote redirect owner core.
     */
    /* not owned by this lcore: restore mbuf->data_off to the outer L2
     * header and hand the packet to the owner lcore. A successful ring
     * enqueue yields INET_STOLEN, otherwise the packet is dropped. */
    if (cid != peer_cid) {
        /* recover mbuf.data_off to outer Ether header */
        rte_pktmbuf_prepend(mbuf, (uint16_t)sizeof(struct ether_hdr));
        return(dp_vs_redirect_pkt(mbuf, peer_cid));
    }

    /* a brand-new flow has no session yet: schedule a backend real
     * server and create the connection */
    if (unlikely(!conn)) {
        /* try schedule RS and create new connection */
        /* conn_sched picks a backend RS; on failure propagate its verdict */
        if (prot->conn_sched(prot, &iph, mbuf, &conn, &verdict) != EDPVS_OK) {
            /* RTE_LOG(DEBUG, IPVS, "%s: fail to schedule.\\n", __func__); */
            return(verdict);
        }

        /* only SNAT triggers connection by inside-outside traffic. */
        if (conn->dest->fwdmode == DPVS_FWD_MODE_SNAT) {
            /* SNAT: internal host visiting an external service
             * (intranet server ---> dpvs ---> internet), so the first
             * packet travels outbound */
            dir = DPVS_CONN_DIR_OUTBOUND;
        } else {
            /* all other forwarding modes start inbound */
            dir = DPVS_CONN_DIR_INBOUND;
        }
    }

    /* special handling: syn-proxy */
    if (conn->flags & DPVS_CONN_F_SYNPROXY) {
        if (dir == DPVS_CONN_DIR_INBOUND) {
            /* Filter out-in ack packet when cp is at SYN_SENT state.
             * Drop it if not a valid packet, store it otherwise */
            if (0 == dp_vs_synproxy_filter_ack(mbuf, conn, prot, &iph, &verdict)) {
                dp_vs_stats_in(conn, mbuf);
                dp_vs_conn_put(conn);
                return(verdict);
            }

            /* "Reuse" synproxy sessions.
             * "Reuse" means update syn_proxy_seq struct
             * and clean ack_mbuf etc. */
            if (0 != dp_vs_synproxy_ctrl_conn_reuse) {
                if (0 == dp_vs_synproxy_reuse_conn(af, mbuf, conn, prot, &iph, &verdict)) {
                    dp_vs_stats_in(conn, mbuf);
                    dp_vs_conn_put(conn);
                    return(verdict);
                }
            }
        } else {
            /* Syn-proxy 3 logic: receive syn-ack from rs */
            if (dp_vs_synproxy_synack_rcv(mbuf, conn, prot, iph.len, &verdict) == 0) {
                dp_vs_stats_out(conn, mbuf);
                dp_vs_conn_put(conn);
                return(verdict);
            }
        }
    }

    /* state transition (tcp_state_trans for TCP) */
    if (prot->state_trans) {
        err = prot->state_trans(prot, conn, mbuf, dir);
        if (err != EDPVS_OK) {
            RTE_LOG(WARNING, IPVS, "%s: fail to trans state.", __func__);
        }
    }
    conn->old_state = conn->state;

    /* holding the conn, need a "put" later. */
    /* transmit according to the flow direction */
    if (dir == DPVS_CONN_DIR_INBOUND) {
        return(xmit_inbound(mbuf, prot, conn));
    } else {
        return(xmit_outbound(mbuf, prot, conn));
    }
}
dpvs中lvs核心处理函数:dp_vs_in
最新推荐文章于 2024-07-24 15:11:20 发布