简介
-
LB只处理进入的请求,将流量分发给后端,返回数据由Real Server直接返回到Client,所以模式叫Direct Routing
-
原理:修改二层头mac地址,所以它的局限性也很明显,只能在同一个二层,不能跨网段。由于直接返回给client,一般不会面对公网用户
-
部署图
DR Mode配置(one-arm)
-
To use DR
- dpvs needs a LAN IP first. (for one-arm, it must be different from VIP)
- the RS and DPVS must be in the same sub-network (on-link)
- On RS: VIP must be added to its lo interface
- On RS: arp_ignore must be set for the lo interface
- DPVS needs an RS-facing IP of its own (here meaning a "LAN-side" IP; it is not the same concept as the Local-IP (LIP) used by FNAT, just a normal IP address), because DPVS needs to communicate with the RSes. For one-arm, this LAN IP and the VIP are on the same DPDK interface, but they cannot be identical: the VIP is also configured on the RSes, so if we did not use a separate LAN IP, the RSes would not reply to the ARP request. Furthermore, the LAN IP of DPVS must be added before the VIP. For two-arm DR, DPVS also needs a LAN-side IP to talk with LAN-side hosts, while the VIP is configured on the client-facing (WAN) interface.
-
On DPVS, The DR configuration can be,
# on DPVS
# add LAN IP for DPVS, it must be different from VIP
# and must be added before VIP
./dpip addr add 192.168.100.1/24 dev dpdk0
# add VIP and the route will generate automatically
./dpip addr add 192.168.100.254/32 dev dpdk0
# route for LAN network, just a hint
# ./dpip route add 192.168.100.0/24 dev dpdk0
# add service <VIP:vport> to forwarding, scheduling mode is RR
# use ipvsadm --help for more info
./ipvsadm -A -t 192.168.100.254:80 -s rr
# add two RS for service, forwarding mode is DR
./ipvsadm -a -t 192.168.100.254:80 -r 192.168.100.2 -g
./ipvsadm -a -t 192.168.100.254:80 -r 192.168.100.3 -g
-
And then on the RSes,
# for each real server
rs$ ip addr add 192.168.100.254/32 dev lo
# ignore arp on lo
rs$ sysctl -w net.ipv4.conf.lo.arp_ignore=1
net.ipv4.conf.lo.arp_ignore = 1
-
Try if client can access VIP with DR mode
client$ curl 192.168.100.254
You ip:port: 192.168.100.46:13862
原理解析
-
dp_vs_conn_bind_dest
/* Bind a connection to its chosen real-server destination and install the
 * per-forwarding-mode transmit callback (excerpt: only the DR case is shown).
 * For DR mode the inbound transmit function is dp_vs_xmit_dr. */
static int dp_vs_conn_bind_dest(struct dp_vs_conn *conn, struct dp_vs_dest *dest)
{
    ...
    switch (dest->fwdmode) {
    ...
    case DPVS_FWD_MODE_DR:
        /* DR: forward by rewriting the L2 destination MAC only */
        conn->packet_xmit = dp_vs_xmit_dr;
        break;
    ...
    }
    conn->dest = dest;
    return(EDPVS_OK);
}
-
dp_vs_in
/* Main IPVS input hook (excerpt). After the connection lookup, packets are
 * dispatched to the inbound or outbound transmit path; note that in DR mode
 * there is no outbound path, since the RS replies to the client directly. */
static int __dp_vs_in(void *priv, struct rte_mbuf *mbuf,
                      const struct inet_hook_state *state, int af)
{
    ...
    /* holding the conn, need a "put" later. */
    /* choose the transmit path based on the traffic direction (dir) */
    if (dir == DPVS_CONN_DIR_INBOUND) {
        return(xmit_inbound(mbuf, prot, conn));
    } else {
        return(xmit_outbound(mbuf, prot, conn));
    }
}
-
dp_vs_xmit_dr
- DR模式inbound方向传输函数
- DR没有outbound方向传输函数
- DR模式下直接修改目的mac地址转发至real server
int dp_vs_xmit_dr(struct dp_vs_proto *proto, struct dp_vs_conn *conn, struct rte_mbuf *mbuf) { int af = conn->af; assert(af == AF_INET || af == AF_INET6); //包裹函数,根据协议族,调用不同的处理函数 return af == AF_INET ? __dp_vs_xmit_dr4(proto, conn, mbuf) : __dp_vs_xmit_dr6(proto, conn, mbuf); } static int __dp_vs_xmit_dr4(struct dp_vs_proto *proto, struct dp_vs_conn *conn, struct rte_mbuf *mbuf) { struct flow4 fl4; struct ipv4_hdr *iph = ip4_hdr(mbuf); struct route_entry *rt; int err, mtu; if (unlikely(mbuf->userdata != NULL)) { RTE_LOG(WARNING, IPVS, "%s: Already have route %p ?\\n", __func__, mbuf->userdata); route4_put((struct route_entry *)mbuf->userdata); } //查找输出路由 memset(&fl4, 0, sizeof(struct flow4)); fl4.fl4_daddr.s_addr = conn->daddr.in.s_addr; fl4.fl4_saddr.s_addr = iph->src_addr; fl4.fl4_tos = iph->type_of_service; rt = route4_output(&fl4); if (!rt) { err = EDPVS_NOROUTE; goto errout; } /* dr xmit support cache of route to rs*/ dp_vs_conn_cache_rt(conn, rt, true); //判断mtu,是否需要分片,如果mbuf要求不分片的话,回复icmp错误消息 mtu = rt->mtu; if (mbuf->pkt_len > mtu && (iph->fragment_offset & htons(IPV4_HDR_DF_FLAG))) { RTE_LOG(DEBUG, IPVS, "%s: frag needed.\\n", __func__); icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG, htonl(mtu)); err = EDPVS_FRAG; goto errout; } //设置mbuf的类型为ipv4数据 mbuf->packet_type = ETHER_TYPE_IPv4; //通过邻居系统将mbuf发送出去 err = neigh_output(AF_INET, (union inet_addr *)&conn->daddr.in, mbuf, rt->port); route4_put(rt); return err; errout: if (rt) route4_put(rt); rte_pktmbuf_free(mbuf); return err; }