有一个问题:IPVS中的local client 指什么? 在2.6.32的代码中,多了对于Local client的判断。
查阅IPVS的官方文档http://www.linuxvirtualserver.org/Documents.html, 可以看到所谓Local client 就是指director本身也作为一个server的情况,在这种情况下,director可以处理packet locally,而不是进行转发,所以称为local client(我觉得这里叫local server 更合适)。
对于local client的情况,如果要将包发送给本地,那么在ip_vs_in中会创建这样的一个connection entry。现在对于local client在ip_vs_in中的处理还是没弄明白。理论上,对于从local server发给client 的包,它不会经过forward这个点,但是会经过localOutput这个点。所以,可以在localOutput这个点,添加钩子,并判断数据包是否能由conn_out_get查找到反向连接,然后调用handle_response进行处理。但是,现在在程序中,把这些处理放到了ip_vs_in函数中,它是添加在localInput这个hook点的,不符合逻辑。
所以,合理的推断是,第一次数据包进入ip_vs_in时,创建了connection entry。然后,经过ip_vs_in的处理,将目的地址和端口改成了DR的另一个地址和端口,然后将包发送。发送之后,数据包会再次经过ip_vs_in。也就是说 从内核发往本地的另一个网口的包,会再次的经过ip_rcv。目前我还没有找到这个调用过程,但是我猜是这样的(这个推断应该是正确的)。
另外,在IPVS中,对于connection,从client(sip, sport)->server(dip,dport)和server(dip,dport)->client(sip,sport)的双向的数据为同一个connection entry。
对于tcp和udp,在协议中都包含连接查找函数了conn_in_get和conn_out_get,分别用于正向连接查找和反向查找。它们最终调用的也分别是__ip_vs_conn_in_get和__ip_vs_conn_out_get,在这两个函数,对于正向和反向,最后查找到了的是同一个connection。
另外,conn_out_get只用在NAT的情况 和 local client的情况。
/* Netfilter hook registration: run ip_vs_in() on every IPv4 packet that
 * reaches the LOCAL_IN hook point (i.e. packets destined for this host). */
static struct nf_hook_ops ip_vs_in_ops = {
.hook = ip_vs_in,          /* handler invoked at this hook point */
.owner = THIS_MODULE,
.pf = PF_INET,             /* IPv4 only for this registration */
.hooknum = NF_IP_LOCAL_IN, /* after routing decides the packet is local */
.priority = 100,           /* NOTE(review): presumably chosen to run after
                            * other LOCAL_IN hooks (e.g. conntrack/filter);
                            * confirm against the NF_IP_PRI_* ordering. */
};
ip_vs_in 是所有从client发过来的数据包的总入口。也就是说数据包首先经过ip_vs_in的处理。
/*
 * ip_vs_in - NF_IP_LOCAL_IN hook handler; the single entry point for
 * packets arriving from clients (and, in the "local client" case, for
 * replies generated by a real server running on this host).
 *
 * Check if it's for virtual services, look it up,
 * and send it on its way...
 *
 * Flow: look up an existing connection (forward direction); if none, try
 * the reverse direction (local-client response) and hand it to
 * handle_response(); otherwise ask the protocol to schedule a new
 * connection.  Then update stats/state and transmit the packet via the
 * connection's packet_xmit method.  Returns a netfilter verdict.
 */
static unsigned int
ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
int ret, restart, af, pkts;
/* Derive the address family from the skb's L2 protocol field. */
af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
/*
 * Big tappo: only PACKET_HOST, including loopback for local client
 * Don't handle local packets on IPv6 for now
 */
if (unlikely(skb->pkt_type != PACKET_HOST)) {
/* Broadcast/multicast/other-host packets are never load-balanced. */
IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
skb->pkt_type,
iph.protocol,
IP_VS_DBG_ADDR(af, &iph.daddr));
return NF_ACCEPT;
}
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
/* ICMPv6 errors that relate to an IPVS connection get their own
 * verdict; unrelated ICMPv6 falls through as a normal packet
 * (header re-read since the icmp handler may have touched skb). */
int related, verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
if (related)
return verdict;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
} else
#endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
/* Same treatment for IPv4 ICMP errors. */
int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
if (related)
return verdict;
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
/* Protocol supported?  Accept (pass through) anything IPVS has no
 * protocol handler registered for. */
pp = ip_vs_proto_get(iph.protocol);
if (unlikely(!pp))
return NF_ACCEPT;
/*
 * Check if the packet belongs to an existing connection entry
 * (forward, client->server direction).
 */
cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
if (unlikely(!cp)) {
int v;
/* No forward-direction entry.  For local client packets it could
 * be a response from a real server on this host: if the reverse
 * lookup matches, process it as a server reply.  (orig: ??? wusq) */
cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
if (cp)
return handle_response(af, skb, pp, cp, iph.len);
/* Still no match: let the protocol schedule a new connection.
 * On failure it supplies the verdict in v. */
if (!pp->conn_schedule(af, skb, pp, &v, &cp))
return v;
}
if (unlikely(!cp)) {
/* sorry, all this trouble for a no-hit :) */
IP_VS_DBG_PKT(12, pp, skb, 0,
"packet continues traversal as normal");
return NF_ACCEPT;
}
IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet");
/* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
if (sysctl_ip_vs_expire_nodest_conn) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
/* don't restart its timer, and silently
drop the packet. */
__ip_vs_conn_put(cp);
return NF_DROP;
}
ip_vs_in_stats(cp, skb);
/* NOTE(review): restart is assigned but never read in this function. */
restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
if (cp->packet_xmit)
ret = cp->packet_xmit(skb, cp, pp);
/* do not touch skb anymore */
else {
IP_VS_DBG_RL("warning: packet_xmit is null");
ret = NF_ACCEPT;
}
/* Increase its packet counter and check if it is needed
 * to be synchronized
 *
 * Sync connection if it is about to close to
 * encourage the standby servers to update the connections timeout
 */
pkts = atomic_add_return(1, &cp->in_pkts);
/* Sync to the backup director (IPv4 master only): either periodically
 * per the sync_threshold sysctl (non-TCP, or established TCP), or when
 * a TCP connection transitions into FIN_WAIT/CLOSE_WAIT/TIME_WAIT. */
if (af == AF_INET &&
(ip_vs_sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
(pkts % sysctl_ip_vs_sync_threshold[1]
== sysctl_ip_vs_sync_threshold[0])) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT)))))
ip_vs_sync_conn(cp);
cp->old_state = cp->state;
/* Drop the reference taken by conn_in_get()/conn_schedule(). */
ip_vs_conn_put(cp);
return ret;
}
forward处的钩子。这个函数对转发包进行处理, 只用在NAT模式的均衡处理,处理的是服务器返回的包,因为TUNNEL和DR方式下都是直接发给了client,不经过load balancer的处理。客户端请求的包也不经过这个hook,客户端的请求包经过的是local in的hook。
但如果设置了DNAT规则,数据包在PREROUTING点进行了目的地址修改,这样就不会再进入INPUT点而是直接转到FORWARD点处理,这时针对该包的IPVS连接是没有建立的。
static struct nf_hook_ops ip_vs_out_ops =