IPVS的实现利用了Netfilter的三个Hook点,分别是:NF_INET_LOCAL_IN、NF_INET_LOCAL_OUT和NF_INET_FORWARD。在每个Hook点,IPVS注册了两个钩子函数。如下所示:
/*
 * IPv4 netfilter hook registrations for IPVS: two hook functions on each of
 * NF_INET_LOCAL_IN, NF_INET_LOCAL_OUT and NF_INET_FORWARD.
 *
 * Entries that share a hook are listed in ascending .priority order;
 * netfilter invokes hooks with a smaller priority value first, so the first
 * entry of each pair runs before the second.
 */
static const struct nf_hook_ops ip_vs_ops[] = {
	/* After packet filtering, change source only for VS/NAT */
	{
		.hook		= ip_vs_reply4,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_NAT_SRC - 2,
	},
	/* After packet filtering, forward packet through VS/DR, VS/TUN,
	 * or VS/NAT(change destination), so that filtering rules can be
	 * applied to IPVS. */
	{
		.hook		= ip_vs_remote_request4,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_NAT_SRC - 1,
	},
	/* Before ip_vs_in, change source only for VS/NAT */
	{
		.hook		= ip_vs_local_reply4,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_NAT_DST + 1,
	},
	/* After mangle, schedule and forward local requests */
	{
		.hook		= ip_vs_local_request4,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_NAT_DST + 2,
	},
	/* After packet filtering (but before ip_vs_out_icmp), catch icmp
	 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
	{
		.hook		= ip_vs_forward_icmp,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_FORWARD,
		.priority	= 99,
	},
	/* After packet filtering, change source only for VS/NAT */
	{
		.hook		= ip_vs_reply4,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_FORWARD,
		.priority	= 100,
	},
};
如下表所示,IPVS对Request和Reply的定义是:由外部客户端发往IPVS内部的报文为Request,由IPVS内部回复给外部客户端的报文为Reply。因此,名称中带有request的Hook函数都对应IPVS核心函数ip_vs_in,名称中带有reply的Hook函数都对应IPVS核心函数ip_vs_out。
HOOK | 函数 | 核心函数 | Priority |
---|---|---|---|
NF_INET_LOCAL_IN | ip_vs_reply4 | ip_vs_out | NF_IP_PRI_NAT_SRC - 2 |
NF_INET_LOCAL_IN | ip_vs_remote_request4 | ip_vs_in | NF_IP_PRI_NAT_SRC - 1 |
NF_INET_LOCAL_OUT | ip_vs_local_reply4 | ip_vs_out | NF_IP_PRI_NAT_DST + 1 |
NF_INET_LOCAL_OUT | ip_vs_local_request4 | ip_vs_in | NF_IP_PRI_NAT_DST + 2 |
NF_INET_FORWARD | ip_vs_forward_icmp | ip_vs_in_icmp | 99 |
NF_INET_FORWARD | ip_vs_reply4 | ip_vs_out | 100 |
Hook点LOCAL_IN
在Hook点NF_INET_LOCAL_IN上,IPVS挂载了两个函数ip_vs_reply4和ip_vs_remote_request4,其中前者优先级高于后者(priority数值越小,越先被调用)。前者ip_vs_reply4主要用于NAT/Masq转发模式,其核心处理函数为ip_vs_out,负责处理IPVS系统回复给外部客户端的报文,包括修改IP地址等。
由于处理的是回复报文,要求系统中已经存在对应的连接。如果找不到连接,并且开启了nat_icmp_send,则对于真实服务器发来的TCP/UDP/SCTP报文,IPVS会向其回复ICMP目的不可达(端口不可达)报文。
/*
 * ip_vs_out() -- abridged excerpt: local declarations and the remainder of
 * the body are not shown, and the final `if` block is left open.
 *
 * Looks up the connection for the packet with the protocol's conn_out_get();
 * when one exists and uses the MASQ forwarding method, the packet is handed
 * to handle_response() and its result is returned.
 */
static unsigned int ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
/* Check if the packet belongs to an existing entry
*/
cp = pp->conn_out_get(ipvs, af, skb, &iph);
if (likely(cp)) {
/* Any forwarding method other than MASQ jumps to ignore_cp
 * (label not shown in this excerpt). */
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
goto ignore_cp;
return handle_response(af, skb, pd, cp, &iph, hooknum);
}
/* No connection entry: the following branch is entered only when
 * sysctl_nat_icmp_send(ipvs) is non-zero and the protocol is TCP, UDP
 * or SCTP. The branch body is not part of this excerpt. */
if (sysctl_nat_icmp_send(ipvs) &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
另外一个Hook函数ip_vs_remote_request4,其核心函数为ip_vs_in,负责处理由外部客户端进入IPVS系统的报文,如果没有可用的连接,将使用调度函数进行调度处理,创建连接结构。
/*
 * ip_vs_in() -- abridged excerpt: local declarations, the initial connection
 * lookup and the final return are not shown.
 *
 * When no connection is available, tries to schedule one; then records
 * input statistics, updates the connection state and transmits the packet
 * through the connection's packet_xmit handler.
 */
static unsigned int ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
/* No existing connection: let ip_vs_try_to_schedule() provide one through
 * &cp. If it returns 0, the value it left in v is returned to the caller
 * (presumably a netfilter verdict -- confirm against the full source). */
if (unlikely(!cp)) {
int v;
if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
return v;
}
/* Account the packet on the connection and update its state for the
 * input direction (IP_VS_DIR_INPUT). */
ip_vs_in_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
/* packet_xmit may be unset; when present its result is stored in ret. */
if (cp->packet_xmit)
ret = cp->packet_xmit(skb, cp, pp, &iph);
}
Hook点LOCAL_OUT
与前一节的NF_INET_LOCAL_IN Hook点不同,此处的NF_INET_LOCAL_OUT Hook点用于处理IPVS本机发送的报文,而前者用于处理外部客户端进入IPVS系统的报文。
在Hook点NF_INET_LOCAL_OUT上,IPVS挂载了两个函数ip_vs_local_reply4和ip_vs_local_request4,其中前者优先级高于后者。前者ip_vs_local_reply4的核心函数为ip_vs_out,主要用于NAT/Masq转发模式,负责NAT地址的修改。
函数ip_vs_local_request4的核心函数为ip_vs_in,其负责处理由本机应用层进入IPVS系统的报文的调度和发送。
Hook点FORWARD
在Hook点NF_INET_FORWARD上,IPVS挂载了两个函数ip_vs_forward_icmp和ip_vs_reply4,其中前者优先级高于后者。前者ip_vs_forward_icmp的核心处理函数为ip_vs_in_icmp,用于处理外部进入IPVS系统的ICMP报文,将其调度到对应的真实服务器上。
/*
 * ip_vs_in_icmp() -- abridged excerpt: local declarations, the parsing that
 * fills ciph/offset, and the tail of the function are not shown.
 *
 * Looks up the connection with the protocol's conn_in_get() using ciph;
 * if none exists, optionally schedules one, then transmits via
 * ip_vs_icmp_xmit().
 */
static int ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, unsigned int hooknum)
{
cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph);
if (!cp) {
int v;
/* Scheduling is gated by sysctl_schedule_icmp(); when it is off the
 * packet is accepted without further IPVS processing here. */
if (!sysctl_schedule_icmp(ipvs))
return NF_ACCEPT;
/* On scheduling failure (return 0) the value left in v is returned. */
if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
return v;
/* Mark that cp came from scheduling in this call, not from the lookup. */
new_cp = true;
}
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
函数ip_vs_reply4,核心函数为ip_vs_out,主要用于NAT/Masq转发模式,负责NAT地址的修改。对于真实服务器回复的报文,其目的地址为外部客户端的地址,非IPVS系统的虚拟地址,所以其将进入此转发Hook点,此时进行SNAT转换,将源地址转换为IPVS的虚拟地址。
内核版本 4.15