以tcp协议为例
/*
 * IPVS protocol descriptor for TCP: a table of callbacks that the IPVS
 * core dispatches to for every TCP packet it processes.
 */
struct ip_vs_protocol ip_vs_protocol_tcp = {
.name = "TCP",
.protocol = IPPROTO_TCP,
.dont_defrag = 0,                             /* allow defragmentation for TCP */
.appcnt = ATOMIC_INIT(0),                     /* number of registered multi-connection apps (e.g. FTP) */
.init = ip_vs_tcp_init,
.exit = ip_vs_tcp_exit,
.register_app = tcp_register_app,             /* register an application helper on a port */
.unregister_app = tcp_unregister_app,
.conn_schedule = tcp_conn_schedule,           /* pick a real server for a new connection */
.conn_in_get = tcp_conn_in_get,               /* look up connection for inbound packets */
.conn_out_get = tcp_conn_out_get,             /* look up connection for outbound packets */
.snat_handler = tcp_snat_handler,             /* rewrite source addr/port + checksum (NAT) */
.dnat_handler = tcp_dnat_handler,             /* rewrite dest addr/port + checksum (NAT) */
.csum_check = tcp_csum_check,
.state_name = tcp_state_name,
.state_transition = tcp_state_transition,     /* drive the per-connection TCP state machine */
.app_conn_bind = tcp_app_conn_bind,           /* bind a connection to an app helper, cf. netfilter helpers */
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = tcp_timeout_change,
.set_state_timeout = tcp_set_state_timeout,
};
IPVS定义的超时,和netfilter类似,不过比netfilter的超时少得多,而且这些值不是通过/proc调整,而是通过ipvsadm命令来调整
/*
 * Per-state connection timeouts (in jiffies) for IPVS TCP connections.
 * Similar in spirit to the netfilter conntrack timeouts but a much
 * smaller set; tuned via the ipvsadm command rather than /proc.
 */
static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
[IP_VS_TCP_S_NONE] = 2*HZ,
[IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,      /* 15 minutes for established connections */
[IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
[IP_VS_TCP_S_SYN_RECV] = 1*60*HZ,
[IP_VS_TCP_S_FIN_WAIT] = 2*60*HZ,
[IP_VS_TCP_S_TIME_WAIT] = 2*60*HZ,
[IP_VS_TCP_S_CLOSE] = 10*HZ,
[IP_VS_TCP_S_CLOSE_WAIT] = 60*HZ,
[IP_VS_TCP_S_LAST_ACK] = 30*HZ,
[IP_VS_TCP_S_LISTEN] = 2*60*HZ,
[IP_VS_TCP_S_SYNACK] = 120*HZ,
[IP_VS_TCP_S_LAST] = 2*HZ,
};
连接调度的目的是找到一个合适的目的服务器,生成新连接。该函数在ip_vs_in()函数中调用:pp->conn_schedule(af, skb, pp, &v, &cp),这样就调用了tcp_conn_schedule.
在ip_vs_in()中随后调用 ret = cp->packet_xmit(skb, cp, pp); 如果packet_xmit设置为NAT方式,即ip_vs_nat_xmit函数,那么
其中调用:pp->dnat_handler(skb, pp, cp) tcp_dnat_handler
其中调用:ip_vs_app_pkt_in(cp, skb),对于ftp 会调用 ip_vs_ftp_in,在这个函数里 会 过滤 client发过来的数据包,对于主动连接,会找到PORT xxx 指令,进而建立一个connection,并把该connection的control connection 设置为 cp.
而如果packet_xmit 设置为DR或者tunnel方式,则不会调用dnat_handler,所以也不会调用到ip_vs_ftp_in。对于这两种xmit方式,都需要利用ipvsadm 设置 persistent属性。那这里是不是意味着 对于 NAT方式的FTP,不需要设置 persistent connection 就可以工作呢?
static int tcp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp)
{
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
//取出tcp头
th = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_tcph), &_tcph);
if (th == NULL) {
*verdict = NF_DROP;
return 0;
}
//如果是syn包且有虚拟服务器
if (th->syn && (svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol, skb->nh.iph->daddr, th->dest))) {
if (ip_vs_todrop()) { //是否这虚拟服务器本身已经负载严重
ip_vs_service_put(svc);
*verdict = NF_DROP;
return 0;
}
*cpp = ip_vs_schedule(svc, skb); //建立ipvs连接
if (!*cpp) { //没有成功
*verdict = ip_vs_leave(svc, skb, pp); //后续处理,更新统计包,发送icmp不可达数据包等.
return 0;
}
ip_vs_service_put(svc);
}
return 1;
}
/* 很重要的函数
* IPVS main scheduling function
* It selects a server according to the virtual service, and creates a connection entry.
* Protocols supported: TCP, UDP
*/
/*
 * IPVS main scheduling function.
 *
 * Selects a real server for the given virtual service and creates a
 * connection entry for the packet's 5-tuple.  Supports TCP and UDP.
 * Returns the new connection, or NULL when nothing could be scheduled.
 */
struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
	struct iphdr *iph = skb->nh.iph;
	struct ip_vs_dest *dest;
	struct ip_vs_conn *conn = NULL;
	__u16 _ports[2], *ports;

	/* ports[0] = source port, ports[1] = destination port */
	ports = skb_header_pointer(skb, iph->ihl*4, sizeof(_ports), _ports);
	if (ports == NULL)
		return NULL;

	/* Persistent services take their own scheduling path. */
	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
		return ip_vs_sched_persist(svc, skb, ports);

	/* Not a fwmark service and the port does not match: not ours. */
	if (!svc->fwmark && ports[1] != svc->port) {
		if (!svc->port)
			IP_VS_ERR("Schedule: port zero only supported in persistent services, check your ipvs configuration\n");
		return NULL;
	}

	/* Ask the configured scheduler (rr, wlc, ...) for a real server. */
	dest = svc->scheduler->schedule(svc, skb);
	if (dest == NULL) {
		IP_VS_DBG(1, "Schedule: no dest found.\n");
		return NULL;
	}

	/* Create the connection entry; dport 0 means "same as vport". */
	conn = ip_vs_conn_new(iph->protocol, iph->saddr, ports[0], iph->daddr, ports[1], dest->addr, dest->port ? dest->port : ports[1], 0, dest);
	if (conn == NULL)
		return NULL;

	/* Account the new connection against service and dest counters. */
	ip_vs_conn_stats(conn, svc);
	return conn;
}
建立一个新连接
/*
 * Allocate and initialize a new IPVS connection entry for the given
 * 5-tuple, bind it to its destination server, transmit method and
 * (if any) application helper, and insert it into the connection hash.
 * Returns the new connection (refcnt 1) or NULL on allocation failure.
 */
struct ip_vs_conn * ip_vs_conn_new(int proto, __u32 caddr, __u16 cport, __u32 vaddr, __u16 vport,
__u32 daddr, __u16 dport, unsigned flags, struct ip_vs_dest *dest)
{
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
/* Allocate the connection from the dedicated slab cache. */
cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
return NULL;
}
memset(cp, 0, sizeof(*cp));
INIT_LIST_HEAD(&cp->c_list);
init_timer(&cp->timer);
cp->timer.data = (unsigned long)cp;
cp->timer.function = ip_vs_conn_expire; /* connection expiry handler */
cp->protocol = proto;
cp->caddr = caddr;
cp->cport = cport;
cp->vaddr = vaddr;
cp->vport = vport;
cp->daddr = daddr;
cp->dport = dport;
cp->flags = flags;
spin_lock_init(&cp->lock);
atomic_set(&cp->refcnt, 1);/* initial reference held by the caller */
atomic_set(&cp->n_control, 0);/* no controlled (child) connections yet */
atomic_set(&cp->in_pkts, 0);
atomic_inc(&ip_vs_conn_count);
if (flags & IP_VS_CONN_F_NO_CPORT)
atomic_inc(&ip_vs_conn_no_cport_cnt);
ip_vs_bind_dest(cp, dest); /* bind connection to its destination server */
cp->state = 0; /* initial protocol state */
cp->timeout = 3*HZ; /* default timeout: 3 seconds */
ip_vs_bind_xmit(cp);/* bind the packet transmit method (NAT/DR/tunnel/...) */
/* Bind an application helper if one is registered for this protocol;
   currently only FTP over TCP exists, hence the unlikely(). */
if (unlikely(pp && atomic_read(&pp->appcnt)))
/* Calls pp->app_conn_bind (tcp_app_conn_bind for TCP), which checks
   whether the port belongs to a multi-connection application protocol
   and, if so, attaches the helper -- the analogue of a netfilter
   conntrack helper.  NOTE(review): effective only in NAT mode. */
ip_vs_bind_app(cp, pp);/* effectively: return pp->app_conn_bind(cp); */
/* Insert the entry into the global IPVS connection hash table. */
ip_vs_conn_hash(cp);
return cp;
}
绑定连接目的服务器
static inline void ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
{
if (!dest)
return;
atomic_inc(&dest->refcnt);
//根据服务器情况设置连接标志,主要是用来确定连接数据包的发送方法
cp->flags |= atomic_read(&dest->conn_flags);
cp->dest = dest;
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
/* It is a normal connection, so increase the inactive connection counter because it is in TCP SYNRECV
state (inactive) or other protocol inacive state */
atomic_inc(&dest->inactconns); // 增加目的服务器的不活动连接计数,目前还属于不活动连接
} else {
/* It is a persistent connection/template, so increase the peristent connection counter */
atomic_inc(&dest->persistconns); // 如果是永久连接或模板,增加目的服务器的永久连接计数
}
//检查目的服务器的连接数是否超载了
if (dest->u_threshold != 0 && ip_vs_dest_totalconns(dest) >= dest->u_threshold)
dest->flags |= IP_VS_DEST_F_OVERLOAD;
}
绑定发送方法,参看下面发送方法实现
static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
{