2010年03月02日 星期二 15时55分20秒
在simpLB_2_6的基础上,加入了连接跟踪机制。现在可以用TCP协议的应用进行测试了。
依然存在的缺陷:用于连接跟踪的定时器还不能释放超时的连接。
1、simpLB.c
/*简单的轮询调度算法,添加了连接跟踪,可测试HTTP,但不稳定*/
/*用于连接跟踪的定时器,还不能释放超时连接*/
#include <linux/kernel.h>
#include <linux/tcp.h> /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
#include <net/route.h> /* for ip_route_output */
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/timer.h>
#include "sahu_lb_tools.h"
/*
 * TCP service ports (host byte order; callers wrap them in htons()).
 * VPORT_TCP is passed as the virtual-side port and DPORT_TCP as the
 * backend-side port to tcp_dnat_base()/tcp_snat_base().
 */
#define VPORT_TCP 80
#define DPORT_TCP 80
/* UDP service ports, used the same way by udp_dnat_base()/udp_snat_base(). */
#define VPORT_UDP 4950
#define DPORT_UDP 4950
MODULE_LICENSE("GPL");
/*
 * Addresses handled by the balancer, stored as four raw bytes in network
 * byte order so they can be compared directly with iph->saddr/iph->daddr.
 * The byte escapes must be written with backslashes ("\xc0"); with forward
 * slashes the literals are plain text and never match any packet.
 * NOTE(review): the hooks read these through a __be32 cast; string literals
 * are not guaranteed to be 4-byte aligned on every architecture.
 */
static unsigned char *vmLB_ip = (unsigned char *)"\xc0\xa8\x7a\x01"; /* 192.168.122.1, virtual IP */
static unsigned char *vm01_ip = (unsigned char *)"\xc0\xa8\x63\x65"; /* 192.168.99.101, backend 1 */
static unsigned char *vm02_ip = (unsigned char *)"\xc0\xa8\x63\x66"; /* 192.168.99.102, backend 2 */
/* Backend table; filled in by init_module(). */
static unsigned char *srv_list[2];
/* Round-robin cursor. Unsigned so that wrap-around can never yield a negative index. */
static unsigned int sahu_id = 0;
/* This is the hook function itself */
unsigned int sahu_pre_routing(unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned char *srv_addr=NULL;
char addr_str[16];
struct sk_buff *sb = skb;
struct iphdr *iph;
struct tcphdr *tcph;
struct sahu_lb_conn *cp;
if(!sb) return NF_ACCEPT;
iph = ip_hdr(sb);
if(!iph) return NF_ACCEPT;
if (iph->daddr == *(__be32 *)vmLB_ip){/*local in*/
srv_addr = srv_list[(sahu_id++)%2];
if(iph->protocol == IPPROTO_TCP){
tcph = (void *)skb_network_header(skb) + ip_hdrlen(skb) ;
cp = sahu_lb_conn_in_get(iph->protocol,iph->saddr,tcph->source,iph->daddr,tcph->dest);
if(!cp){
printk("A new connection/n");
cp = sahu_lb_conn_new(iph->protocol,iph->saddr,tcph->source,*(__be32 *)vmLB_ip,tcph->dest,*(__be32 *)srv_addr,tcph->dest);
}else{
srv_addr = (unsigned char *)&cp->daddr;
inet_i2str(cp->daddr,addr_str);
printk("Alreay has a connection:cp->daddr:%s/n",addr_str);
}
tcp_dnat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_TCP),htons(DPORT_TCP));
}else if(iph->protocol == IPPROTO_UDP){
udp_dnat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_UDP),htons(DPORT_UDP));
}else{
}
iph->daddr= *(unsigned int *)srv_addr;
ip_send_check(iph);
// skb->local_df = 1;
printk("DNat: %d.%d.%d.%d To:%d.%d.%d.%d/n",
*vmLB_ip, *(vmLB_ip + 1), *(vmLB_ip + 2),*(vmLB_ip + 3),
*srv_addr,*(srv_addr + 1), *(srv_addr + 2),*(srv_addr +3));
return NF_ACCEPT;
}else{
inet_i2str(iph->daddr,addr_str);
printk("No DNat for %s/n",addr_str);
return NF_ACCEPT;
}
}
unsigned int sahu_post_routing(unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned char *srv_addr=NULL;
char addr_str[16];
struct sk_buff *sb = skb;
struct iphdr *iph;
if(!sb) return NF_ACCEPT;
iph = ip_hdr(sb);
if(!iph) return NF_ACCEPT;
srv_addr = srv_list[(sahu_id+1)%2];
if (iph->saddr == *(unsigned int *)srv_addr){
if(iph->protocol == IPPROTO_TCP){
tcp_snat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_TCP),htons(DPORT_TCP));
}else if(iph->protocol == IPPROTO_UDP){
udp_snat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_UDP),htons(DPORT_UDP));
}else{
}
iph->saddr= *(unsigned int *)vmLB_ip;
ip_send_check(iph);
// skb->local_df = 1;
printk("SNat: %d.%d.%d.%d To:%d.%d.%d.%d/n",
*srv_addr, *(srv_addr + 1), *(srv_addr + 2),*(srv_addr + 3),
*vmLB_ip, *(vmLB_ip + 1), *(vmLB_ip + 2),*(vmLB_ip + 3));
return NF_ACCEPT;
}else{
inet_i2str(iph->saddr,addr_str);
printk("No SNat for %s/n",addr_str);
return NF_ACCEPT;
}
}
/*
 * Hook registrations for this module: one IPv4 entry per direction, both at
 * priority 100. Registered and unregistered as a single array.
 */
static struct nf_hook_ops sahu_ops[] __read_mostly = {
	/* inbound: DNAT towards a backend */
	{
		.owner    = THIS_MODULE,
		.pf       = PF_INET,
		.hooknum  = NF_INET_PRE_ROUTING,
		.priority = 100,
		.hook     = sahu_pre_routing,
	},
	/* outbound: SNAT back to the virtual address */
	{
		.owner    = THIS_MODULE,
		.pf       = PF_INET,
		.hooknum  = NF_INET_POST_ROUTING,
		.priority = 100,
		.hook     = sahu_post_routing,
	},
};
/*
 * Module entry point.
 *
 * All state the hooks depend on (backend table, connection table) is set up
 * BEFORE the hooks are registered, because a hook may run as soon as
 * registration succeeds. Failures are reported to the module loader instead
 * of being ignored.
 *
 * Returns 0 on success, -ENOMEM if the connection table cannot be
 * allocated, or the error returned by nf_register_hooks().
 */
int init_module(void)
{
	int ret;

	srv_list[0] = vm01_ip;
	srv_list[1] = vm02_ip;

	if (sahu_lb_conn_init() != 0) {
		pr_info("can't install simpLB into kernel!\n");
		return -ENOMEM;
	}

	ret = nf_register_hooks(sahu_ops, ARRAY_SIZE(sahu_ops));
	if (ret < 0) {
		pr_info("can't install simpLB into kernel!\n");
		sahu_lb_conn_cleanup(); /* undo the table allocation */
		return ret;
	}

	pr_info("simpLB install into kernel!\n");
	return 0;
}
/*
 * Module exit point: unregister the hooks first so that no packet can reach
 * the connection table any more, then release the table.
 */
void cleanup_module(void)
{
	nf_unregister_hooks(sahu_ops, ARRAY_SIZE(sahu_ops));
	sahu_lb_conn_cleanup();
	pr_info("simpLB removed from kernel!\n");
}
2、sahu_lb_tools.h
#include <linux/errno.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
/* Number of list heads set up in the connection table by sahu_lb_conn_init(). */
#define SAHU_LB_CONN_TAB_SIZE 100
/* Delay, in jiffies (3 seconds), after which a connection's timer fires. */
#define SAHU_LB_CONN_TIMEOUT (3*HZ)
/* Connection table: array of list heads allocated by sahu_lb_conn_init(). */
static struct list_head *sahu_lb_conn_tab;
/* Random seed mixed into the connection hash; filled in at init time. */
static unsigned int sahu_lb_conn_rnd;
/*
 * struct: sahu_lb_conn
 * One tracked connection. Addresses and ports are stored exactly as taken
 * from the packet headers by the caller of sahu_lb_conn_new().
 */
struct sahu_lb_conn{
/* link in the connection table list */
struct list_head c_list;
/* client address (IP source of the inbound packet) */
__u32 caddr;
/* virtual address the client connected to */
__u32 vaddr;
/* backend address chosen for this connection */
__u32 daddr;
/* client port */
__u16 cport;
/* port on the virtual address */
__u16 vport;
/* port on the backend */
__u16 dport;
/* IP protocol number */
__u16 protocol;
/* set to 0 on creation and to 1 by sahu_lb_conn_in_get() on every hit */
atomic_t refcnt;
/* per-connection timer, armed by sahu_lb_conn_new() */
struct timer_list timer;
/* not referenced by any code visible in this file */
volatile unsigned long timeout;
};
/*function: sahu_lb_conn_init*/
int sahu_lb_conn_init(void){
int idx;
sahu_lb_conn_tab = vmalloc(SAHU_LB_CONN_TAB_SIZE*sizeof(struct list_head *));
if(!sahu_lb_conn_tab){
printk("sahu_lb_conn_init: error/n");
return -1;
}
for(idx=0;idx<SAHU_LB_CONN_TAB_SIZE;idx++){
INIT_LIST_HEAD(&sahu_lb_conn_tab[idx]);
}
get_random_bytes(&sahu_lb_conn_rnd,sizeof(sahu_lb_conn_rnd));
return 0;
}
/*function: sahu_lb_conn_cleanup*/
void sahu_lb_conn_cleanup(void){
vfree(sahu_lb_conn_tab);
}
/*
 * function: sahu_lb_conn_hashkey
 * Map (protocol, address, port) to a bucket index of the connection table.
 *
 * The raw jhash value is reduced modulo SAHU_LB_CONN_TAB_SIZE so that the
 * result is always a valid index into sahu_lb_conn_tab.
 */
static unsigned int sahu_lb_conn_hashkey(unsigned int proto,__u32 addr,__u16 port){
	return jhash_3words((__force u32)addr, (__force u32)port, proto,
			    sahu_lb_conn_rnd) % SAHU_LB_CONN_TAB_SIZE;
}
/*
 * function: sahu_lb_conn_hash
 * Insert a connection into the table. The hash of the client tuple is
 * evaluated but not consulted: every entry is linked at the front of the
 * first bucket. Always returns 0.
 */
static inline int sahu_lb_conn_hash(struct sahu_lb_conn *cp){
	unsigned int bucket = sahu_lb_conn_hashkey(cp->protocol, cp->caddr, cp->cport);

	(void)bucket; /* per-bucket insertion is disabled for now */
	list_add(&cp->c_list, &sahu_lb_conn_tab[0]);
	return 0;
}
/*
 * function: sahu_lb_conn_unhash
 * Unlink a connection from whichever list it is on. The hash of the client
 * tuple is evaluated, mirroring the insert path, but is not needed to
 * remove the entry. Always returns 0.
 */
static inline int sahu_lb_conn_unhash(struct sahu_lb_conn *cp){
	unsigned int bucket = sahu_lb_conn_hashkey(cp->protocol, cp->caddr, cp->cport);

	(void)bucket;
	list_del(&cp->c_list);
	return 0;
}
/*
 * function: sahu_lb_conn_expire
 * Timer callback installed on every connection by sahu_lb_conn_new().
 * data is the connection pointer cast to unsigned long (see timer.data).
 *
 * The body is intentionally empty: when the timer fires nothing happens and
 * the connection stays in the table. The disabled draft of the expiry logic
 * is kept below for reference.
 *
 * NOTE(review): as written, the draft calls kfree(cp) before
 * del_timer_sync(&cp->timer), i.e. it touches cp after freeing it, and it
 * calls del_timer_sync() on the timer from inside that timer's own handler -
 * confirm both against the kernel timer API before re-enabling. The table
 * is also accessed from the packet hooks without locking.
 */
static void sahu_lb_conn_expire(unsigned long data){
// struct sahu_lb_conn *cp = (struct sahu_lb_conn *)data;
// if(atomic_read(&cp->refcnt)==0){
// sahu_lb_conn_unhash(cp);
// kfree(cp);
// del_timer_sync(&cp->timer);
// return;
// }
// del_timer(&cp->timer);
// cp->timer.expires = jiffies + SAHU_LB_CONN_TIMEOUT;
// add_timer(&cp->timer);
}
/*
 * function: sahu_lb_conn_new
 * Allocate a connection entry, fill it in, link it into the connection
 * table and arm its timer.
 *
 * proto:        IP protocol number
 * caddr/cport:  client address and port
 * vaddr/vport:  virtual address and port the client connected to
 * daddr/dport:  backend address and port chosen for the connection
 *
 * The allocation uses GFP_ATOMIC because the function is called from the
 * packet hook. The timer is armed last, once the entry is fully initialised
 * and reachable, so the callback can never observe a half-built object.
 *
 * Returns the new entry, or NULL if no memory is available. The entry is
 * owned by the connection table.
 */
struct sahu_lb_conn *
sahu_lb_conn_new(int proto, __u32 caddr,__u16 cport,__u32 vaddr,__u16 vport,__u32 daddr,__u16 dport){
	struct sahu_lb_conn *cp;

	cp = kmalloc(sizeof(*cp), GFP_ATOMIC);
	if (cp == NULL) {
		printk("sahu_lb_conn_new: no memory avaliable.\n");
		return NULL;
	}
	memset(cp, 0, sizeof(*cp));

	INIT_LIST_HEAD(&cp->c_list);
	cp->protocol = proto;
	cp->caddr = caddr;
	cp->cport = cport;
	cp->vaddr = vaddr;
	cp->vport = vport;
	cp->daddr = daddr;
	cp->dport = dport;
	atomic_set(&cp->refcnt, 0);

	init_timer(&cp->timer);
	cp->timer.data = (unsigned long)cp;
	cp->timer.function = &sahu_lb_conn_expire;
	cp->timer.expires = jiffies + SAHU_LB_CONN_TIMEOUT;

	sahu_lb_conn_hash(cp);
	add_timer(&cp->timer);
	return cp;
}
/*
 * function: sahu_lb_conn_in_get
 * Look up the connection matching an inbound packet by protocol, client
 * address/port and virtual address/port. The hash of the client tuple is
 * evaluated but not consulted; the first bucket is scanned linearly.
 *
 * On a hit the entry's refcnt is set to 1 and the entry is returned;
 * otherwise NULL is returned.
 */
static inline struct sahu_lb_conn *
sahu_lb_conn_in_get(int protocol,__u32 caddr,__u16 cport,__u32 vaddr,__u16 vport){
	struct sahu_lb_conn *entry;
	unsigned int bucket = sahu_lb_conn_hashkey(protocol, caddr, cport);

	(void)bucket; /* per-bucket lookup is disabled for now */
	list_for_each_entry(entry, &sahu_lb_conn_tab[0], c_list) {
		if (entry->caddr != caddr || entry->cport != cport)
			continue;
		if (entry->vaddr != vaddr || entry->vport != vport)
			continue;
		if (entry->protocol != protocol)
			continue;

		atomic_set(&entry->refcnt, 1);
		return entry;
	}
	return NULL;
}
/*
 * function: inet_i2str
 * Format an IPv4 address as zero-padded dotted decimal, e.g.
 * "192.168.122.001".
 *
 * addr:     address as stored in memory; its bytes are printed in memory
 *           order, so a network-byte-order value prints most significant
 *           octet first
 * addr_str: output buffer of at least 16 bytes (15 characters + NUL)
 *
 * The terminator must be the NUL character '\0'. A multi-character
 * constant such as '/0' truncates to the digit '0' and leaves the string
 * unterminated.
 *
 * Always returns 0.
 */
int inet_i2str(unsigned int addr,char *addr_str){
	const unsigned char *octet = (const unsigned char *)&addr;
	char *out = addr_str;
	int i;

	for (i = 0; i < 4; i++) {
		*out++ = '0' + octet[i] / 100;
		*out++ = '0' + (octet[i] / 10) % 10;
		*out++ = '0' + octet[i] % 10;
		*out++ = '.';
	}
	/* Replace the dot written after the last octet with the terminator. */
	addr_str[15] = '\0';
	return 0;
}
/*
 * function: sahu_lb_check_diff4
 * Fold the replacement of one 32-bit field into a running checksum:
 * sums the complement of the old value and the new value on top of oldsum.
 */
static inline __wsum sahu_lb_check_diff4(__be32 old, __be32 new, __wsum oldsum){
	__be32 delta[2];

	delta[0] = ~old;
	delta[1] = new;
	return csum_partial(delta, sizeof(delta), oldsum);
}
/*
 * function: sahu_lb_check_diff2
 * Fold the replacement of one 16-bit field into a running checksum:
 * sums the complement of the old value and the new value on top of oldsum.
 */
static inline __wsum sahu_lb_check_diff2(__be16 old, __be16 new, __wsum oldsum){
	__be16 delta[2];

	delta[0] = ~old;
	delta[1] = new;
	return csum_partial(delta, sizeof(delta), oldsum);
}
/*
 * function: tcp_snat_base
 * Rewrite the TCP source port to vport and patch the TCP checksum for the
 * port change and for the IP source change daddr -> vaddr (the IP header
 * itself is rewritten by the caller).
 *
 * The checksum delta is computed from the port actually present in the
 * header, so the result stays valid even when the packet did not come from
 * dport. dport is kept in the signature for interface compatibility only.
 *
 * Always returns 0.
 */
static int tcp_snat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
	struct tcphdr *tcph = (void *)skb_network_header(skb) + ip_hdrlen(skb);
	__be16 oldport = tcph->source;

	tcph->source = vport;
	tcph->check =
		csum_fold(sahu_lb_check_diff4(daddr, vaddr,
			  sahu_lb_check_diff2(oldport, vport,
					      ~csum_unfold(tcph->check))));
	/* A hardware-computed full-packet sum no longer matches the data. */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
	return 0;
}
/*
 * function: tcp_dnat_base
 * Rewrite the TCP destination port to dport and patch the TCP checksum for
 * the port change and for the IP destination change vaddr -> daddr (the IP
 * header itself is rewritten by the caller).
 *
 * The checksum delta is computed from the port actually present in the
 * header, so the result stays valid even when the packet was not addressed
 * to vport. vport is kept in the signature for interface compatibility
 * only.
 *
 * Always returns 0.
 */
static int tcp_dnat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
	struct tcphdr *tcph = (void *)skb_network_header(skb) + ip_hdrlen(skb);
	__be16 oldport = tcph->dest;

	tcph->dest = dport;
	tcph->check =
		csum_fold(sahu_lb_check_diff4(vaddr, daddr,
			  sahu_lb_check_diff2(oldport, dport,
					      ~csum_unfold(tcph->check))));
	/* A hardware-computed full-packet sum no longer matches the data. */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
	return 0;
}
/*
 * function: udp_snat_base
 * Rewrite the UDP source port to vport and patch the UDP checksum for the
 * port change and for the IP source change daddr -> vaddr (the IP header
 * itself is rewritten by the caller).
 *
 * A UDP checksum of zero means "no checksum" and is left untouched;
 * patching it would produce a bogus non-zero value. Otherwise the delta is
 * computed from the port actually present in the header, and a result of
 * zero is stored as CSUM_MANGLED_0. dport is kept in the signature for
 * interface compatibility only.
 *
 * Always returns 0.
 */
static int udp_snat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
	struct udphdr *udph = (void *)skb_network_header(skb) + ip_hdrlen(skb);
	__be16 oldport = udph->source;

	udph->source = vport;
	if (udph->check) {
		udph->check =
			csum_fold(sahu_lb_check_diff4(daddr, vaddr,
				  sahu_lb_check_diff2(oldport, vport,
						      ~csum_unfold(udph->check))));
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
	/* A hardware-computed full-packet sum no longer matches the data. */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
	return 0;
}
/*
 * function: udp_dnat_base
 * Rewrite the UDP destination port to dport and patch the UDP checksum for
 * the port change and for the IP destination change vaddr -> daddr (the IP
 * header itself is rewritten by the caller).
 *
 * A UDP checksum of zero means "no checksum" and is left untouched;
 * patching it would produce a bogus non-zero value. Otherwise the delta is
 * computed from the port actually present in the header, and a result of
 * zero is stored as CSUM_MANGLED_0. vport is kept in the signature for
 * interface compatibility only.
 *
 * Always returns 0.
 */
static int udp_dnat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
	struct udphdr *udph = (void *)skb_network_header(skb) + ip_hdrlen(skb);
	__be16 oldport = udph->dest;

	udph->dest = dport;
	if (udph->check) {
		udph->check =
			csum_fold(sahu_lb_check_diff4(vaddr, daddr,
				  sahu_lb_check_diff2(oldport, dport,
						      ~csum_unfold(udph->check))));
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
	/* A hardware-computed full-packet sum no longer matches the data. */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
	return 0;
}
3、Makefile
# Out-of-tree kbuild makefile for the simpLB module.
# Recipe lines must start with a TAB character.
obj-m += simpLB.o

# Build tree of the running kernel; override with `make KDIR=...`.
KDIR ?= /lib/modules/$(shell uname -r)/build

.PHONY: all clean install remove

# Build simpLB.ko against the kernel build tree.
all:
	$(MAKE) -C $(KDIR) M=$(CURDIR) modules

# Remove build products.
clean:
	$(MAKE) -C $(KDIR) M=$(CURDIR) clean

# Load the module (requires root).
install:
	/sbin/insmod simpLB.ko

# Unload the module (requires root).
remove:
	/sbin/rmmod simpLB
4、测试
测试环境和simpLB_2_6的相同。
可于Client上数次运行:lynx -dump 192.168.122.1,可以发现,Server_1和Server_2依次返回主页面。形如:
sahu@sahusoft:~/work/kernel$ lynxD vmLB
It works!
@vm01
sahu@sahusoft:~/work/kernel$ lynxD vmLB
It works!
@vm02
sahu@sahusoft:~/work/kernel$ lynxD vmLB
It works!
@vm01
sahu@sahusoft:~/work/kernel$ lynxD vmLB
It works!
@vm02