在此之前,如果不懂IP分片技术的话,请参照这里。IP分片技术比较简单暴力,没有TCP那样复杂的窗口协议。基本上只是暴力的拆分和重组,代码基本在ip_fragment.c中。
先从总体上说说。首先,每个IP(主机)都会有IP分片包(注意是IP,不是IP对)。所以,每个IP都有一个如下的结构体来维护上面的所有IP分片:
/*
 * Per-host fragment bookkeeping: one entry per IP address (a single host,
 * not an address pair), chained into the fragtable hash table.
 */
struct hostfrags {
struct ipq *ipqueue;// queue of incomplete (fragmented) datagrams kept for this host
int ip_frag_mem;// compared against IPFRAG_HIGH_THRESH in ip_defrag(); presumably the memory charged to this host's fragments - confirm
u_int ip;// IP address this entry belongs to
// The next three fields make this structure an element of a hash table.
int hash_index;// presumably the bucket index inside fragtable - confirm
struct hostfrags *prev;
struct hostfrags *next;
};
// Hash table holding the hostfrags entries of all hosts; ip_frag_init()
// allocates it as a zeroed array of n bucket pointers.
static struct hostfrags **fragtable;
每个IP下面又有很多的被分片的IP包——IP碎片队列,IP碎片队列的定义在这:
/* Describe an entry in the "incomplete datagrams" queue. */
/*
 * One ipq exists per datagram that is still being reassembled. It hangs off
 * the owning host's hostfrags entry and carries the list of fragments
 * received so far plus the expiry timer for the whole datagram.
 */
struct ipq {
unsigned char *mac; /* pointer to MAC header */
struct ip *iph; /* pointer to IP header; ip_defrag() copies ihl + 8 bytes into it when the offset-0 fragment arrives */
int len; /* total length of original datagram; set by ip_defrag() when the fragment without IP_MF arrives */
short ihlen; /* length of the IP header */
short maclen; /* length of the MAC header */
struct timer_list timer; /* when will this queue expire? (re-armed by ip_defrag() on every new fragment) */
struct ipfrag *fragments; /* linked list of received fragments */
struct hostfrags *hf; /* owning host entry; ip_defrag_stub() reads it to set this_host before running an expired timer */
struct ipq *next; /* linked list pointers */
struct ipq *prev;
// struct device *dev; /* Device - for icmp replies */
};
最终的IP碎片的定义在这:
/* Describe an IP fragment. */
/*
 * One node of the doubly linked fragment list kept in struct ipq.
 * ip_defrag() adjusts offset, len and ptr of an already queued node when a
 * newly arrived fragment overlaps its beginning.
 */
struct ipfrag
{
int offset; /* offset of fragment in IP datagram */
int end; /* last byte of data in datagram */
int len; /* length of this fragment */
struct sk_buff *skb; /* complete received fragment */
unsigned char *ptr; /* pointer into real fragment data */
struct ipfrag *next; /* linked list pointers */
struct ipfrag *prev;
};
由于libnids中的分片重组代码是从内核中拿出来修改的,所以保留了内核的注释。这里就不多做解释了。
好了步入处理逻辑,照例,先看初始化:
/*
 * Initialise the defragmentation module.
 *
 * Records the current wall-clock second in time0 and allocates the host
 * hash table with n zeroed buckets. When the allocation fails the
 * nids_params.no_mem handler is invoked with the name of this function.
 *
 * n - number of buckets in the host hash table; also stored in hash_size.
 */
void ip_frag_init(int n)
{
    struct timeval now;

    /* Remember the second at which the module was started. */
    gettimeofday(&now, NULL);
    time0 = now.tv_sec;

    /* One (initially empty) bucket pointer per hash slot. */
    fragtable = calloc(n, sizeof *fragtable);
    if (fragtable == NULL) {
        nids_params.no_mem("ip_frag_init");
    }

    hash_size = n;
}
简单到不能再简单——分配了一个主机的哈希表。分完收工。好吧,看重组逻辑:
/*
 * Entry point of the defragmenter: decides whether a packet is a fragment
 * and, if it is, hands a private copy of it to ip_defrag().
 *
 * iph    - the received IP packet; ntohs(iph->ip_len) bytes are copied from
 *          it, so the caller must guarantee that many bytes are readable
 *          (NOTE(review): assumes the caller validated ip_len - confirm).
 * defrag - out parameter, written only on the fragment path: receives the
 *          reassembled datagram, or NULL while the datagram is incomplete.
 *
 * Returns
 *   IPF_NOTF - the packet is not a fragment; *defrag is left untouched.
 *   IPF_NEW  - this fragment completed a datagram; *defrag points to it.
 *   IPF_ISF  - the fragment was consumed (queued or dropped); *defrag is NULL.
 */
int ip_defrag_stub(struct ip *iph, struct ip **defrag)
{
    int offset, flags, tot_len;
    struct sk_buff *skb;

    numpack++;

    /* Handle expired reassembly timers before looking at the new packet. */
    timenow = 0; /* presumably forces jiffies() to re-read the clock - confirm */
    while (timer_head && timer_head->expires < jiffies()) {
        /* The expiry handler works on this_host, so select the owner first. */
        this_host = ((struct ipq *) (timer_head->data))->hf;
        timer_head->function(timer_head->data);
    }

    /* Split ip_off into its flag bits and the fragment offset. */
    offset = ntohs(iph->ip_off);
    flags = offset & ~IP_OFFSET;
    offset &= IP_OFFSET;

    /* Neither "more fragments" nor a non-zero offset: not a fragment. */
    if (((flags & IP_MF) == 0) && (offset == 0)) {
        ip_defrag(iph, 0);
        return IPF_NOTF;
    }

    /*
     * The packet is a fragment: copy it into a freshly allocated sk_buff
     * (header structure immediately followed by the packet bytes) and pass
     * that buffer to ip_defrag().
     */
    tot_len = ntohs(iph->ip_len);
    skb = malloc(tot_len + sizeof(struct sk_buff));
    if (!skb) {
        nids_params.no_mem("ip_defrag_stub");
        /*
         * The handler is a user-replaceable callback and may return. Drop
         * the fragment instead of dereferencing a NULL buffer below.
         */
        *defrag = NULL;
        return IPF_ISF;
    }
    skb->data = (char *) (skb + 1);
    memcpy(skb->data, iph, tot_len);

    /* Memory accounting: padded payload size plus per-buffer overhead. */
    skb->truesize = tot_len + 16 + nids_params.dev_addon;
    skb->truesize = (skb->truesize + 15) & ~15;
    skb->truesize += nids_params.sk_buff_size;

    /*
     * ip_defrag() returns the glued datagram once every fragment has been
     * collected; the caller then continues normal IP processing with it.
     * Otherwise the fragment was only stored and IP processing is skipped.
     */
    *defrag = (struct ip *) ip_defrag((struct ip *) (skb->data), skb);
    if (*defrag)
        return IPF_NEW;
    return IPF_ISF;
}
/* Process an incoming IP datagram fragment. */
/*
 * Core of the fragment reassembly logic.
 *
 * iph - IP header of the packet being examined.
 * skb - buffer holding the fragment, or 0 when the caller only wants any
 *       stale queue for an unfragmented packet to be discarded
 *       (ip_defrag_stub() passes 0 for non-fragments).
 *
 * Returns the value of ip_glue(qp) once ip_done(qp) reports that the
 * datagram is complete; returns NULL/0 in every other case (fragment
 * queued, fragment dropped, or packet not a fragment).
 *
 * On the failure paths below skb is released with kfree_skb(); after a
 * successful ip_frag_create() the buffer is linked into the queue.
 */
static char *ip_defrag(struct ip *iph, struct sk_buff *skb)
{
struct ipfrag *prev, *next, *tmp;
struct ipfrag *tfp;
struct ipq *qp;
char *skb2;
unsigned char *ptr;
int flags, offset;
int i, ihl, end;
// If this packet is a fragment (skb is non-NULL) and hostfrag_find() reports
// no entry for it, create one right away.
// NOTE(review): these helpers presumably also point this_host at the entry
// for the current IP - confirm in their definitions.
if (!hostfrag_find(iph) && skb)
hostfrag_create(iph);
/* Start by cleaning up the memory. */
// When this host's ip_frag_mem exceeds IPFRAG_HIGH_THRESH, call ip_evictor()
// (defined elsewhere; presumably releases fragment memory of this host).
if (this_host)
if (this_host->ip_frag_mem > IPFRAG_HIGH_THRESH)
ip_evictor();
/* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
// Look up the fragment queue that belongs to this datagram (none if there is
// no current host).
if (this_host)
qp = ip_find(iph);
else
qp = 0;
/* Is this a non-fragmented datagram? */
offset = ntohs(iph->ip_off);
flags = offset & ~IP_OFFSET;
offset &= IP_OFFSET;
if (((flags & IP_MF) == 0) && (offset == 0))
{
if (qp != NULL)
ip_free(qp); /* Fragmented frame replaced by full
unfragmented copy */
return 0;
}
/* ip_evictor() could have removed all queues for the current host */
if (!this_host)
hostfrag_create(iph);
offset <<= 3; /* offset is in 8-byte chunks */
ihl = iph->ip_hl * 4;
/*
If the queue already existed, keep restarting its timer as long as
we still are receiving fragments. Otherwise, create a fresh queue
entry.
*/
// A queue for this datagram already exists under the current host.
if (qp != NULL)
{
/* ANK. If the first fragment is received, we should remember the correct
IP header (with options) */
if (offset == 0)
{
qp->ihlen = ihl;
memcpy(qp->iph, iph, ihl + 8);
}
del_timer(&qp->timer);
qp->timer.expires = jiffies() + IP_FRAG_TIME; /* about 30 seconds */
qp->timer.data = (unsigned long) qp; /* pointer to queue */
qp->timer.function = ip_expire; /* expire function */
add_timer(&qp->timer);
}
// Otherwise create a new fragment queue.
else
{
/* If we failed to create it, then discard the frame. */
if ((qp = ip_create(iph)) == NULL)
{
kfree_skb(skb, FREE_READ);
return NULL;
}
}
/* Attempt to construct an oversize packet. */
// No IP datagram can be larger than 65535 bytes; log a warning and drop the
// fragment as soon as this is detected.
if (ntohs(iph->ip_len) + (int) offset > 65535)
{
// NETDEBUG(printk("Oversized packet received from %s\n", int_ntoa(iph->ip_src.s_addr)));
nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERSIZED, iph, 0);
kfree_skb(skb, FREE_READ);
return NULL;
}
// From here on: find the insert position in the fragment list and resolve
// overlaps. An overlap with the preceding fragment trims the head of the new
// fragment; an overlap with following fragments trims (or removes) those
// older fragments.
/* Determine the position of this fragment. */
end = offset + ntohs(iph->ip_len) - ihl;
/* Point into the IP datagram 'data' part. */
ptr = (unsigned char *)(skb->data + ihl);
/* Is this the final fragment? */
if ((flags & IP_MF) == 0)
qp->len = end;
/*
Find out which fragments are in front and at the back of us in the
chain of fragments so far. We must know where to put this
fragment, right?
*/
prev = NULL;
for (next = qp->fragments; next != NULL; next = next->next)
{
if (next->offset >= offset)
break; /* bingo! */
prev = next;
}
/*
We found where to put this one. Check for overlap with preceding
fragment, and, if needed, align things so that any overlaps are
eliminated.
*/
if (prev != NULL && offset < prev->end)
{
nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0);
i = prev->end - offset;
offset += i; /* ptr into datagram */
ptr += i; /* ptr into fragment data */
}
/*
Look for overlap with succeeding segments.
If we can merge fragments, do it.
*/
for (tmp = next; tmp != NULL; tmp = tfp)
{
tfp = tmp->next;
if (tmp->offset >= end)
break; /* no overlaps at all */
nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0);
// NOTE(review): the overlap is computed from next->offset, not tmp->offset;
// next is advanced to tfp below whenever tmp is removed - confirm the two
// always refer to the same node here.
i = end - next->offset; /* overlap is 'i' bytes */
tmp->len -= i; /* so reduce size of */
tmp->offset += i; /* next fragment */
tmp->ptr += i;
/*
If we get a frag size of <= 0, remove it and the packet that it
goes with. We never throw the new frag away, so the frag being
dumped has always been charged for.
*/
if (tmp->len <= 0)
{
if (tmp->prev != NULL)
tmp->prev->next = tmp->next;
else
qp->fragments = tmp->next;
if (tmp->next != NULL)
tmp->next->prev = tmp->prev;
next = tfp; /* We have killed the original next frame */
frag_kfree_skb(tmp->skb, FREE_READ);
frag_kfree_s(tmp, sizeof(struct ipfrag));
}
}
// Now insert the current fragment into the queue.
/* Insert this fragment in the chain of fragments. */
// (The NULL assignment is redundant: tfp is overwritten on the next line.)
tfp = NULL;
tfp = ip_frag_create(offset, end, skb, ptr);
/*
No memory to save the fragment - so throw the lot. If we failed
the frag_create we haven't charged the queue.
*/
if (!tfp)
{
nids_params.no_mem("ip_defrag");
kfree_skb(skb, FREE_READ);
return NULL;
}
/* From now on our buffer is charged to the queues. */
tfp->prev = prev;
tfp->next = next;
if (prev != NULL)
prev->next = tfp;
else
qp->fragments = tfp;
if (next != NULL)
next->prev = tfp;
/*
OK, so we inserted this new fragment into the chain. Check if we
now have a full IP datagram which we can bump up to the IP
layer...
*/
// Check whether every fragment has been collected; if so, glue them into
// one complete IP datagram and return it.
if (ip_done(qp))
{
skb2 = ip_glue(qp); /* glue together the fragments */
return (skb2);
}
return (NULL);
}