转载:http://blog.csdn.net/msda/article/details/8494575
在此之前,如果不懂IP分片技术的话,请参照这里。IP分片技术比较简单暴力,没有TCP那样复杂的窗口协议。基本上只是暴力地拆分和重组,代码基本都在ip_fragment.c中。
先从总体上说说。首先,每个IP(主机)都会有IP分片包(注意是IP,不是IP对)。所以,每个IP都有一个如下的结构体来维护其上的所有IP分片:
- /* Per-host state for IP fragment reassembly: one entry per IP address (host). */
- struct hostfrags {
-     struct ipq *ipqueue;            /* queue of IP fragments kept for this host */
-     int ip_frag_mem;                /* compared with IPFRAG_HIGH_THRESH in ip_defrag(); presumably the memory held by this host's fragments -- confirm */
-     u_int ip;                       /* IP address of this host */
-     /* The remaining members make this structure an element of a hash table. */
-     int hash_index;                 /* presumably the table slot this entry lives in -- confirm */
-     struct hostfrags *prev, *next;  /* neighbouring entries */
- };
- // The hash table that keeps track of every host (IP); ip_frag_init() allocates it as an array of struct hostfrags pointers.
- static struct hostfrags **fragtable;
每个IP下面又有很多的被分片的IP包——IP碎片队列,IP碎片队列的定义在这:
- /* One entry of the "incomplete datagrams" queue: a datagram whose fragments are still being collected. */
- struct ipq {
-     unsigned char *mac;         /* points at the MAC header */
-     struct ip *iph;             /* points at the IP header */
-     int len;                    /* total length of the original datagram */
-     short ihlen, maclen;        /* length of the IP header, length of the MAC header */
-     struct timer_list timer;    /* expiry timer of this queue */
-     struct ipfrag *fragments;   /* linked list of the fragments received so far */
-     struct hostfrags *hf;       /* host entry of this queue; ip_defrag_stub() loads it into this_host when the timer fires */
-     struct ipq *next, *prev;    /* linked-list pointers */
-     // struct device *dev; /* Device - for icmp replies */
- };
最终的IP碎片的定义在这:
- /* A single received IP fragment, kept as a node of a doubly linked list. */
- struct ipfrag {
-     int offset;                  /* where this fragment starts inside the IP datagram */
-     int end;                     /* last byte of data in the datagram */
-     int len;                     /* length of this fragment */
-     struct sk_buff *skb;         /* the complete received fragment */
-     unsigned char *ptr;          /* points into the real fragment data */
-     struct ipfrag *next, *prev;  /* linked-list pointers */
- };
由于libnids中的分片重组代码是从内核中拿出来修改的,所以保留了内核的注释。这里就不多做解释了。
好了步入处理逻辑,照例,先看初始化:
- /*
-  * Initialise fragment reassembly.
-  *
-  * Stores the current time (seconds part) in time0, allocates a zero-filled
-  * table of n host-entry pointers in fragtable and records n in hash_size.
-  * When the allocation fails, nids_params.no_mem is called with
-  * "ip_frag_init".
-  */
- void ip_frag_init(int n)
- {
-     struct timeval now;
-     gettimeofday(&now, NULL);
-     time0 = now.tv_sec;
-     /* calloc() leaves every slot NULL, i.e. the table starts out empty. */
-     fragtable = calloc(n, sizeof *fragtable);
-     if (fragtable == NULL)
-         nids_params.no_mem("ip_frag_init");
-     hash_size = n;
- }
- /*
-  * Decide whether a packet is an IP fragment and, if it is, hand a private
-  * copy of it to ip_defrag().
-  *
-  * Returns IPF_NOTF for an unfragmented packet (*defrag is left untouched),
-  * IPF_NEW when ip_defrag() returned a packet (stored in *defrag), and
-  * IPF_ISF when the fragment was taken but nothing was returned yet
-  * (*defrag is then NULL).
-  */
- int ip_defrag_stub(struct ip *iph, struct ip **defrag)
- {
-     struct sk_buff *copy;
-     int frag_field, frag_flags, frag_offset, total;
-     numpack++;
-     /* First deal with every timer that has already expired. */
-     timenow = 0; /* refresh the time */
-     while (timer_head && timer_head->expires < jiffies()) {
-         struct ipq *expired = (struct ipq *) timer_head->data;
-         /* Point this_host at the expired queue's host entry before calling the handler. */
-         this_host = expired->hf;
-         timer_head->function(timer_head->data);
-     }
-     /* Split ip_off into its flag bits and the fragment offset. */
-     frag_field = ntohs(iph->ip_off);
-     frag_flags = frag_field & ~IP_OFFSET;
-     frag_offset = frag_field & IP_OFFSET;
-     /* Offset 0 and no "more fragments" flag: this packet is not a fragment. */
-     if (frag_offset == 0 && (frag_flags & IP_MF) == 0) {
-         ip_defrag(iph, NULL);
-         return IPF_NOTF;
-     }
-     /* A fragment: allocate an sk_buff with room for the packet right behind it and copy the packet there. */
-     total = ntohs(iph->ip_len);
-     copy = malloc(total + sizeof *copy);
-     if (copy == NULL)
-         nids_params.no_mem("ip_defrag_stub"); /* NOTE(review): copy is dereferenced below, so the handler presumably never returns -- confirm */
-     copy->data = (char *) (copy + 1);
-     memcpy(copy->data, iph, total);
-     /* truesize: packet length plus 16 and dev_addon, rounded up to a multiple of 16, plus sk_buff_size. */
-     copy->truesize = total + 16 + nids_params.dev_addon;
-     copy->truesize = (copy->truesize + 15) & ~15;
-     copy->truesize += nids_params.sk_buff_size;
-     /*
-      * Once every fragment of a datagram has arrived, ip_defrag() returns the
-      * merged packet and the caller goes on processing it (IPF_NEW);
-      * otherwise IP processing is skipped for this fragment (IPF_ISF).
-      */
-     *defrag = (struct ip *) ip_defrag((struct ip *) copy->data, copy);
-     if (*defrag != NULL)
-         return IPF_NEW;
-     return IPF_ISF;
- }
- /*
-  * Process an incoming IP datagram fragment.
-  *
-  * iph: IP header of the packet being examined.
-  * skb: buffer holding the fragment; ip_defrag_stub() passes 0 here for a
-  *      packet that is not a fragment.
-  *
-  * Returns 0 for an unfragmented packet (after freeing any queue found for
-  * it), NULL when the fragment was dropped or the datagram is still
-  * incomplete, and the result of ip_glue() once ip_done() reports that the
-  * queue is complete. On the drop paths skb is released with kfree_skb().
-  */
- // This is the main fragment-reassembly logic.
- static char *ip_defrag(struct ip *iph, struct sk_buff *skb)
- {
- struct ipfrag *prev, *next, *tmp;
- struct ipfrag *tfp;
- struct ipq *qp;
- char *skb2;
- unsigned char *ptr;
- int flags, offset;
- int i, ihl, end;
- // If this is a fragment (skb is set) and hostfrag_find() finds no host entry for it, create one right away.
- // NOTE(review): the lookup presumably also sets this_host to the host entry of this packet -- confirm in hostfrag_find().
- if (!hostfrag_find(iph) && skb)
- hostfrag_create(iph);
- /* Start by cleaning up the memory. */
- // When this host's fragment memory exceeds IPFRAG_HIGH_THRESH, let ip_evictor() free fragment memory of the current host.
- if (this_host)
- if (this_host->ip_frag_mem > IPFRAG_HIGH_THRESH)
- ip_evictor();
- /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
- // Look up the fragment list that belongs to this IP packet (only possible when a host entry exists).
- if (this_host)
- qp = ip_find(iph);
- else
- qp = 0;
- /* Is this a non-fragmented datagram? */
- offset = ntohs(iph->ip_off);
- flags = offset & ~IP_OFFSET;
- offset &= IP_OFFSET;
- if (((flags & IP_MF) == 0) && (offset == 0))
- {
- if (qp != NULL)
- ip_free(qp); /* Fragmented frame replaced by full
- unfragmented copy */
- return 0;
- }
- /* ip_evictor() could have removed all queues for the current host */
- if (!this_host)
- hostfrag_create(iph);
- offset <<= 3; /* offset is in 8-byte chunks */
- ihl = iph->ip_hl * 4;
- /*
- If the queue already existed, keep restarting its timer as long as
- we still are receiving fragments. Otherwise, create a fresh queue
- entry.
- */
- // Fragments of this packet have already been seen for the current host.
- if (qp != NULL)
- {
- /* ANK. If the first fragment is received, we should remember the correct
- IP header (with options) */
- if (offset == 0)
- {
- qp->ihlen = ihl;
- memcpy(qp->iph, iph, ihl + 8);
- }
- del_timer(&qp->timer);
- qp->timer.expires = jiffies() + IP_FRAG_TIME; /* about 30 seconds */
- qp->timer.data = (unsigned long) qp; /* pointer to queue */
- qp->timer.function = ip_expire; /* expire function */
- add_timer(&qp->timer);
- }
- // Otherwise create a new fragment queue.
- else
- {
- /* If we failed to create it, then discard the frame. */
- if ((qp = ip_create(iph)) == NULL)
- {
- kfree_skb(skb, FREE_READ);
- return NULL;
- }
- }
- /* Attempt to construct an oversize packet. */
- // No IP packet can be larger than 65535 bytes; give up on the fragment as soon as this is detected.
- if (ntohs(iph->ip_len) + (int) offset > 65535)
- {
- // NETDEBUG(printk("Oversized packet received from %s\n", int_ntoa(iph->ip_src.s_addr)));
- nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERSIZED, iph, 0);
- kfree_skb(skb, FREE_READ);
- return NULL;
- }
- // From here on: find the position of the fragment in the queue and resolve overlaps on the way.
- // Overlap with the preceding fragment skips the front of the new data; overlap with following fragments trims (or removes) the ones already queued.
- /* Determine the position of this fragment. */
- end = offset + ntohs(iph->ip_len) - ihl;
- /* Point into the IP datagram 'data' part. */
- ptr = (unsigned char *)(skb->data + ihl);
- /* Is this the final fragment? */
- if ((flags & IP_MF) == 0)
- qp->len = end;
- /*
- Find out which fragments are in front and at the back of us in the
- chain of fragments so far. We must know where to put this
- fragment, right?
- */
- prev = NULL;
- for (next = qp->fragments; next != NULL; next = next->next)
- {
- if (next->offset >= offset)
- break; /* bingo! */
- prev = next;
- }
- /*
- We found where to put this one. Check for overlap with preceding
- fragment, and, if needed, align things so that any overlaps are
- eliminated.
- */
- if (prev != NULL && offset < prev->end)
- {
- nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0);
- i = prev->end - offset;
- offset += i; /* ptr into datagram */
- ptr += i; /* ptr into fragment data */
- }
- /*
- Look for overlap with succeeding segments.
- If we can merge fragments, do it.
- */
- for (tmp = next; tmp != NULL; tmp = tfp)
- {
- tfp = tmp->next;
- if (tmp->offset >= end)
- break; /* no overlaps at all */
- nids_params.syslog(NIDS_WARN_IP, NIDS_WARN_IP_OVERLAP, iph, 0);
- i = end - next->offset; /* overlap is 'i' bytes. NOTE(review): measured from next, not tmp; the two are the same node only while every earlier overlapped fragment was removed (see "next = tfp" below) -- confirm intent */
- tmp->len -= i; /* so reduce size of */
- tmp->offset += i; /* next fragment */
- tmp->ptr += i;
- /*
- If we get a frag size of <= 0, remove it and the packet that it
- goes with. We never throw the new frag away, so the frag being
- dumped has always been charged for.
- */
- if (tmp->len <= 0)
- {
- if (tmp->prev != NULL)
- tmp->prev->next = tmp->next;
- else
- qp->fragments = tmp->next;
- if (tmp->next != NULL)
- tmp->next->prev = tmp->prev;
- next = tfp; /* We have killed the original next frame */
- frag_kfree_skb(tmp->skb, FREE_READ);
- frag_kfree_s(tmp, sizeof(struct ipfrag));
- }
- }
- // Now insert the current fragment into the queue.
- /* Insert this fragment in the chain of fragments. */
- tfp = NULL; /* dead store: overwritten by the next statement */
- tfp = ip_frag_create(offset, end, skb, ptr);
- /*
- No memory to save the fragment - so throw the lot. If we failed
- the frag_create we haven't charged the queue.
- */
- if (!tfp)
- {
- nids_params.no_mem("ip_defrag");
- kfree_skb(skb, FREE_READ);
- return NULL;
- }
- /* From now on our buffer is charged to the queues. */
- tfp->prev = prev;
- tfp->next = next;
- if (prev != NULL)
- prev->next = tfp;
- else
- qp->fragments = tfp;
- if (next != NULL)
- next->prev = tfp;
- /*
- OK, so we inserted this new fragment into the chain. Check if we
- now have a full IP datagram which we can bump up to the IP
- layer...
- */
- // Check whether all fragments have been collected; if so, combine them into one large IP packet and return it.
- if (ip_done(qp))
- {
- skb2 = ip_glue(qp); /* glue together the fragments */
- return (skb2);
- }
- return (NULL);
- }