IP分片实现分析

数据包的分片与重组发生在IP层. 当IP数据包的长度超过PMTU,并且允许分片(IP报头未设置DF标志)时,就会进行分片操作; 分片后的每个数据包都有独立的IP报头,并且被独立路由,最终在接收端的IP层进行重组. 目前有两种分片处理方式: 快速分片和慢速分片.

1 分片工作

IP层在处理分片时,把TCP/UDP的负载分割成不超过MTU大小的片段,并且为每个片段设置IP报头,更新IP报头中的分片偏移(offset)和校验和; 如果是慢速分片,还需要进行数据的拷贝(处理frag_list和frags数组).

2 分片方式

同时满足以下条件时走快速分片,否则走慢速分片:

(1) 存在frag_list链表,且链表上每个分片的大小都不超过MTU(其实这是TCP/UDP层进行了预分片处理)

(2) 除最后一个分片外,其他分片的数据长度都必须是8字节的整数倍

(3) 此skb没有被克隆

3 源码分析

int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))

{

         /*判读是否可以进行快速分片

          如果存在frag_list,则进入快速分片流程 */   

          if (skb_has_frag_list(skb)) {

                   struct sk_buff *frag, *frag2;

                   int first_len = skb_pagelen(skb);/*计算skb数据大小包括线性数据和SG数据 */

                      /*长度大于MTU,没有8自己对齐,已经被分片,或者已经被克隆,都会进入慢速分片流程 */

                    if (first_len - hlen > mtu ||

                       ((first_len - hlen) & 7) ||

                       ip_is_fragment(iph) ||

                       skb_cloned(skb))

                            goto slow_path;

                   skb_walk_frags(skb, frag) {

                            /* 继续检查每个分片是否满足快速分片条件 */

                            if (frag->len > mtu ||

                                ((frag->len & 7) && frag->next) ||

                                skb_headroom(frag) < hlen)

                                     goto slow_path_clean;

                            /* Partially cloned skb? */

                            if (skb_shared(frag))

                                     goto slow_path_clean;

                     BUG_ON(frag->sk);

                            if (skb->sk) {

                                     frag->sk = skb->sk;

                                     frag->destructor = sock_wfree;

                            }

                       /*从skb中减去分片大小 */

                            skb->truesize -= frag->truesize;

                   }

       /*处理第一个skb,重新设置IP报头 */

                   err = 0;

                   offset = 0;

                   frag = skb_shinfo(skb)->frag_list;

                   skb_frag_list_init(skb);

                   skb->data_len = first_len - skb_headlen(skb);

                   skb->len = first_len;

                   iph->tot_len = htons(first_len);

                   iph->frag_off = htons(IP_MF);

                   ip_send_check(iph);

         /*处理frag_list分片 */

                   for (;;) {

                            /* Prepare header of the next frame,

                             * before previous one went down. */

                            if (frag) {

               /*重新构建分片IP报头 */

                                     frag->ip_summed = CHECKSUM_NONE;

                                     skb_reset_transport_header(frag);

                                     __skb_push(frag, hlen);

                                     skb_reset_network_header(frag);

                                     memcpy(skb_network_header(frag), iph, hlen);

                                     iph = ip_hdr(frag);

                                     iph->tot_len = htons(frag->len);

                                     ip_copy_metadata(frag, skb);

                                     if (offset == 0)

                                               ip_options_fragment(frag);

                                     offset += skb->len - hlen;

                                     iph->frag_off = htons(offset>>3);

                                     if (frag->next != NULL)

                                               iph->frag_off |= htons(IP_MF);

                                     /* Ready, complete checksum */

                                     ip_send_check(iph);

                            }

               /*发送IP分片 */

                            err = output(skb);

                            skb = frag;

                            frag = skb->next;

                            skb->next = NULL;

                   }

/*慢速分片 */

slow_path:

        

         iph = ip_hdr(skb);

         /*计算数据包总长度 */

         left = skb->len - hlen;                 /* Space per frame */

         ptr = hlen;                  /* Where to start from */

         ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));

         offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;

         not_last_frag = iph->frag_off & htons(IP_MF);

         while (left > 0) {

                   len = left;

                   /* IF: it doesn't fit, use 'mtu' - the data space left */

                   if (len > mtu)

                            len = mtu;

                   /* IF: we are not sending up to and including the packet end

                      then align the next start on an eight byte boundary */

                   if (len < left)      {

                            len &= ~7;

                   }

       /*重新分配一个新的分片 */

                   if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {

                            NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");

                            err = -ENOMEM;

                            goto fail;

                   }

                /*重新构建skb和IP报头 */           

                   ip_copy_metadata(skb2, skb);

                   skb_reserve(skb2, ll_rs);

                   skb_put(skb2, len + hlen);

                   skb_reset_network_header(skb2);

                   skb2->transport_header = skb2->network_header + hlen

                   if (skb->sk)

                            skb_set_owner_w(skb2, skb->sk);

                /*复制skb线性数据包 */

                   skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);

               /*复制skb 的frag_list和SG数据 */

                   if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))

                            BUG();

                   left -= len;

                   iph = ip_hdr(skb2);

                   /*设置分片偏移量,用于重组 */

                iph->frag_off = htons((offset >> 3));

                   if (offset == 0)

                            ip_options_fragment(skb);

                   if (left > 0 || not_last_frag)

                            iph->frag_off |= htons(IP_MF);/*设置MF标志,用于重组 */

                   ptr += len;

                   offset += len;

                   iph->tot_len = htons(len + hlen);

                   ip_send_check(iph);

                /*最后输出新的IP数据包 */

                   err = output(skb2);

                   if (err)

                            goto fail;

                   IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);

         }

         consume_skb(skb);

         IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);

         return err;

 

fail:

         kfree_skb(skb);

         IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);

         return err;

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值