tcp/ip 协议栈Linux源码分析五 IPv6分片报文重组分析一

做防火墙模块的时候遇到过IPv6分片报文处理的问题:当时netfilter无法基于传输层的端口拦截IPv6分片报文,但是IPv4的分片报文可以。分析内核源码得知,这是因为netfilter的连接跟踪模块会重组IPv4分片报文,但是没有重组IPv6分片报文。自3.10.x版本起内核修改了这一块,netfilter会在PRE_ROUTING之前重组IPv6分片报文。

之前写过几篇博客分析了IPv4分片报文的处理,接下来分析下IPv6分片报文的处理。IPv6分片报文重组原理基本上和IPv4类似,都需要维护一个分片链表,都有定时器处理垃圾回收等等,只是细节上略有不同。

内核版本:3.4.39

IPv6模块启动的时候会去注册分片报文处理函数以及IPv6报文处理函数。

网卡驱动收到IPv6报文后查找协议处理函数,即ipv6_rcv,这个函数简单处理后传递给pre-routing,如果没有意外就传递给ip6_rcv_finish(),这个函数就是查找路由,然后调用相应的处理函数,如果是发给本机的就调用ip6_input(),在这个函数里面就是让local-in链上的钩子函数处理一遍,没有意外的话就传递给ip6_input_finish(),在这个函数里面会依次调用各个扩展首部以及上层协议的处理函数,比如路由选项、目的地选项、分片选项、tcp协议或者udp协议等等,我们主要关注分片选项的处理。完整的流程图如下:

 分片报文是调用ipv6_frag_rcv函数来处理,在看这个函数之前,先看下ipv6分片表的组织图:

 收到分片报文后会根据报文的三元组(saddr, daddr, ip ID)结合一个随机数rnd计算一个哈希值,然后根据这个哈希值去查找哈希数组。每个数组元素是一个链表,链表中挂着哈希值相同的分片队列。收到报文后去查找匹配的分片队列,匹配了则插入进去,如果所有分片报文都集齐了则开始重组。用于计算哈希值的随机数rnd不是固定的,内核会起一个定时器定期修改该值,然后把已有的分片队列重新散列,这样做是为了防止攻击。因为分片队列占用系统内存,如果一直都无法集齐的话,这段内存就会被垃圾回收定时器回收。回收分片内存的时候优先回收最久没有收到新分片的队列(每收到一个分片,对应队列就会被移到lru链表的尾部),上图中的lru链表就是用来处理这个的。

看下分片选项处理函数ipv6_frag_rcv()的实现:

static int ipv6_frag_rcv(struct sk_buff *skb)
{
	struct frag_hdr *fhdr;
	struct frag_queue *fq;
	const struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct net *net = dev_net(skb_dst(skb)->dev);

    //防止分片嵌套,分片报文会在下面设置这个标志位,这里是防止存在多个分片选项头
	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
		goto fail_hdr;

    //增加统计计数
	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);

	/* Jumbo payload inhibits frag. header */
    //jumbo 类型特大报文不允许分片
	if (hdr->payload_len==0)
		goto fail_hdr;

    //设置分片选项头指针
	if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
				 sizeof(struct frag_hdr))))
		goto fail_hdr;

	hdr = ipv6_hdr(skb);

	//获取分片头部指针
	fhdr = (struct frag_hdr *)skb_transport_header(skb);

    //检查片偏移和MF标志位是否合法,不合法则设置IP6SKB_FRAGMENTED标志位
    //并返回1
	if (!(fhdr->frag_off & htons(0xFFF9))) {
		/* It is not a fragmented frame */
		skb->transport_header += sizeof(struct frag_hdr);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);

		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);

		//设置一个标志位
		IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
		return 1;
	}

    //如果分片报文占用内存超过阈值,则调用ip6_evictor释放部分旧的分片报文
	if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
		ip6_evictor(net, ip6_dst_idev(skb_dst(skb)));

    //根据源地址,目的地址,IP ID去分片表中找到相应的分片哈希队列,找到则返回
    //找不到则新建一个,该函数返回失败的唯一可能性是创建失败。
	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr);
	if (fq != NULL) {
		int ret;

		spin_lock(&fq->q.lock);

        //找到队列后则进行入队或重组操作
		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);

		spin_unlock(&fq->q.lock);
		fq_put(fq);
		return ret;
	}

	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -1;

fail_hdr:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
	return -1;
}

找到分片队列后则将该报文插入到队列里合适的位置,这个处理交给ip6_frag_queue函数完成:

/*
 * Insert one received fragment into a reassembly queue and, if that
 * completes the datagram, hand the queue to ip6_frag_reasm().
 *
 * @fq:    reassembly queue the fragment belongs to; the caller visible in
 *         this file (ipv6_frag_rcv) holds fq->q.lock around the call
 * @skb:   the fragment; on the err/discard paths it is freed here
 * @fhdr:  pointer to the Fragment header inside @skb
 * @nhoff: value stored into fq->nhoffset when this is the fragment with
 *         offset 0
 *
 * Returns the result of ip6_frag_reasm() when all fragments are present,
 * otherwise -1 (fragment queued and still waiting, or an error).
 */
static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
			   struct frag_hdr *fhdr, int nhoff)
{
	struct sk_buff *prev, *next;
	struct net_device *dev;
	int offset, end;
	struct net *net = dev_net(skb_dst(skb)->dev);

	/*
	 * The queue is already marked complete (per the original author's
	 * note: reassembled, or reclaimed by garbage collection), so any
	 * fragment that arrives now is simply dropped.
	 */
	if (fq->q.last_in & INET_FRAG_COMPLETE)
		goto err;

	/* Mask off the low three bits: the offset is a multiple of 8 bytes. */
	offset = ntohs(fhdr->frag_off) & ~0x7;

	/*
	 * End of this fragment's data within the fragmentable part:
	 * payload_len minus everything between the end of the fixed IPv6
	 * header and the end of the Fragment header, added to offset.
	 */
	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));

	/*
	 * The fragment must not reach beyond IPV6_MAXPLEN (65535); the
	 * unsigned cast also rejects a negative end.
	 */
	if ((unsigned int)end > IPV6_MAXPLEN) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_INHDRERRORS);
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
				  ((u8 *)&fhdr->frag_off -
				   skb_network_header(skb)));
		return -1;
	}

	/*
	 * With CHECKSUM_COMPLETE, remove the contribution of the bytes from
	 * the network header up to the end of the Fragment header from
	 * skb->csum.
	 */
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		const unsigned char *nh = skb_network_header(skb);
		skb->csum = csum_sub(skb->csum,
				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
						  0));
	}

	/* Is this the final fragment? */
	if (!(fhdr->frag_off & htons(IP6_MF))) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrupted.
		 */
		/*
		 * This is the last fragment: reject it if data beyond its end
		 * is already queued, or if an earlier last fragment gave a
		 * different length; otherwise record the total length and
		 * set INET_FRAG_LAST_IN.
		 */
		if (end < fq->q.len ||
		    ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len))
			goto err;
		fq->q.last_in |= INET_FRAG_LAST_IN;
		fq->q.len = end;
	} else {
		/* Check if the fragment is rounded to 8 bytes.
		 * Required by the RFC.
		 */
		/*
		 * A non-final fragment must end on an 8-byte boundary;
		 * otherwise report a parameter problem.
		 */
		if (end & 0x7) {
			/* RFC2460 says always send parameter problem in
			 * this case. -DaveM
			 */
			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
					 IPSTATS_MIB_INHDRERRORS);
			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
					  offsetof(struct ipv6hdr, payload_len));
			return -1;
		}
		if (end > fq->q.len) {
			/* Some bits beyond end -> corruption. */
			/* The last fragment already fixed the length: drop. */
			if (fq->q.last_in & INET_FRAG_LAST_IN)
				goto err;

			/* Otherwise grow the known length. */
			fq->q.len = end;
		}
	}

	/* A fragment that carries no data is dropped. */
	if (end == offset)
		goto err;

	/* Advance skb->data past the Fragment header. */
	/* Point into the IP datagram 'data' part. */
	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
		goto err;

	/*
	 * Trim the skb to exactly end - offset bytes of fragment data
	 * (pskb_trim_rcsum presumably also keeps the receive checksum
	 * consistent — confirm against the helper).
	 */
	if (pskb_trim_rcsum(skb, end - offset))
		goto err;

	/* Find out which fragments are in front and at the back of us
	 * in the chain of fragments so far.  We must know where to put
	 * this fragment, right?
	 */
	/*
	 * Fast path: an empty queue, or a tail that starts before this
	 * fragment, means the fragment is appended at the end. Otherwise
	 * walk the list for the first entry with offset >= ours.
	 */
	prev = fq->q.fragments_tail;
	if (!prev || FRAG6_CB(prev)->offset < offset) {
		next = NULL;
		goto found;
	}
	prev = NULL;
	for(next = fq->q.fragments; next != NULL; next = next->next) {
		if (FRAG6_CB(next)->offset >= offset)
			break;	/* bingo! */
		prev = next;
	}

found:
	/* RFC5722, Section 4, amended by Errata ID : 3089
	 *                          When reassembling an IPv6 datagram, if
	 *   one or more its constituent fragments is determined to be an
	 *   overlapping fragment, the entire datagram (and any constituent
	 *   fragments) MUST be silently discarded.
	 */

	/* Check for overlap with preceding fragment. */
	/* Per RFC 5722, any overlap discards the whole fragment queue. */
	if (prev &&
	    (FRAG6_CB(prev)->offset + prev->len) > offset)
		goto discard_fq;

	/* Look for overlap with succeeding segment. */
	if (next && FRAG6_CB(next)->offset < end)
		goto discard_fq;

	FRAG6_CB(skb)->offset = offset;

	/* Insert this fragment in the chain of fragments. */
	skb->next = next;
	if (!next)
		fq->q.fragments_tail = skb;
	if (prev)
		prev->next = skb;
	else
		fq->q.fragments = skb;

	/* Remember the incoming interface index and detach the skb from it. */
	dev = skb->dev;
	if (dev) {
		fq->iif = dev->ifindex;
		skb->dev = NULL;
	}
	/* Update the queue timestamp and the number of data bytes collected. */
	fq->q.stamp = skb->tstamp;
	fq->q.meat += skb->len;

	/* Charge this skb to the fragment memory accounting. */
	atomic_add(skb->truesize, &fq->q.net->mem);

	/* The first fragment.
	 * nhoffset is obtained from the first fragment, of course.
	 */
	/*
	 * For the fragment at offset 0, remember nhoff for reassembly and
	 * set INET_FRAG_FIRST_IN.
	 */
	if (offset == 0) {
		fq->nhoffset = nhoff;
		fq->q.last_in |= INET_FRAG_FIRST_IN;
	}

	/*
	 * First and last fragments seen and the collected bytes equal the
	 * total length: everything is here, so reassemble.
	 */
	if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    fq->q.meat == fq->q.len)
		return ip6_frag_reasm(fq, prev, dev);

	/* Still incomplete: move the queue to the tail of the LRU list. */
	write_lock(&ip6_frags.lock);
	list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
	write_unlock(&ip6_frags.lock);
	return -1;

discard_fq:
	/* Overlap detected: kill the whole queue, then drop this skb too. */
	fq_kill(fq);
err:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
	return -1;
}

如果分片报文都集齐了,则调用ip6_frag_reasm函数进行重组:

/*
 * Build one packet out of a complete fragment queue.
 *
 * @fq:   queue whose fragments are all present
 * @prev: entry preceding the skb that was just inserted by
 *        ip6_frag_queue(), or NULL when that skb is already the list head
 * @dev:  device assigned to the reassembled skb and used for statistics
 *
 * Returns 1 on success; the reassembled packet is then the head skb, with
 * the remaining fragments hanging off its frag_list. Returns -1 when the
 * result would be oversized or an allocation fails.
 */
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
			  struct net_device *dev)
{
	struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
	struct sk_buff *fp, *head = fq->q.fragments;
	int    payload_len;
	unsigned int nhoff;

	/* Take the queue out of service first. */
	fq_kill(fq);

	/* Make the one we just received the head. */
	/*
	 * Later processing continues on the skb that was received last, so
	 * that skb has to become the head of the chain: a clone (fp) takes
	 * its place in the list, the skb itself is morphed into the old
	 * first fragment and linked in front, and the old first fragment is
	 * freed.
	 */
	if (prev) {
		head = prev->next;
		fp = skb_clone(head, GFP_ATOMIC);

		if (!fp)
			goto out_oom;

		fp->next = head->next;
		if (!fp->next)
			fq->q.fragments_tail = fp;
		prev->next = fp;

		skb_morph(head, fq->q.fragments);
		head->next = fq->q.fragments->next;

		kfree_skb(fq->q.fragments);
		fq->q.fragments = head;
	}

	WARN_ON(head == NULL);
	WARN_ON(FRAG6_CB(head)->offset != 0);

	/* Unfragmented part is taken from the first segment. */
	/*
	 * Payload length of the reassembled packet: the headers that sit
	 * between the fixed IPv6 header and head->data, without the Fragment
	 * header, plus the total fragmentable length fq->q.len.
	 */
	payload_len = ((head->data - skb_network_header(head)) -
		       sizeof(struct ipv6hdr) + fq->q.len -
		       sizeof(struct frag_hdr));

	/* The reassembled payload must not exceed IPV6_MAXPLEN (65535). */
	if (payload_len > IPV6_MAXPLEN)
		goto out_oversize;

	/* Head of list must not be cloned. */
	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
		goto out_oom;

	/* If the first fragment is fragmented itself, we split
	 * it to two chunks: the first with data and paged part
	 * and the second, holding only fragments. */
	/*
	 * Move head's existing frag_list onto a fresh zero-length skb that
	 * is linked right after head, so head's frag_list is free to hold
	 * the remaining fragments below.
	 */
	if (skb_has_frag_list(head)) {
		struct sk_buff *clone;
		int i, plen = 0;

		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
			goto out_oom;
		clone->next = head->next;
		head->next = clone;
		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
		skb_frag_list_init(head);
		/* plen = bytes held in head's page frags; the rest of data_len moves to clone. */
		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
		clone->len = clone->data_len = head->data_len - plen;
		head->data_len -= clone->len;
		head->len -= clone->len;
		clone->csum = 0;
		clone->ip_summed = head->ip_summed;
		atomic_add(clone->truesize, &fq->q.net->mem);
	}

	/* We have to remove fragment header from datagram and to relocate
	 * header in order to calculate ICV correctly. */
	/*
	 * Rebuild the headers: copy the first byte at the transport header
	 * (presumably the Fragment header's next-header field) into the
	 * byte at nhoff, then slide everything in front of head->data
	 * forward by sizeof(struct frag_hdr) so the Fragment header is
	 * overwritten, and shift the mac/network header offsets to match.
	 */
	nhoff = fq->nhoffset;
	skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
	memmove(head->head + sizeof(struct frag_hdr), head->head,
		(head->data - head->head) - sizeof(struct frag_hdr));
	head->mac_header += sizeof(struct frag_hdr);
	head->network_header += sizeof(struct frag_hdr);

	/*
	 * Hang the remaining fragments on head's frag_list, mark the current
	 * data position as the transport header, and push head->data back to
	 * the network header.
	 */
	skb_shinfo(head)->frag_list = head->next;
	skb_reset_transport_header(head);
	skb_push(head, head->data - skb_network_header(head));

	/*
	 * Fold every fragment's length, truesize and checksum into head;
	 * mixed ip_summed states downgrade head to CHECKSUM_NONE.
	 */
	for (fp=head->next; fp; fp = fp->next) {
		head->data_len += fp->len;
		head->len += fp->len;
		if (head->ip_summed != fp->ip_summed)
			head->ip_summed = CHECKSUM_NONE;
		else if (head->ip_summed == CHECKSUM_COMPLETE)
			head->csum = csum_add(head->csum, fp->csum);
		head->truesize += fp->truesize;
	}
	/* The whole chain leaves the fragment memory accounting. */
	atomic_sub(head->truesize, &fq->q.net->mem);

	head->next = NULL;
	head->dev = dev;
	head->tstamp = fq->q.stamp;
	ipv6_hdr(head)->payload_len = htons(payload_len);
	IP6CB(head)->nhoff = nhoff;
	IP6CB(head)->flags |= IP6SKB_FRAGMENTED;

	/* Yes, and fold redundant checksum back. 8) */
	if (head->ip_summed == CHECKSUM_COMPLETE)
		head->csum = csum_partial(skb_network_header(head),
					  skb_network_header_len(head),
					  head->csum);

	rcu_read_lock();
	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
	rcu_read_unlock();
	/* The queue no longer references any of the skbs. */
	fq->q.fragments = NULL;
	fq->q.fragments_tail = NULL;
	return 1;

out_oversize:
	if (net_ratelimit())
		printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len);
	goto out_fail;
out_oom:
	if (net_ratelimit())
		printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
out_fail:
	rcu_read_lock();
	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
	rcu_read_unlock();
	return -1;
}

总的看来,关于IPv6分片队列的入队,回收,重组操作和IPv4类似,掌握了IPv4的重组基本上也就掌握了IPv6的重组。

  • 2
    点赞
  • 17
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值