u32过滤器

最新推荐文章于 2022-01-02 21:24:51 发布

fanxiaoyu321

最新推荐文章于 2022-01-02 21:24:51 发布

阅读量1.6k

点赞数 1

分类专栏： linux网络设备接口层文章标签： u32 流量控制 filter

本文链接：https://blog.csdn.net/xiaoyu_750516366/article/details/121177930

版权

linux网络设备接口层专栏收录该内容

25 篇文章 14 订阅

订阅专栏

本文详细介绍了u32过滤器的命令行用法，包括handle参数、classid和flowid的作用，以及SELECTOR的各种匹配方式。同时，探讨了u32在内核中的数据结构，如tc_u_common、tc_u_hnode和tc_u_knode，并详细解析了u32的初始化、获取和修改过程。内容涵盖协议头字段匹配、mark匹配及示例应用。

摘要由CSDN通过智能技术生成

u32是最通用的一个数据包filter，它基于数据包的任意偏移位置匹配数据包，所以功能最强大，当然语法也最繁琐。个人理解这块重点掌握u32过滤器怎么用，不用过多关注其实现细节，这里之所以展开分析，主要是为了通过其对内核的流控框架有个比较清晰的认知，方便需要时进一步仔细研究。

命令行说明

# 简化版命令格式如下:
tc  filter ... [ handle HANDLE ] u32 OPTION_LIST [ classid CLASSID ]

HANDLE := { u12_hex_htid:[u8_hex_hash:[u12_hex_nodeid]] | 0xu32_hex_value }
OPTION_LIST := [ OPTION_LIST ] OPTION
CLASSID := { root | none | [u16_major]:u16_minor | u32_hex_value }

OPTION := { match SELECTOR | action ACTION }
SELECTOR := { u32 VAL_MASK_32 | u16 VAL_MASK_16 | u8 VAL_MASK_8 | ip IP | ip6 IP6 | { tcp | udp } \
    TCPUDP | icmp ICMP | mark VAL_MASK_32 | ether ETHER }
IP := { { src | dst } { default | any | all | ip_address [ / { prefixlen | netmask } ] } \
    AT | { dsfield | ihl | protocol | icmp_type | icmp_code } VAL_MASK_8 | \
    { sport | dport } VAL_MASK_16 | nofrag | firstfrag | df | mf }
IP6  :=  {  {  src  |  dst  }  {  default | any | all | ip6_address [/prefixlen ] } \
    AT | priority VAL_MASK_8 | { protocol | icmp_type | icmp_code } VAL_MASK_8 \
    | flowlabel VAL_MASK_32 | { sport | dport } VAL_MASK_16 }
TCPUDP := { src | dst } VAL_MASK_16
ICMP := { type VAL_MASK_8 | code VAL_MASK_8 }
ETHER := { src | dst } ether_address AT
VAL_MASK_32 := u32_value u32_hex_mask [ AT ]
VAL_MASK_16 := u16_value u16_hex_mask [ AT ]
VAL_MASK_8 := u8_value u8_hex_mask [ AT ]
AT := [ at [ nexthdr+ ] int_value ]

handle参数指定filter的句柄；一般来说该字段不需要，让内核自己生成即可，当要配置非常多的filter时该字段才能派上用场。u32的句柄分为3部分：高12位代表hash表ID，对应内核tc_u_hnode.handle，通过它可以在tc_u_common.hlist中索引hnode对象；中间8位代表hash ID，可以用它来索引hnode.ht数组；低12位代表node ID，每条filter命令携带的一组match条件对应内核的一个tc_u_knode对象，低12位可以用来索引该对象；
classid参数和flowid的作用相同，表示该filter的过滤结果，即将数据包分类到哪个class；

最灵活的参数是SELECTOR，它告诉u32要怎么匹配数据包。可以对它进行简单分类：

基本的u32、u16、u8匹配；
协议头字段匹配；
mark匹配；

基本匹配

u32、u16、u8分别表示按照4字节、2字节和1字节在指定偏移位置匹配数据包内容，匹配条件以value和mask的方式指定。偏移位置用at关键字加一个正整数表示从网络层首部开头的偏移量，如果指定了nexthdr+，那么偏移量从网络层的上一层开头开始偏移。

协议头字段匹配

ip、ip6、tcp、udp、icmp以及ether分别表示匹配对应协议的头部字段。

对于ip协议和ipv6协议：

{ src | dst } { default | any | all | ip_address [ / { prefixlen | netmask } ] } AT表示匹配IP地址，default、any、all等关键字表示匹配任意的IP地址，也可以携带前缀或者子网掩码;
dsfield VAL_MASK_8表示匹配tos和优先级字段；
ihl VAL_MASK_8表示匹配首部长度字段，值的单位为4字节；
protocol VAL_MASK_8表示匹配协议字段；
{icmp_type | icmp_code } VAL_MASK_8假定下一个头部是icmp，检查其type和code，该选项最好不要使用，它假定IP首部是不包含选项的；
{ sport | dport } VAL_MASK_16假定下一个头部是tcp或者udp，去匹配其端口号，类似的原因，最好不要使用该选项；
nofrag | firstfrag | df | mf分别表示匹配非分片报文、第一个分片报文以及df和mf标记是否设置；
priority VAL_MASK_8匹配IPv6报文的优先级字段；
flowlabel VAL_MASK_32匹配IPv6报文的flow label字段；

对于tcp和udp协议：

{ src | dst } VAL_MASK_16分别匹配tcp和udp报文的源端口以及目的端口；

对于icmp协议：

{ type VAL_MASK_8 | code VAL_MASK_8 }分别匹配icmp报文的type和code字段；

对于以太帧：

ether ETHER可以匹配以太帧的源和目的mac地址，最好不要使用该选项，它假定报文的开头是以太帧内容；

mark匹配

mark VAL_MASK_32可以匹配数据包的fwmark值，这使得u32可以配合Netfilter工作；

示例

tc qdisc add dev eth0 root handle 1: htb default 20

tc class add dev eth0 parent 1: classid 1:10 htb rate 50Kbps ceil 50Kbps
tc class add dev eth0 parent 1:10 classid 1:20 htb rate 20Kbps ceil 50Kbps
tc class add dev eth0 parent 1:10 classid 1:30 htb rate 30Kbps ceil 50Kbps

tc filter add dev eth0 parent 1: prio 10 u32 match u8 64 0xff at 8 flowid 1:20
tc filter add dev eth0 parent 1: prio 10 u32 match u8 64 0xff at 8 flowid 1:30

数据结构

match参数: tc_u32_key/tc_u32_sel

// One 32-bit match key. u32_classify() reads the 4-byte word found at
// "off + (off2 & offmask)" bytes past its current packet pointer and treats
// the key as matched when "(word ^ val) & mask" is zero.
struct tc_u32_key {
    // Bits of the packet word that take part in the comparison.
    __be32 mask;
    // Expected value of the compared bits.
    __be32 val;
    // Fixed byte offset of the word to compare.
	int	off;
    // Mask applied to the classifier's running offset (off2) before it is
    // added to off.
	int	offmask;
};

// Selector of one filter node: a fixed header followed by nkeys match keys.
// The struct definition must end with ';' (the original snippet omitted it).
struct tc_u32_sel {
    // TC_U32_* flag bits (TERMINAL, OFFSET, VAROFFSET, EAT are tested by
    // u32_classify()).
    unsigned char flags;
    // Right shift applied to the variable offset read from the packet.
    unsigned char offshift;
    // Number of valid entries in keys[]; every "match" uses one entry.
    unsigned char nkeys;

    // Mask applied to the 16-bit packet word read at offoff (variable offset).
    __be16 offmask;
    // Constant part of the offset used when descending to a linked table.
    __u16 off;
    // Byte offset of the 16-bit packet word that supplies the variable offset.
    short offoff;

    // Byte offset of the 32-bit packet word that is hashed to pick a bucket.
    short hoff;
    // Hash mask; u32_change() derives the knode's fshift from it.
    __be32 hmask;
    // Trailing array of match keys, nkeys entries long.
    struct tc_u32_key keys[0];
};

命令行参数解析: u32_parse_opt()

// Descriptor that exposes the "u32" filter kind through a filter_util record.
struct filter_util u32_filter_util = {
	.id = "u32",
	.parse_fopt = u32_parse_opt, // parser for the u32 filter's command-line options
	.print_fopt = u32_print_opt,
};

/*
 * Parse the u32-specific part of a "tc filter" command line and append the
 * result to the netlink request as attributes nested inside TCA_OPTIONS.
 *
 * @qu:     filter_util entry (not referenced in this function)
 * @handle: textual filter handle or NULL; when present it is parsed with
 *          get_u32_handle() into the message's tcm_handle
 * @argc:   number of remaining command-line words
 * @argv:   remaining command-line words
 * @n:      netlink message being built
 *
 * Returns 0 on success, or -1 after printing a diagnostic to stderr.
 */
static int u32_parse_opt(struct filter_util *qu, char *handle, int argc,
    char **argv, struct nlmsghdr *n)
{
	struct {
		struct tc_u32_sel sel;
		struct tc_u32_key keys[128]; // room for at most 128 match keys per command
	} sel = {}; // accumulates every match key given on the command line
	struct tcmsg *t = NLMSG_DATA(n);
	struct rtattr *tail;
	int sel_ok = 0, terminal_ok = 0;
	int sample_ok = 0;
	__u32 htid = 0;
	__u32 order = 0;
	__u32 flags = 0;

    // Every filter kind interprets the handle in its own way, so the handle
    // string is handed to this filter-specific parser instead of being decoded
    // by the generic tc filter code.
	if (handle && get_u32_handle(&t->tcm_handle, handle)) {
		fprintf(stderr, "Illegal filter ID\n");
		return -1;
	}
	if (argc == 0)
		return 0;

	// Open the nested TCA_OPTIONS attribute; its length is fixed up at the end.
	tail = NLMSG_TAIL(n);
	addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0);

	// Branches that end in "continue" skip the argc--/argv++ at the bottom of
	// the loop; presumably their helper has already consumed its own words.
	while (argc > 0) {
		if (matches(*argv, "match") == 0) {
			NEXT_ARG();
			if (parse_selector(&argc, &argv, &sel.sel, n)) { // parse one match expression
				fprintf(stderr, "Illegal \"match\"\n");
				return -1;
			}
			sel_ok++;
			continue;
		} else if (matches(*argv, "offset") == 0) {
			NEXT_ARG();
			if (parse_offset(&argc, &argv, &sel.sel)) {
				fprintf(stderr, "Illegal \"offset\"\n");
				return -1;
			}
			continue;
		} else if (matches(*argv, "hashkey") == 0) {
			NEXT_ARG();
			if (parse_hashkey(&argc, &argv, &sel.sel)) {
				fprintf(stderr, "Illegal \"hashkey\"\n");
				return -1;
			}
			continue;
		} else if (matches(*argv, "classid") == 0 || strcmp(*argv, "flowid") == 0) {
		    // Class that packets matching this filter are assigned to.
			unsigned int flowid;
			NEXT_ARG();
			if (get_tc_classid(&flowid, *argv)) {
				fprintf(stderr, "Illegal \"classid\"\n");
				return -1;
			}
			addattr_l(n, MAX_MSG, TCA_U32_CLASSID, &flowid, 4);
			sel.sel.flags |= TC_U32_TERMINAL;
		} else if (matches(*argv, "divisor") == 0) {
			unsigned int divisor;
			NEXT_ARG();
			// Accept only a non-zero power of two no larger than 0x100.
			if (get_unsigned(&divisor, *argv, 0) ||
			    divisor == 0 ||
			    divisor > 0x100 || ((divisor - 1) & divisor)) {
				fprintf(stderr, "Illegal \"divisor\"\n");
				return -1;
			}
			addattr_l(n, MAX_MSG, TCA_U32_DIVISOR, &divisor, 4);
		} else if (matches(*argv, "order") == 0) {
			NEXT_ARG();
			if (get_u32(&order, *argv, 0)) {
				fprintf(stderr, "Illegal \"order\"\n");
				return -1;
			}
		} else if (strcmp(*argv, "link") == 0) {
			unsigned int linkid;

			NEXT_ARG();
			if (get_u32_handle(&linkid, *argv)) {
				fprintf(stderr, "Illegal \"link\"\n");
				return -1;
			}
			// A link target may not carry a node part.
			if (linkid && TC_U32_NODE(linkid)) {
				fprintf(stderr, "\"link\" must be a hash table.\n");
				return -1;
			}
			addattr_l(n, MAX_MSG, TCA_U32_LINK, &linkid, 4);
		} else if (strcmp(*argv, "ht") == 0) {
			unsigned int ht;

			NEXT_ARG();
			if (get_u32_handle(&ht, *argv)) {
				fprintf(stderr, "Illegal \"ht\"\n");
				return -1;
			}
			// NOTE(review): this test is gated on "handle", not on "ht" -- confirm intent.
			if (handle && TC_U32_NODE(ht)) {
				fprintf(stderr, "\"ht\" must be a hash table.\n");
				return -1;
			}
			// If "sample" already chose the bucket, keep it and take only the
			// table ID from "ht"; otherwise take table ID and bucket from "ht".
			if (sample_ok)
				htid = (htid & 0xFF000) | (ht & 0xFFF00000);
			else
				htid = (ht & 0xFFFFF000);
		} else if (strcmp(*argv, "sample") == 0) {
			__u32 hash;
			unsigned int divisor = 0x100;
			struct {
				struct tc_u32_sel sel;
				struct tc_u32_key keys[4];
			} sel2 = {};

			NEXT_ARG();
			if (parse_selector(&argc, &argv, &sel2.sel, n)) {
				fprintf(stderr, "Illegal \"sample\"\n");
				return -1;
			}
			if (sel2.sel.nkeys != 1) {
				fprintf(stderr, "\"sample\" must contain exactly ONE key.\n");
				return -1;
			}
			if (*argv != 0 && strcmp(*argv, "divisor") == 0) {
				NEXT_ARG();
				if (get_unsigned(&divisor, *argv, 0) ||
				    divisor == 0 || divisor > 0x100 ||
				    ((divisor - 1) & divisor)) {
					fprintf(stderr, "Illegal sample \"divisor\"\n");
					return -1;
				}
				NEXT_ARG();
			}
			// Fold the masked sample value and reduce it modulo the divisor
			// to get the bucket number, stored in bits 12 and up of htid.
			hash = sel2.sel.keys[0].val & sel2.sel.keys[0].mask;
			hash ^= hash >> 16;
			hash ^= hash >> 8;
			htid = ((hash % divisor) << 12) | (htid & 0xFFF00000);
			sample_ok = 1;
			continue;
		} else if (strcmp(*argv, "indev") == 0) {
			char ind[IFNAMSIZ + 1] = {};

			argc--;
			argv++;
			if (argc < 1) {
				fprintf(stderr, "Illegal indev\n");
				return -1;
			}
			// ind is zero-initialised and one byte longer than the copy limit,
			// so it stays NUL-terminated.
			strncpy(ind, *argv, sizeof(ind) - 1);
			addattr_l(n, MAX_MSG, TCA_U32_INDEV, ind, strlen(ind) + 1);
		} else if (matches(*argv, "action") == 0) {
			NEXT_ARG();
			if (parse_action(&argc, &argv, TCA_U32_ACT, n)) {
				fprintf(stderr, "Illegal \"action\"\n");
				return -1;
			}
			terminal_ok++;
			continue;
		} else if (matches(*argv, "police") == 0) {
			NEXT_ARG();
			if (parse_police(&argc, &argv, TCA_U32_POLICE, n)) {
				fprintf(stderr, "Illegal \"police\"\n");
				return -1;
			}
			terminal_ok++;
			continue;
		} else if (strcmp(*argv, "skip_hw") == 0) {
			NEXT_ARG();
			flags |= TCA_CLS_FLAGS_SKIP_HW;
			continue;
		} else if (strcmp(*argv, "skip_sw") == 0) {
			NEXT_ARG();
			flags |= TCA_CLS_FLAGS_SKIP_SW;
			continue;
		} else if (strcmp(*argv, "help") == 0) {
			explain();
			return -1;
		} else {
			fprintf(stderr, "What is \"%s\"?\n", *argv);
			explain();
			return -1;
		}
		argc--; argv++;
	}

	/* We dont necessarily need class/flowids */
	if (terminal_ok)
		sel.sel.flags |= TC_U32_TERMINAL;

	// "order" supplies the node part of the handle; it must agree with any
	// node part already present in an explicit handle.
	if (order) {
		if (TC_U32_NODE(t->tcm_handle) && order != TC_U32_NODE(t->tcm_handle)) {
			fprintf(stderr, "\"order\" contradicts \"handle\"\n");
			return -1;
		}
		t->tcm_handle |= order;
	}

	if (htid)
		addattr_l(n, MAX_MSG, TCA_U32_HASH, &htid, 4);
	if (sel_ok) // emit the selector header plus only the keys actually filled in
		addattr_l(n, MAX_MSG, TCA_U32_SEL, &sel, sizeof(sel.sel) +
			  sel.sel.nkeys * sizeof(struct tc_u32_key));
	if (flags) {
		// The XOR is zero only when flags is exactly SKIP_HW | SKIP_SW.
		if (!(flags ^ (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW))) {
			fprintf(stderr, "skip_hw and skip_sw are mutually exclusive\n");
			return -1;
		}
		addattr_l(n, MAX_MSG, TCA_U32_FLAGS, &flags, 4);
	}

	// Close the nested TCA_OPTIONS attribute by recording its final length.
	tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
	return 0;
}

内核态实现

数据结构

u32会将配置到同一个qdisc以及其内部class的filter统一管理起来。它在qdisc对象中增加了一个专用字段（类型为void *）u32_node，该字段实际指向tc_u_common结构。

tc_u_common

通过tc_u_common.hlist，u32将所有配置的filter组织成一个单链表。

// State shared by all u32 hash tables attached to one qdisc; u32_init()
// stores a pointer to it in the qdisc's u32_node field.
struct tc_u_common
{
	struct tc_u_hnode *hlist; // head of the singly linked list of tc_u_hnode objects
	struct Qdisc *q; // the qdisc this object belongs to
	int	refcnt; // reference count; u32_init() increments it once for every root hnode it links in
	u32	hgenerator; // state for auto-generating hash table IDs (presumably used by gen_new_htid(), not shown here)
};

filter对象: tc_u_hnode

在Netlink接口中看到，同一个filter链表中，相同优先级的filter命令会对应同一个filter对象(tcf_proto)，对于u32，也会相应的关联一个tc_u_hnode对象，二者通过tcf_proto.root字段关联到一起，具体见u32_init()。

// One u32 hash table: a header followed by divisor + 1 bucket heads.
struct tc_u_hnode
{
	struct tc_u_hnode *next; // links the hnode into tc_u_common.hlist
	u32	handle; // hash table ID only; presumably just the top 12 bits are ever set -- TODO confirm against gen_new_htid()
	u32	prio; // copied from tcf_proto.prio when the hnode is created
	struct tc_u_common *tp_c; // the shared tc_u_common, i.e. the object the qdisc's u32_node points to
	int	refcnt;
	unsigned divisor; // number of ht[] buckets minus one
	struct tc_u_knode *ht[1]; // bucket heads, indexed by the 8-bit hash field of a handle (TC_U32_HASH)
};

match信息: tc_u_knode

// One filter rule: a selector (set of match keys) plus what to do on a match.
struct tc_u_knode
{
	struct tc_u_knode *next;
	u32	handle; // full 32-bit handle of this node
	struct tc_u_hnode	*ht_up;
	struct tcf_exts		exts;
#ifdef CONFIG_NET_CLS_IND
	char indev[IFNAMSIZ];
#endif
	u8	fshift;
	struct tcf_result res; // classification result copied to the caller on a terminal match
	struct tc_u_hnode	*ht_down;
#ifdef CONFIG_CLS_U32_PERF
	struct tc_u32_pcnt	*pf;
#endif
#ifdef CONFIG_CLS_U32_MARK
	struct tc_u32_mark	mark;
#endif
	struct tc_u32_sel sel; // selector copied from the netlink request; its keys[] array extends past the end of this struct
};

u32 filter操作集: cls_u32_ops

u32模块初始化时，会通过register_tcf_proto_ops()接口向内核流控框架注册该实例，如此内核才可以支持u32 filter。

// Operation table of the "u32" classifier; each member points at the
// u32-specific implementation of the corresponding tcf_proto_ops callback.
static struct tcf_proto_ops cls_u32_ops __read_mostly = {
	.kind		=	"u32",
	.classify	=	u32_classify,
	.init		=	u32_init,
	.destroy	=	u32_destroy,
	.get		=	u32_get,
	.put		=	u32_put,
	.change		=	u32_change,
	.delete		=	u32_delete,
	.walk		=	u32_walk,
	.dump		=	u32_dump,
	.owner		=	THIS_MODULE,
};

初始化: u32_init()

用户态通过命令新建u32 filter时，内核态在分配tcf_proto对象后，会调用该回调完成u32特有的初始化。

/*
 * Set up the u32-private state of a freshly created tcf_proto.
 *
 * Allocates the root hash table (a single-bucket tc_u_hnode) for @tp and,
 * if the qdisc has no tc_u_common yet, allocates that as well and stores it
 * in the qdisc's u32_node field. The new hnode is pushed onto the front of
 * tc_u_common.hlist and both objects are attached to @tp.
 *
 * Returns 0 on success or -ENOBUFS when an allocation fails.
 */
static int u32_init(struct tcf_proto *tp)
{
	struct tc_u_common *common = tp->q->u32_node;
	struct tc_u_hnode *root;

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return -ENOBUFS;

	root->divisor = 0;
	root->refcnt++;
	root->prio = tp->prio;

	if (common) {
		/* The qdisc already has shared state: ask it for an unused table ID. */
		root->handle = gen_new_htid(common);
	} else {
		/* First u32 filter on this qdisc: fixed table ID 0x800, and the
		 * shared state has to be created now. */
		root->handle = 0x80000000;

		common = kzalloc(sizeof(*common), GFP_KERNEL);
		if (!common) {
			kfree(root);
			return -ENOBUFS;
		}
		common->q = tp->q;
		tp->q->u32_node = common;
	}

	/* Push the new root table onto the head of the shared hnode list. */
	common->refcnt++;
	root->next = common->hlist;
	common->hlist = root;
	root->tp_c = common;

	/* Tie the private objects to the generic filter object. */
	tp->root = root;
	tp->data = common;
	return 0;
}

获取filter对象/match对象: u32_get()

根据句柄可以获取hnode对象，或者更加具体的knode对象。

// Hash-table-ID part of a filter handle: the top 12 bits.
#define TC_U32_HTID(h) ((h)&0xFFF00000)
// Table ID that u32_get() and u32_change() resolve to the filter's own root
// table (tp->root) instead of looking it up in the hnode list.
#define TC_U32_ROOT	(0xFFF00000)
// Everything below the table ID: the low 20 bits of the handle.
#define TC_U32_KEY(h) ((h)&0xFFFFF)

/*
 * Resolve @handle to a u32 object of filter @tp.
 *
 * The table-ID part selects a hash table (TC_U32_ROOT means the filter's
 * root table). If the remaining 20 bits are zero the table itself is
 * returned, otherwise the knode with exactly this handle is looked up in it.
 *
 * Returns the object pointer cast to unsigned long, or 0 when the table
 * (or the knode) does not exist.
 */
static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
{
	struct tc_u_common *tp_c = tp->data;
	u32 htid = TC_U32_HTID(handle);
	struct tc_u_hnode *table;

	table = (htid == TC_U32_ROOT) ? tp->root : u32_lookup_ht(tp_c, htid);
	if (table == NULL)
		return 0;

	/* A non-zero key part asks for one specific knode inside the table. */
	if (TC_U32_KEY(handle) != 0)
		return (unsigned long)u32_lookup_key(table, handle);

	/* Only a table ID was given: hand back the table itself. */
	return (unsigned long)table;
}

/*
 * Walk tc_u_common.hlist and return the hnode whose handle equals @handle
 * (a hash table ID), or NULL when no such table is on the list.
 */
static __inline__ struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
{
	struct tc_u_hnode *cur = tp_c->hlist;

	while (cur != NULL && cur->handle != handle)
		cur = cur->next;

	return cur;
}

// Bucket-number part of a handle: the 8 bits directly above the low 12 bits.
#define TC_U32_HASH(h) (((h)>>12)&0xFF)

/*
 * Find the knode with exactly @handle inside hash table @ht.
 *
 * The bucket is taken from the handle's hash field; a bucket number larger
 * than ht->divisor lies outside the table and yields NULL, as does a bucket
 * chain that holds no node with this handle.
 */
static __inline__ struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
{
	unsigned bucket = TC_U32_HASH(handle);
	struct tc_u_knode *node;

	if (bucket > ht->divisor)
		return NULL;

	node = ht->ht[bucket];
	while (node != NULL && node->handle != handle)
		node = node->next;

	return node;
}

修改参数: u32_change()

@tp: filter对象;
@base: 如果filter的父亲为class，则为class操作集的get()返回值；否则为0;
@handle: filter的句柄，如果不指定则为0;
@tca: filter的参数;
@arg: filter操作集get()返回值, arg有效则修改指定的knode，否则修改hnode;
/*
 * Create or modify a u32 object from a netlink request.
 *
 * Three cases are handled in this order:
 *   1. *arg already names a knode: its parameters are updated in place.
 *   2. TCA_U32_DIVISOR is present: a new hash table (hnode) is created.
 *   3. otherwise: a new knode is created and inserted into a hash table.
 *
 * @tp:     filter object
 * @base:   passed through to u32_set_parms()
 * @handle: requested handle, 0 to have one generated
 * @tca:    top-level netlink attributes of the request
 * @arg:    in: existing object from get() (0 if none); out: created object
 *
 * Returns 0 on success or a negative errno value.
 */
static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
    struct nlattr **tca, unsigned long *arg)
{
	struct tc_u_common *tp_c = tp->data;
	struct tc_u_hnode *ht;
	struct tc_u_knode *n;
	struct tc_u32_sel *s;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_U32_MAX + 1];
	u32 htid;
	int err;

	if (opt == NULL)
		return handle ? -EINVAL : 0;
    // Split the nested TCA_OPTIONS attribute into tb[].
	err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy);
	if (err < 0)
		return err;

    // The caller already resolved an existing object: update it in place.
	if ((n = (struct tc_u_knode*)*arg) != NULL) {
		// A handle without a key part is a hash table, which cannot be modified here.
		if (TC_U32_KEY(n->handle) == 0)
			return -EINVAL;
        // Apply the new parameters to the existing knode.
		return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE]);
	}

	// A divisor attribute means "create a hash table".
	if (tb[TCA_U32_DIVISOR]) {
		unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);

		// From here on divisor holds "bucket count - 1". A request of 0 wraps
		// around and is rejected.
		// NOTE(review): a requested value of 0x101 passes this test -- confirm
		// whether the intended upper bound is 0x100 buckets.
		if (--divisor > 0x100) // the divisor must not exceed 256
			return -EINVAL;
		if (TC_U32_KEY(handle))
			return -EINVAL;
		if (handle == 0) {
			handle = gen_new_htid(tp->data);
			if (handle == 0)
				return -ENOMEM;
		}
		// struct tc_u_hnode already contains one bucket, so add "divisor" more.
		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
		if (ht == NULL)
			return -ENOBUFS;
		ht->tp_c = tp_c;
		ht->refcnt = 1;
		ht->divisor = divisor;
		ht->handle = handle;
		ht->prio = tp->prio;
		ht->next = tp_c->hlist;
		tp_c->hlist = ht;
		*arg = (unsigned long)ht;
		return 0;
	}

	// Creating a knode: first determine the hash table it goes into.
	if (tb[TCA_U32_HASH]) {
		htid = nla_get_u32(tb[TCA_U32_HASH]);
		if (TC_U32_HTID(htid) == TC_U32_ROOT) {
			ht = tp->root;
			htid = ht->handle;
		} else {
			ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
			if (ht == NULL)
				return -EINVAL;
		}
	} else {
	    // No table requested: use the filter's root table and its ID.
		ht = tp->root;
		htid = ht->handle;
	}

	// The requested bucket must exist in the chosen table.
	if (ht->divisor < TC_U32_HASH(htid))
		return -EINVAL;

    // Build the handle of the new knode.
	if (handle) {
		// An explicit table ID in the handle must match the chosen table.
		if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
			return -EINVAL;
		handle = htid | TC_U32_NODE(handle);
	} else
	    // Generate a node ID and combine it with the table ID / bucket in htid.
		handle = gen_new_kid(ht, htid);

	if (tb[TCA_U32_SEL] == NULL) // a selector (match information) is mandatory
		return -EINVAL;
	s = nla_data(tb[TCA_U32_SEL]);

    // Allocate the knode; the match keys are stored right behind it, see the
    // trailing keys[] array of struct tc_u32_sel.
	n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
	if (n == NULL)
		return -ENOBUFS;

#ifdef CONFIG_CLS_U32_PERF
	n->pf = kzalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
	if (n->pf == NULL) {
		kfree(n);
		return -ENOBUFS;
	}
#endif
    // Copy the selector header and keys prepared by user space.
	memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
	n->ht_up = ht;
	n->handle = handle;
	// Position of the lowest set bit of the hash mask, or 0 without a mask.
	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;

#ifdef CONFIG_CLS_U32_MARK
	if (tb[TCA_U32_MARK]) {
		struct tc_u32_mark *mark;

		mark = nla_data(tb[TCA_U32_MARK]);
		memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
		n->mark.success = 0;
	}
#endif
    // Apply the remaining attributes (tb[] and TCA_RATE) to the new knode.
	err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE]);
	if (err == 0) {
	    // Find the insertion point in the bucket chain, which is kept in
	    // ascending node-ID order.
		struct tc_u_knode **ins;
		for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
			if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
				break;
        // Link the new knode into the chain; only the pointer store that
        // publishes it is done under the tree lock.
		n->next = *ins;
		tcf_tree_lock(tp);
		*ins = n;
		tcf_tree_unlock(tp);
        // Report the new knode back to the caller.
		*arg = (unsigned long)n;
		return 0;
	}
	// u32_set_parms() failed: release what was allocated above.
#ifdef CONFIG_CLS_U32_PERF
	kfree(n->pf);
#endif
	kfree(n);
	return err;
}

分类: u32_classify()

判定skb是否可以由filter得出分类判定结果res。逻辑没看懂…

/*
 * Classify one packet against the knodes reachable from this filter's root
 * hash table.
 *
 * Walks the bucket chain selected by sel and tests every key of each knode
 * against the packet. A knode whose keys all match either ends the walk
 * (no ht_down and TC_U32_TERMINAL set) or redirects the walk into its
 * ht_down table; the current knode and packet pointer are pushed on a small
 * stack so the walk can resume at the parent if the child table yields
 * nothing.
 *
 * Returns the result of tcf_exts_exec() for the first terminal knode that
 * accepts the packet (with *res copied from that knode), or -1 when nothing
 * matches or the nesting exceeds TC_U32_MAXDEPTH.
 */
static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res)
{
	// Saved (knode, packet pointer) pairs for linked tables being visited.
	struct {
		struct tc_u_knode *knode;
		u8		  *ptr;
	} stack[TC_U32_MAXDEPTH];

	struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; // start at the filter's root hash table
	u8 *ptr = skb_network_header(skb);
	struct tc_u_knode *n;
	int sdepth = 0;
	int off2 = 0;
	int sel = 0;
#ifdef CONFIG_CLS_U32_PERF
	int j;
#endif
	int i, r;

next_ht:
	// Enter bucket sel of the current table.
	n = ht->ht[sel];

next_knode:
    // Try the knodes of the bucket chain one after another.
	if (n) {
		struct tc_u32_key *key = n->sel.keys;

#ifdef CONFIG_CLS_U32_PERF
		n->pf->rcnt +=1;
		j = 0;
#endif

#ifdef CONFIG_CLS_U32_MARK
		// Skip the knode when the packet mark does not match.
		if ((skb->mark & n->mark.mask) != n->mark.val) {
			n = n->next;
			goto next_knode;
		} else {
			n->mark.success++;
		}
#endif
        // All keys of one knode are ANDed: the first mismatch rejects the knode.
		for (i = n->sel.nkeys; i>0; i--, key++) {
			// Non-zero when the masked packet word differs from the key value.
			if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) {
				n = n->next;
				goto next_knode;
			}
#ifdef CONFIG_CLS_U32_PERF
			n->pf->kcnts[j] +=1;
			j++;
#endif
		}
		// Every key matched. Without a linked table this knode is a leaf.
		if (n->ht_down == NULL) {
check_terminal:
			if (n->sel.flags&TC_U32_TERMINAL) {

				*res = n->res;
#ifdef CONFIG_NET_CLS_IND
				if (!tcf_match_indev(skb, n->indev)) {
					n = n->next;
					goto next_knode;
				}
#endif
#ifdef CONFIG_CLS_U32_PERF
				n->pf->rhit +=1;
#endif
				// A negative result means "keep looking" with the next knode.
				r = tcf_exts_exec(skb, &n->exts, res);
				if (r < 0) {
					n = n->next;
					goto next_knode;
				}
				return r;
			}
			n = n->next;
			goto next_knode;
		}

		/* PUSH */
		if (sdepth >= TC_U32_MAXDEPTH)
			goto deadloop;
		stack[sdepth].knode = n;
		stack[sdepth].ptr = ptr;
		sdepth++;

		// Descend into the linked table; with more than one bucket the bucket
		// is derived from the packet word at hoff.
		ht = n->ht_down;
		sel = 0;
		if (ht->divisor)
			sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift);

		// No offset handling requested: continue in the child table as is.
		if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
			goto next_ht;

		if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
			// New running offset: constant part plus optional part read from
			// the packet, rounded up to a multiple of 4 ("+ 3" then "&= ~3").
			off2 = n->sel.off + 3;
			if (n->sel.flags&TC_U32_VAROFFSET)
				off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift;
			off2 &= ~3;
		}
		if (n->sel.flags&TC_U32_EAT) {
			// Consume the offset: advance the packet pointer itself.
			ptr += off2;
			off2 = 0;
		}
		// Only continue while the pointer is still before the packet's tail.
		if (ptr < skb_tail_pointer(skb))
			goto next_ht;
	}

	/* POP */
	// Chain exhausted (or pointer past the tail): go back to the parent knode
	// and re-check it as a potential terminal.
	if (sdepth--) {
		n = stack[sdepth].knode;
		ht = n->ht_up;
		ptr = stack[sdepth].ptr;
		goto check_terminal;
	}
	return -1;

deadloop:
	if (net_ratelimit())
		printk("cls_u32: dead loop\n");
	return -1;
}

fanxiaoyu321

关注

1
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
u32过滤器

文章目录命令行说明示例数据结构match参数: tc_u32_key/tc_u32_sel命令行参数解析: u32_parse_opt()内核态实现数据结构tc_u_commonfilter对象: tc_u_hnodematch信息: tc_u_knodeu32 filter操作集: cls_u32_ops初始化: u32_init()获取filter对象/match对象: u32_get()修改参数: u32_change()分类: u32_classify()u32是最通用的一个数据包filter，它
复制链接

扫一扫

专栏目录