一、用到的结构体
struct ipt_ip(include/uapi/linux/netfilter_ipv4/ip_tables.h)。
68 /* Yes, Virginia, you have to zero the padding. */
69 struct ipt_ip {
70 /* Source and destination IP addr */
71 struct in_addr src, dst;
72 /* Mask for src and dest IP addr */
73 struct in_addr smsk, dmsk;
74 char iniface[IFNAMSIZ], outiface[IFNAMSIZ];
75 unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ];
76
77 /* Protocol, 0 = ANY */
78 __u16 proto;
79
80 /* Flags word */
81 __u8 flags; /* 标志字段 */
82 /* Inverse flags */
83 __u8 invflags; /* 取反标志 */
84 };
struct ipt_entry(include/uapi/linux/netfilter_ipv4/ip_tables.h)。其中,
target_offset:target区相对于本ipt_entry起始位置的偏移;通常target区位于match区之后,而match区紧跟在ipt_entry结构体固定部分之后(即elems处);
初始化为sizeof(struct ipt_entry),即假定没有match;
next_offset:下一条规则相对于本规则的偏移,也即本规则所用空间的总和,
初始化为sizeof(struct ipt_entry)+sizeof(struct ipt_target),即假定没有match;
comefrom:位向量,标记调用本规则的HOOK号,可用于检查规则的有效性;
101 /* This structure defines each of the firewall rules. Consists of 3
102 parts which are 1) general IP header stuff 2) match specific
103 stuff 3) the target to perform if the rule matches */
104 struct ipt_entry {
105 struct ipt_ip ip;
106
107 /* Mark with fields that we care about. */
108 unsigned int nfcache;
109
110 /* Size of ipt_entry + matches */
111 __u16 target_offset;
112 /* Size of ipt_entry + matches + target */
113 __u16 next_offset;
114
115 /* Back pointer */
116 unsigned int comefrom;
117
118 /* Packet and byte counters. */
119 struct xt_counters counters;
120
121 /* The matches (if any), then the target. */
122 unsigned char elems[0];
123 };
struct iphdr(include/uapi/linux/ip.h)
85 struct iphdr {
86 #if defined(__LITTLE_ENDIAN_BITFIELD)
87 __u8 ihl:4,
88 version:4;
89 #elif defined (__BIG_ENDIAN_BITFIELD)
90 __u8 version:4,
91 ihl:4;
92 #else
93 #error "Please fix <asm/byteorder.h>"
94 #endif
95 __u8 tos;
96 __be16 tot_len;
97 __be16 id;
98 __be16 frag_off;
99 __u8 ttl;
100 __u8 protocol;
101 __sum16 check;
102 __be32 saddr;
103 __be32 daddr;
104 /*The options start here. */
105 };
struct xt_action_param(include/net/netfilter/x_tables.h)
9 /**
10 * struct xt_action_param - parameters for matches/targets
11 *
12 * @match: the match extension
13 * @target: the target extension
14 * @matchinfo: per-match data
15 * @targetinfo: per-target data
16 * @net network namespace through which the action was invoked
17 * @in: input netdevice
18 * @out: output netdevice
19 * @fragoff: packet is a fragment, this is the data offset
20 * @thoff: position of transport header relative to skb->data
21 * @hook: hook number given packet came from
22 * @family: Actual NFPROTO_* through which the function is invoked
23 * (helpful when match->family == NFPROTO_UNSPEC)
24 *
25 * Fields written to by extensions:
26 *
27 * @hotdrop: drop packet if we had inspection problems
28 */
29 struct xt_action_param {
30 union {
31 const struct xt_match *match;
32 const struct xt_target *target;
33 };
34 union {
35 const void *matchinfo, *targinfo;
36 };
37 struct net *net;
38 const struct net_device *in, *out;
39 int fragoff;
40 unsigned int thoff;
41 unsigned int hooknum;
42 u_int8_t family;
43 bool hotdrop;
44 };
struct nf_hook_state(include/linux/netfilter.h)
50 struct nf_hook_state {
51 unsigned int hook; // 当前要被哪个hook点处理
52 int thresh;
53 u_int8_t pf;
54 struct net_device *in;
55 struct net_device *out;
56 struct sock *sk;
57 struct net *net;
58 struct list_head *hook_list;
59 int (*okfn)(struct net *, struct sock *, struct sk_buff *);
60 };
二、ipt_do_table()函数(net/ipv4/netfilter/ip_tables.c)
285 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
286 unsigned int
287 ipt_do_table(struct sk_buff *skb,
288 const struct nf_hook_state *state,
289 struct xt_table *table)
290 {
291 unsigned int hook = state->hook;
292 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
293 const struct iphdr *ip;
294 /* Initializing verdict to NF_DROP keeps gcc happy. */
295 unsigned int verdict = NF_DROP;
296 const char *indev, *outdev;
297 const void *table_base;
298 struct ipt_entry *e, **jumpstack;
299 unsigned int stackidx, cpu;
300 const struct xt_table_info *private;
301 struct xt_action_param acpar;
302 unsigned int addend;
303
304 /* Initialization */
305 stackidx = 0;
306 ip = ip_hdr(skb); // 获取skb中网络层的相关信息
307 indev = state->in ? state->in->name : nulldevname;
308 outdev = state->out ? state->out->name : nulldevname;
309 /* We handle fragments by dealing with the first fragment as
310 * if it was a normal packet. All other fragments are treated
311 * normally, except that they will NEVER match rules that ask
312 * things we don't know, ie. tcp syn flag or ports). If the
313 * rule is also a fragment-specific rule, non-fragments won't
314 * match it. */
315 acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
316 acpar.thoff = ip_hdrlen(skb);
317 acpar.hotdrop = false;
318 acpar.net = state->net;
319 acpar.in = state->in;
320 acpar.out = state->out;
321 acpar.family = NFPROTO_IPV4;
322 acpar.hooknum = hook;
323
324 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); // 判断要处理的hook点是否在表的有效范围之内
325 local_bh_disable();
326 addend = xt_write_recseq_begin();
327 private = table->private;
328 cpu = smp_processor_id();
329 /*
330 * Ensure we load private-> members after we've fetched the base
331 * pointer.
332 */
333 smp_read_barrier_depends();
334 table_base = private->entries;
335 jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
336
337 /* Switch to alternate jumpstack if we're being invoked via TEE.
338 * TEE issues XT_CONTINUE verdict on original skb so we must not
339 * clobber the jumpstack.
340 *
341 * For recursion via REJECT or SYNPROXY the stack will be clobbered
342 * but it is no problem since absolute verdict is issued by these.
343 */
344 if (static_key_false(&xt_tee_enabled))
345 jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
346
347 e = get_entry(table_base, private->hook_entry[hook]); // 获取hook点的ipt_entry
348
349 pr_debug("Entering %s(hook %u), UF %p\n",
350 table->name, hook,
351 get_entry(table_base, private->underflow[hook]));
352
353 do {
354 const struct xt_entry_target *t;
355 const struct xt_entry_match *ematch;
356 struct xt_counters *counter;
357
358 IP_NF_ASSERT(e);
359 if (!ip_packet_match(ip, indev, outdev,
360 &e->ip, acpar.fragoff)) { // 标准match:用skb的IP头和nf_hook_state中的出入设备名,与ipt_entry中ipt_ip记录的地址/设备/协议/分片等条件进行匹配
361 no_match:
362 e = ipt_next_entry(e); // 利用ipt_entry的next_offset获取下一个ipt_entry
363 continue;
364 }
365 // 对规则中存在的扩展match(extended match)逐个进行匹配
366 xt_ematch_foreach(ematch, e) {
367 acpar.match = ematch->u.kernel.match;
368 acpar.matchinfo = ematch->data;
369 if (!acpar.match->match(skb, &acpar))
370 goto no_match;
371 }
372
373 counter = xt_get_this_cpu_counter(&e->counters);
374 ADD_COUNTER(*counter, skb->len, 1);
375
376 t = ipt_get_target(e); // 根据ipt_entry中的target_offset获取该ipt_entry对应的target
377 IP_NF_ASSERT(t->u.kernel.target);
378
379 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
380 /* The packet is traced: log it */
381 if (unlikely(skb->nf_trace))
382 trace_packet(state->net, skb, hook, state->in,
383 state->out, table->name, private, e);
384 #endif
385 /* Standard target? */
386 if (!t->u.kernel.target->target) { // 如果xt_entry_target->u.kernel.target->target == NULL,说明是标准target
387 int v;
388
389 v = ((struct xt_standard_target *)t)->verdict;
390 if (v < 0) {
391 /* Pop from stack? */
392 if (v != XT_RETURN) { // 不是XT_RETURN时,v是绝对判决(如NF_ACCEPT、NF_DROP),换算为(-v)-1后跳出循环并返回该判决
393 verdict = (unsigned int)(-v) - 1;
394 break;
395 }
396 if (stackidx == 0) {
397 e = get_entry(table_base,
398 private->underflow[hook]);
399 pr_debug("Underflow (this is normal) "
400 "to %p\n", e);
401 } else {
402 e = jumpstack[--stackidx];
403 pr_debug("Pulled %p out from pos %u\n",
404 e, stackidx);
405 e = ipt_next_entry(e);
406 }
407 continue;
408 }
409 if (table_base + v != ipt_next_entry(e) &&
410 !(e->ip.flags & IPT_F_GOTO)) {
411 if (unlikely(stackidx >= private->stacksize)) {
412 verdict = NF_DROP;
413 break;
414 }
415 jumpstack[stackidx++] = e;
416 pr_debug("Pushed %p into pos %u\n",
417 e, stackidx - 1);
418 }
419
420 e = get_entry(table_base, v);
421 continue;
422 }
423
424 acpar.target = t->u.kernel.target;
425 acpar.targinfo = t->data;
426 // 当是扩展target时,需要调用t->u.kernel.target->target,执行扩展的target操作,并返回结果。
427 verdict = t->u.kernel.target->target(skb, &acpar);
428 /* Target might have changed stuff. */
429 ip = ip_hdr(skb);
// 当扩展target的执行结果为XT_CONTINUE时,需要取下一条规则继续执行
430 if (verdict == XT_CONTINUE)
431 e = ipt_next_entry(e);
432 else
433 /* Verdict */
434 break;
435 } while (!acpar.hotdrop);
436 pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
437
438 xt_write_recseq_end(addend);
439 local_bh_enable();
440
441 #ifdef DEBUG_ALLOW_ALL
442 return NF_ACCEPT;
443 #else
444 if (acpar.hotdrop)
445 return NF_DROP;
446 else return verdict;
447 #endif
448 }
ip_packet_match(net/ipv4/netfilter/ip_tables.c)
72 /* Returns whether matches rule or not. */
73 /* Performance critical - called for every packet */
74 static inline bool
75 ip_packet_match(const struct iphdr *ip,
76 const char *indev,
77 const char *outdev,
78 const struct ipt_ip *ipinfo,
79 int isfrag)
80 {
81 unsigned long ret;
82
83 #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
84
85 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
86 IPT_INV_SRCIP) ||
87 FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
88 IPT_INV_DSTIP)) {
89 dprintf("Source or dest mismatch.\n");
90
91 dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
92 &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
93 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
94 dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
95 &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
96 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
97 return false;
98 }
99
100 ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
101
102 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
103 dprintf("VIA in mismatch (%s vs %s).%s\n",
104 indev, ipinfo->iniface,
105 ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : "");
106 return false;
107 }
108
109 ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
110
111 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
112 dprintf("VIA out mismatch (%s vs %s).%s\n",
113 outdev, ipinfo->outiface,
114 ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : "");
115 return false;
116 }
117
118 /* Check specific protocol */
119 if (ipinfo->proto &&
120 FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
121 dprintf("Packet protocol %hi does not match %hi.%s\n",
122 ip->protocol, ipinfo->proto,
123 ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : "");
124 return false;
125 }
126
127 /* If we have a fragment rule but the packet is not a fragment
128 * then we return zero */
129 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
130 dprintf("Fragment rule but not fragment.%s\n",
131 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
132 return false;
133 }
134
135 return true;
136 }