ipt_do_table函数

一、使用结构体

struct ipt_ip(include/uapi/linux/netfilter_ipv4/ip_tables.h)。

 68 /* Yes, Virginia, you have to zero the padding. */
    /* Per-rule IP header match criteria: ip_packet_match() tests a packet's
     * IP header and in/out device names against the fields below. */
 69 struct ipt_ip {
 70     /* Source and destination IP addr */
 71     struct in_addr src, dst;
 72     /* Mask for src and dest IP addr */
 73     struct in_addr smsk, dmsk;
        /* Input/output interface names and their masks; ip_packet_match()
         * hands each name/mask pair to ifname_compare_aligned(). */
 74     char iniface[IFNAMSIZ], outiface[IFNAMSIZ];
 75     unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ];
 76 
 77     /* Protocol, 0 = ANY */
 78     __u16 proto;
 79 
 80     /* Flags word */
 81     __u8 flags;     /* flags field (IPT_F_FRAG and IPT_F_GOTO are tested against it) */
 82     /* Inverse flags */
 83     __u8 invflags;   /* inversion flags (IPT_INV_* bits, consulted by FWINV()) */
 84 };

struct ipt_entry(include/uapi/linux/netfilter_ipv4/ip_tables.h)。其中:
target_offset:target区相对于本规则起始处的偏移。target区位于match区之后,而match区紧跟在ipt_entry结构体之后(即elems[]处);
初始化为sizeof(struct ipt_entry),即假定没有match。
next_offset:下一条规则相对于本规则的偏移,也即本规则(ipt_entry + match + target)所占空间的总和;
初始化为sizeof(struct ipt_entry)+sizeof(struct ipt_target),即假定没有match。
comefrom:位向量,标记调用本规则的HOOK号,可用于检查规则的有效性。

101 /* This structure defines each of the firewall rules.  Consists of 3
102    parts which are 1) general IP header stuff 2) match specific
103    stuff 3) the target to perform if the rule matches */
104 struct ipt_entry {
        /* IP header criteria; ipt_do_table() passes &e->ip to ip_packet_match() */
105     struct ipt_ip ip;
106 
107     /* Mark with fields that we care about. */
108     unsigned int nfcache;
109 
110     /* Size of ipt_entry + matches */
        /* i.e. the offset from the start of this rule to its target */
111     __u16 target_offset;
112     /* Size of ipt_entry + matches + target */
        /* i.e. the offset from the start of this rule to the next rule */
113     __u16 next_offset;
114 
115     /* Back pointer */
116     unsigned int comefrom;
117 
118     /* Packet and byte counters. */
        /* ipt_do_table() bumps these via ADD_COUNTER() once the rule has matched */
119     struct xt_counters counters;
120 
121     /* The matches (if any), then the target. */
122     unsigned char elems[0];
123 };

struct iphdr(include/uapi/linux/ip.h)

 85 struct iphdr {
        /* IP header as obtained through ip_hdr(skb) in ipt_do_table().
         * The two 4-bit fields below share one byte; their declaration
         * order follows the configured bitfield endianness. */
 86 #if defined(__LITTLE_ENDIAN_BITFIELD)
 87     __u8    ihl:4,
 88         version:4;
 89 #elif defined (__BIG_ENDIAN_BITFIELD)
 90     __u8    version:4,
 91         ihl:4;
 92 #else
 93 #error  "Please fix <asm/byteorder.h>"
 94 #endif
 95     __u8    tos;
 96     __be16  tot_len;
 97     __be16  id;
        /* ipt_do_table() computes ntohs(frag_off) & IP_OFFSET as the fragment offset */
 98     __be16  frag_off;
 99     __u8    ttl;
        /* compared with ipt_ip.proto in ip_packet_match() */
100     __u8    protocol;
101     __sum16 check;
        /* masked with ipt_ip.smsk / dmsk and compared with ipt_ip.src / dst */
102     __be32  saddr;
103     __be32  daddr;
104     /*The options start here. */
105 };

struct xt_action_param(include/net/netfilter/x_tables.h)

  9 /**
 10  * struct xt_action_param - parameters for matches/targets
 11  *
 12  * @match:  the match extension
 13  * @target: the target extension
 14  * @matchinfo:  per-match data
 15  * @targinfo:   per-target data
 16  * @net:    network namespace through which the action was invoked
 17  * @in:     input netdevice
 18  * @out:    output netdevice
 19  * @fragoff:    packet is a fragment, this is the data offset
 20  * @thoff:  position of transport header relative to skb->data
 21  * @hooknum:    hook number given packet came from
 22  * @family: Actual NFPROTO_* through which the function is invoked
 23  *      (helpful when match->family == NFPROTO_UNSPEC)
 24  *
 25  * Fields written to by extensions:
 26  *
 27  * @hotdrop:    drop packet if we had inspection problems
 28  */
 29 struct xt_action_param {
        /* match and target share storage: only one is meaningful at a time */
 30     union {
 31         const struct xt_match *match;
 32         const struct xt_target *target;
 33     };
        /* likewise matchinfo and targinfo share storage */
 34     union {
 35         const void *matchinfo, *targinfo;
 36     };
 37     struct net *net;
 38     const struct net_device *in, *out;
 39     int fragoff;
 40     unsigned int thoff;
 41     unsigned int hooknum;
 42     u_int8_t family;
 43     bool hotdrop;
 44 };

struct nf_hook_state(include/linux/netfilter.h)

 50 struct nf_hook_state {
 51     unsigned int hook;  // the hook point that is currently handling the packet
 52     int thresh;
 53     u_int8_t pf;
        // input/output devices; ipt_do_table() checks each for NULL before reading ->name
 54     struct net_device *in;
 55     struct net_device *out;
 56     struct sock *sk;
 57     struct net *net;    // copied into acpar.net by ipt_do_table()
 58     struct list_head *hook_list;
 59     int (*okfn)(struct net *, struct sock *, struct sk_buff *);
 60 };

二、ipt_do_table()函数(net/ipv4/netfilter/ip_tables.c)

 285 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
     /*
      * Walks the rules of @table for the hook given in @state->hook, starting
      * at private->hook_entry[hook], until a rule yields a final verdict or a
      * match sets acpar.hotdrop.
      *
      * @skb:   packet being filtered
      * @state: hook state (hook number, in/out devices, net namespace)
      * @table: table whose rules are traversed
      *
      * Per rule: ip_packet_match() checks the IP header and device names, then
      * every extension match is run. If all pass, the rule's counters are
      * bumped and its target is processed:
      *  - standard target, negative verdict other than XT_RETURN: the final
      *    verdict is (-v) - 1;
      *  - standard target, XT_RETURN: pop the jumpstack, or go to the hook's
      *    underflow entry when the stack is empty;
      *  - standard target, non-negative verdict: continue at offset v in the
      *    table, first pushing the current rule on the jumpstack unless v is
      *    simply the next rule or IPT_F_GOTO is set;
      *  - extension target: call it; XT_CONTINUE moves on to the next rule,
      *    anything else is the final verdict.
      *
      * The traversal runs between local_bh_disable()/local_bh_enable() and
      * xt_write_recseq_begin()/xt_write_recseq_end().
      *
      * Returns NF_DROP when hotdrop was set, otherwise the verdict reached
      * (always NF_ACCEPT when DEBUG_ALLOW_ALL is defined).
      */
 286 unsigned int
 287 ipt_do_table(struct sk_buff *skb,
 288          const struct nf_hook_state *state,
 289          struct xt_table *table)
 290 {
 291     unsigned int hook = state->hook;
 292     static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 293     const struct iphdr *ip;
 294     /* Initializing verdict to NF_DROP keeps gcc happy. */
 295     unsigned int verdict = NF_DROP;
 296     const char *indev, *outdev;
 297     const void *table_base;
 298     struct ipt_entry *e, **jumpstack;
 299     unsigned int stackidx, cpu;
 300     const struct xt_table_info *private;
 301     struct xt_action_param acpar;
 302     unsigned int addend;
 303 
 304     /* Initialization */
 305     stackidx = 0;
 306     ip = ip_hdr(skb); // fetch the network-layer (IP) header of the skb
         /* Fall back to the all-zero nulldevname when a device is absent. */
 307     indev = state->in ? state->in->name : nulldevname;
 308     outdev = state->out ? state->out->name : nulldevname;
 309     /* We handle fragments by dealing with the first fragment as
 310      * if it was a normal packet.  All other fragments are treated
 311      * normally, except that they will NEVER match rules that ask
 312      * things we don't know, ie. tcp syn flag or ports).  If the
 313      * rule is also a fragment-specific rule, non-fragments won't
 314      * match it. */
 315     acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 316     acpar.thoff   = ip_hdrlen(skb);
 317     acpar.hotdrop = false;
 318     acpar.net     = state->net;
 319     acpar.in      = state->in;
 320     acpar.out     = state->out;
 321     acpar.family  = NFPROTO_IPV4;
 322     acpar.hooknum = hook;
 323 
 324     IP_NF_ASSERT(table->valid_hooks & (1 << hook));  // assert that this hook is one of the table's valid hooks
 325     local_bh_disable();
 326     addend = xt_write_recseq_begin();
 327     private = table->private;
 328     cpu        = smp_processor_id();
 329     /*
 330      * Ensure we load private-> members after we've fetched the base
 331      * pointer.
 332      */
 333     smp_read_barrier_depends();
 334     table_base = private->entries;
 335     jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
 336 
 337     /* Switch to alternate jumpstack if we're being invoked via TEE.
 338      * TEE issues XT_CONTINUE verdict on original skb so we must not
 339      * clobber the jumpstack.
 340      *
 341      * For recursion via REJECT or SYNPROXY the stack will be clobbered
 342      * but it is no problem since absolute verdict is issued by these.
 343      */
 344     if (static_key_false(&xt_tee_enabled))
 345         jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
 346 
 347     e = get_entry(table_base, private->hook_entry[hook]);  // first ipt_entry for this hook
 348 
 349     pr_debug("Entering %s(hook %u), UF %p\n",
 350          table->name, hook,
 351          get_entry(table_base, private->underflow[hook]));
 352 
 353     do {
 354         const struct xt_entry_target *t;
 355         const struct xt_entry_match *ematch;
 356         struct xt_counters *counter;
 357 
 358         IP_NF_ASSERT(e);
 359         if (!ip_packet_match(ip, indev, outdev,
 360             &e->ip, acpar.fragoff)) {    // standard match: compare the IP header and device names (from the sk_buff and nf_hook_state) against the rule's ipt_ip
 361  no_match:
 362             e = ipt_next_entry(e); // step to the next ipt_entry using this entry's next_offset
 363             continue;
 364         }
 365         // run every extension match attached to the rule
 366         xt_ematch_foreach(ematch, e) {
 367             acpar.match     = ematch->u.kernel.match;
 368             acpar.matchinfo = ematch->data;
 369             if (!acpar.match->match(skb, &acpar))
 370                 goto no_match;
 371         }
 372 
             /* Rule matched: account one packet of skb->len bytes. */
 373         counter = xt_get_this_cpu_counter(&e->counters);
 374         ADD_COUNTER(*counter, skb->len, 1);
 375 
 376         t = ipt_get_target(e);  // locate this ipt_entry's target via its target_offset
 377         IP_NF_ASSERT(t->u.kernel.target);
 378 
 379 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 380         /* The packet is traced: log it */
 381         if (unlikely(skb->nf_trace))
 382             trace_packet(state->net, skb, hook, state->in,
 383                      state->out, table->name, private, e);
 384 #endif
 385         /* Standard target? */
 386         if (!t->u.kernel.target->target) { // a NULL xt_entry_target->u.kernel.target->target means this is a standard target
 387             int v;
 388 
 389             v = ((struct xt_standard_target *)t)->verdict;
 390             if (v < 0) {
 391                 /* Pop from stack? */
 392                 if (v != XT_RETURN) {    // not XT_RETURN: this is a final verdict, so stop traversing and return it
 393                     verdict = (unsigned int)(-v) - 1;
 394                     break;
 395                 }
 396                 if (stackidx == 0) {
 397                     e = get_entry(table_base,
 398                         private->underflow[hook]);
 399                     pr_debug("Underflow (this is normal) "
 400                          "to %p\n", e);
 401                 } else {
 402                     e = jumpstack[--stackidx];
 403                     pr_debug("Pulled %p out from pos %u\n",
 404                          e, stackidx);
 405                     e = ipt_next_entry(e);
 406                 }
 407                 continue;
 408             }
                 /* Non-negative v is an offset into the table. Remember the
                  * current rule for a later XT_RETURN unless v is just the
                  * next rule or the rule carries IPT_F_GOTO. */
 409             if (table_base + v != ipt_next_entry(e) &&
 410                 !(e->ip.flags & IPT_F_GOTO)) {
 411                 if (unlikely(stackidx >= private->stacksize)) {
 412                     verdict = NF_DROP;
 413                     break;
 414                 }
 415                 jumpstack[stackidx++] = e;
 416                 pr_debug("Pushed %p into pos %u\n",
 417                      e, stackidx - 1);
 418             }
 419 
 420             e = get_entry(table_base, v);
 421             continue;
 422         }
 423 
 424         acpar.target   = t->u.kernel.target;
 425         acpar.targinfo = t->data;
 426 // extension target: call t->u.kernel.target->target to perform the extension's action; its return value becomes the verdict
 427         verdict = t->u.kernel.target->target(skb, &acpar);
 428         /* Target might have changed stuff. */
 429         ip = ip_hdr(skb);
            // when the extension target returns XT_CONTINUE, carry on with the next rule
 430         if (verdict == XT_CONTINUE)
 431             e = ipt_next_entry(e);
 432         else
 433             /* Verdict */
 434             break;
 435     } while (!acpar.hotdrop);
 436     pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
 437 
 438     xt_write_recseq_end(addend);
 439     local_bh_enable();
 440 
 441 #ifdef DEBUG_ALLOW_ALL
 442     return NF_ACCEPT;
 443 #else
 444     if (acpar.hotdrop)
 445         return NF_DROP;
 446     else return verdict;
 447 #endif
 448 }

ip_packet_match(net/ipv4/netfilter/ip_tables.c)

72 /* Returns whether matches rule or not. */
  73 /* Performance critical - called for every packet */
     /*
      * @ip:     IP header of the packet under test
      * @indev:  input device name, compared with ipinfo->iniface
      * @outdev: output device name, compared with ipinfo->outiface
      * @ipinfo: the rule's IP match criteria
      * @isfrag: ipt_do_table() passes acpar.fragoff here; non-zero is
      *          treated as "packet is a fragment"
      *
      * The checks run in order: source/destination address, input
      * interface, output interface, protocol, fragment flag. The first
      * failing check returns false; true is returned only when all pass.
      */
  74 static inline bool
  75 ip_packet_match(const struct iphdr *ip,
  76         const char *indev,
  77         const char *outdev,
  78         const struct ipt_ip *ipinfo,
  79         int isfrag)
  80 {
  81     unsigned long ret;
  82 
         /* FWINV(cond, flag): cond XORed with "flag is set in
          * ipinfo->invflags", so an inversion flag flips the test. */
  83 #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
  84 
         /* Address check: mask the packet address, compare with the rule's. */
  85     if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
  86           IPT_INV_SRCIP) ||
  87         FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
  88           IPT_INV_DSTIP)) {
  89         dprintf("Source or dest mismatch.\n");
  90 
  91         dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
  92             &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
  93             ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
  94         dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
  95             &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
  96             ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
  97         return false;
  98     }
  99 
         /* Input interface: a non-zero compare result counts as a mismatch. */
 100     ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
 101 
 102     if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
 103         dprintf("VIA in mismatch (%s vs %s).%s\n",
 104             indev, ipinfo->iniface,
 105             ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : "");
 106         return false;
 107     }
 108 
         /* Output interface: same test against outiface/outiface_mask. */
 109     ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
 110 
 111     if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
 112         dprintf("VIA out mismatch (%s vs %s).%s\n",
 113             outdev, ipinfo->outiface,
 114             ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : "");
 115         return false;
 116     }
 117 
 118     /* Check specific protocol */
         /* A proto of 0 skips the protocol test entirely. */
 119     if (ipinfo->proto &&
 120         FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
 121         dprintf("Packet protocol %hi does not match %hi.%s\n",
 122             ip->protocol, ipinfo->proto,
 123             ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : "");
 124         return false;
 125     }
 126 
 127     /* If we have a fragment rule but the packet is not a fragment
 128      * then we return zero */
 129     if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
 130         dprintf("Fragment rule but not fragment.%s\n",
 131             ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
 132         return false;
 133     }
 134 
 135     return true;
 136 }

 

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值