继上一篇笔记《防火墙之filter表(一)—AF_INET协议族》之后,这篇笔记来看看AF_INET协议族filter表的钩子注册过程,以及防火墙检查时对规则的遍历。涉及的核心代码文件有:
代码路径 | 说明 |
---|---|
net/ipv4/netfilter/iptable_filter.c | IPv4 filter表实现 |
1. HOOK的注册
/*
 * Initialization routine (marked __init) for the IPv4 filter table.
 * This is an excerpt: each "..." line stands for code elided from the
 * listing, including the declaration of ret and the cleanup_table label.
 */
static int __init iptable_filter_init(void)
{
...
/* Register hooks */
/* The whole ipt_ops array is registered in one call. */
ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
/* A negative result is treated as failure and control jumps to the
 * cleanup_table label (not visible in this excerpt). */
if (ret < 0)
goto cleanup_table;
/* Otherwise the result of nf_register_hooks() is returned unchanged. */
return ret;
...
}
没什么复杂的,直接调用Netfilter框架的钩子注册函数,关键是ipt_ops的定义。
/*
 * Hook descriptors for the IPv4 filter table; iptable_filter_init() passes
 * this array to nf_register_hooks().
 *
 * Every entry uses PF_INET, priority NF_IP_PRI_FILTER and THIS_MODULE as
 * owner. The entries differ only in the hook point and the handler:
 *   NF_INET_LOCAL_IN  -> ipt_hook
 *   NF_INET_FORWARD   -> ipt_hook
 *   NF_INET_LOCAL_OUT -> ipt_local_out_hook
 */
static struct nf_hook_ops ipt_ops[] __read_mostly = {
/* Packets at the LOCAL_IN hook point. */
{
.hook = ipt_hook,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_FILTER,
},
/* Packets at the FORWARD hook point. */
{
.hook = ipt_hook,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_FORWARD,
.priority = NF_IP_PRI_FILTER,
},
/* Packets at the LOCAL_OUT hook point; uses the handler that first checks
 * for short packets before walking the table. */
{
.hook = ipt_local_out_hook,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_FILTER,
},
};
注册了三个HOOK点:LOCAL_IN、FORWARD和LOCAL_OUT,对应的钩子函数分别为ipt_hook()、ipt_hook()和ipt_local_out_hook()。这三个钩子的优先级都是NF_IP_PRI_FILTER(值为0)。
2. 钩子函数:ipt_hook()
如上,当从LOCAL_IN和FORWARD两个HOOK点通过filter表时,入口函数就是ipt_hook(),可以想象得到,该函数要执行的动作就是找到filter表中对应HOOK点上挂的规则,然后逐条匹配处理。
/*
 * Netfilter entry point registered for the LOCAL_IN and FORWARD hook points
 * (see ipt_ops). The work comes in here from netfilter.c.
 *
 * The packet is evaluated against the filter table of init_net; the verdict
 * produced by ipt_do_table() is returned as-is. okfn is not used here.
 */
static unsigned int ipt_hook(unsigned int hook,
			     struct sk_buff *skb, const struct net_device *in,
			     const struct net_device *out, int (*okfn)(struct sk_buff *))
{
	/* Always operate on the filter table. */
	struct xt_table *filter = init_net.ipv4.iptable_filter;

	return ipt_do_table(skb, hook, in, out, filter);
}
2.1 ipt_do_table()
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
/*
 * Walk the rules that `table` holds for hook point `hook` and decide the
 * fate of `skb`.
 *
 * skb:   packet under inspection; its IP header is obtained with ip_hdr().
 * hook:  hook point index; used to index hook_entry[] and underflow[] and
 *        asserted to be set in table->valid_hooks.
 * in:    input device, may be NULL (nulldevname is used as its name then).
 * out:   output device, may be NULL (nulldevname is used as its name then).
 * table: table to traverse; the walk runs under read_lock_bh(&table->lock).
 *
 * Returns NF_DROP if a match set hotdrop; otherwise the verdict decoded from
 * a standard target ((unsigned)(-v) - 1) or the value returned by an
 * extension target's target() callback.
 */
unsigned int ipt_do_table(struct sk_buff *skb, unsigned int hook,
const struct net_device *in, const struct net_device *out,
struct xt_table *table)
{
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
u_int16_t offset;
struct iphdr *ip;
/* NOTE(review): datalen is assigned here and after a target runs, but is
 * not read anywhere in the visible code. */
u_int16_t datalen;
bool hotdrop = false;
/* Initializing verdict to NF_DROP keeps gcc happy. */
unsigned int verdict = NF_DROP;
const char *indev, *outdev;
void *table_base;
struct ipt_entry *e, *back;
struct xt_table_info *private;
/* Initialization */
ip = ip_hdr(skb);
datalen = skb->len - ip->ihl * 4;
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
 * if it was a normal packet. All other fragments are treated
 * normally, except that they will NEVER match rules that ask
 * things we don't know, ie. tcp syn flag or ports). If the
 * rule is also a fragment-specific rule, non-fragments won't
 * match it. */
offset = ntohs(ip->frag_off) & IP_OFFSET;
read_lock_bh(&table->lock);
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
/* Fetch the table's struct xt_table_info. */
private = table->private;
/* Base address of the rule entries belonging to the current CPU. */
table_base = (void *)private->entries[smp_processor_id()];
/* hook_entry[] holds, per hook point, the offset of its first rule from
 * the table base; e starts at the first rule of this hook point. */
e = get_entry(table_base, private->hook_entry[hook]);
/* back starts at the underflow entry of this hook point (per the original
 * note: the hook point's last rule, i.e. its default policy). */
back = get_entry(table_base, private->underflow[hook]);
do {
IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
/* ip_packet_match() performs the basic checks carried in e->ip
 * (addresses, interfaces, protocol, fragment flag). */
if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
struct ipt_entry_target *t;
/* do_match() runs each extension match of the rule; a non-zero result
 * means the rule does not match. A match may also set hotdrop to true,
 * which ends the whole traversal. */
if (IPT_MATCH_ITERATE(e, do_match, skb, in, out, offset, &hotdrop) != 0)
goto no_match;
/* Every match succeeded: update the rule's counters. */
ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
/* Fetch the target of this rule. */
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
/* A NULL target() callback in the target marks a standard target. */
if (!t->u.kernel.target->target) {
int v = ((struct ipt_standard_target *)t)->verdict;
if (v < 0) {
/* Negative and not IPT_RETURN: this is a final verdict (ACCEPT or DROP
 * per the original note); decode it and stop processing the table for
 * this hook point. */
if (v != IPT_RETURN) {
verdict = (unsigned)(-v) - 1;
break;
}
/* IPT_RETURN: return from a user-defined chain. back and comefrom
 * together emulate a stack: back is the top element, and each element
 * records the one below it in its comefrom field. */
e = back;
back = get_entry(table_base, back->comefrom);
continue;
}
/* Jump to a user-defined chain: unless this is a plain fall-through to
 * the next rule or a GOTO, push back and remember where to resume. */
if (table_base + v != (void *)e + e->next_offset &&
!(e->ip.flags & IPT_F_GOTO)) {
/* Save old back ptr in next entry */
struct ipt_entry *next = (void *)e + e->next_offset;
next->comefrom = (void *)back - table_base;
/* set back pointer to next entry */
back = next;
}
/* For a jump, v is non-negative and is the offset of the destination
 * rule from the table base. */
e = get_entry(table_base, v);
} else {
/* Extension target: invoke its target() callback directly. */
/* Targets which reenter must return abs. verdicts */
verdict = t->u.kernel.target->target(skb, in, out, hook,
t->u.kernel.target, t->data);
/* Target might have changed stuff. */
ip = ip_hdr(skb);
datalen = skb->len - ip->ihl * 4;
/* IPT_CONTINUE means move on to the next rule; any other value ends
 * the traversal of the table. */
if (verdict == IPT_CONTINUE)
e = (void *)e + e->next_offset;
else
/* Verdict */
break;
}
} else {
no_match:
/* No match: advance to the next rule. */
e = (void *)e + e->next_offset;
}
} while (!hotdrop);/* a match that set hotdrop to true ends the traversal early */
read_unlock_bh(&table->lock);
/* hotdrop being set causes the packet to be dropped. */
if (hotdrop)
return NF_DROP;
else
return verdict;
}
2.1.1 基本匹配ip_packet_match()
该函数把skb的IP头部与struct ipt_entry中的struct ipt_ip结构逐项进行比较:依次检查源/目的IP地址、入口/出口网卡名、协议号以及分片标记,任何一项不满足(考虑取反标志后)即返回false,全部满足才返回true。
/*
We keep a set of rules for each CPU, so we can avoid write-locking
them in the softirq when updating the counters and therefore
only need to read-lock in the softirq; doing a write_lock_bh() in user
context stops packets coming through and allows user context to read
the counters or update the rules.
Hence the start of any table is given by get_table() below. */
/* Returns whether matches rule or not. */
/* Performance critical - called for every packet */
/*
 * Compare the IP header and device names of a packet against the basic
 * (non-extension) part of a rule.
 *
 * ip:      IP header of the packet.
 * indev:   input interface name.
 * outdev:  output interface name.
 * ipinfo:  the rule's struct ipt_ip (addresses, masks, interface names and
 *          masks, protocol, flags, invflags).
 * isfrag:  non-zero for a fragment; ipt_do_table() passes the fragment
 *          offset here.
 *
 * Returns true only if every check below passes, false on the first failure.
 */
static inline bool ip_packet_match(const struct iphdr *ip, const char *indev,
const char *outdev, const struct ipt_ip *ipinfo, int isfrag)
{
size_t i;
unsigned long ret;
/* FWINV flips the result of a test when the corresponding invert flag is
 * set in ipinfo->invflags. */
#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
/* Source and destination address, each compared under the rule's mask. */
if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
IPT_INV_SRCIP)
|| FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
IPT_INV_DSTIP)) {
dprintf("Source or dest mismatch.\n");
return false;
}
/* Look for ifname matches; this should unroll nicely. */
/* The names are compared one unsigned long at a time: ret collects every
 * bit that differs within the rule's interface mask.
 * NOTE(review): this reads IFNAMSIZ bytes through unsigned long pointers,
 * so it assumes both buffers are that large and suitably aligned. */
for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
ret |= (((const unsigned long *)indev)[i]
^ ((const unsigned long *)ipinfo->iniface)[i])
& ((const unsigned long *)ipinfo->iniface_mask)[i];
}
if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
dprintf("VIA in mismatch (%s vs %s).%s\n",
indev, ipinfo->iniface,
ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
return false;
}
/* Same word-wise comparison for the output interface. */
for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
ret |= (((const unsigned long *)outdev)[i]
^ ((const unsigned long *)ipinfo->outiface)[i])
& ((const unsigned long *)ipinfo->outiface_mask)[i];
}
if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
dprintf("VIA out mismatch (%s vs %s).%s\n",
outdev, ipinfo->outiface,
ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
return false;
}
/* Check specific protocol */
/* A zero ipinfo->proto skips the protocol check entirely. */
if (ipinfo->proto
&& FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
dprintf("Packet protocol %hi does not match %hi.%s\n",
ip->protocol, ipinfo->proto,
ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
return false;
}
/* If we have a fragment rule but the packet is not a fragment
 * then we return zero */
if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
dprintf("Fragment rule but not fragment.%s\n",
ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
return false;
}
return true;
}
2.1.2 扩展匹配do_match()
/* Performance critical - called for every packet */
/*
 * Evaluate one extension match of a rule against the packet by calling the
 * match() callback stored in the match entry.
 *
 * Returns true when the callback reports NO match and false when it reports
 * a match; ipt_do_table() treats a non-zero result of the iteration as
 * "rule does not apply". The callback receives hotdrop and may set it.
 */
static inline bool
do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
	 const struct net_device *in, const struct net_device *out,
	 int offset, bool *hotdrop)
{
	/* Invert the callback's answer: a miss is what stops the iteration. */
	return !m->u.kernel.match->match(skb, in, out, m->u.kernel.match,
					 m->data, offset, ip_hdrlen(skb),
					 hotdrop);
}
3. 钩子函数ipt_local_out_hook()
/*
 * Netfilter entry point registered for the LOCAL_OUT hook point (see
 * ipt_ops).
 *
 * Packets too short to hold an IP header are accepted without consulting
 * the table, with a rate-limited log message. Everything else is evaluated
 * against the filter table of init_net through ipt_do_table(). okfn is not
 * used here.
 */
static unsigned int ipt_local_out_hook(unsigned int hook, struct sk_buff *skb,
				       const struct net_device *in,
				       const struct net_device *out,
				       int (*okfn)(struct sk_buff *))
{
	/* Normal case: the packet is long enough, so walk the filter table. */
	if (skb->len >= sizeof(struct iphdr) &&
	    ip_hdrlen(skb) >= sizeof(struct iphdr))
		return ipt_do_table(skb, hook, in, out,
				    init_net.ipv4.iptable_filter);

	/* root is playing with raw sockets. */
	if (net_ratelimit())
		printk("iptable_filter: ignoring short SOCK_RAW packet.\n");

	return NF_ACCEPT;
}