Netfilter的基本结构
(1)从IP包的收发流程中可以看到,当IP包接收器(ip_rcv)接收到正确的IP包后,首先将其注入“路由前过滤器”(NF_IP_PRE_ROUTING)。路由前过滤器的输出(ip_rcv_finish)包经过绑定路由和编译IP选项区之后输入到路由入口(dst->input)。对于本地接收包来说,路由入口指向IP本地分发器(ip_local_deliver),本地分发器首先进行IP碎片重组,然后将IP合成包注入“本地输入过滤器”(NF_IP_LOCAL_IN),本地输入过滤器的输出(ip_local_deliver_finish)包进一步传递到IP传输协议控制器(igmp_rcv,tcp_v4_rcv,udp_rcv,icmp_rcv),最后包被投送到各个套接字的接收队列中。对于转发包来说,路由入口将指向IP转发器(ip_forward),转发器对IP包预处理后注入“IP转发过滤器”(NF_IP_FORWARD),转发过滤器的输出(ip_forward_finish)包经过分片处理后通过路由出口(dst->output)注入“IP路由后过滤器”(NF_IP_POST_ROUTING)。路由后过滤器的输出(ip_finish_output2)包或者通过邻居出口(dst->neighbour->output)调用neigh_resolve_output()创建硬件帧头后输出,或者在ip_finish_output2中直接创建帧头并从dst->hh->hh_output输出,最后输入到包调度器(dev_queue_xmit)中进行排队发送。
对于本地发送包来说,IP包生成器产生的输出包首先注入“本地输出过滤器”(NF_IP_LOCAL_OUT),
本地输出过滤器的输出(output_maybe_reroute)包经过分片处理(ip_fragment)后再通过路由出口(dst->output)将包发送出去。
(2)nf_hook_ops结构描述。系统中所有的网络包过滤器都登记在网络过滤器的钩链数组(nf_hooks)中,它是用网络协议族编号和过滤器编号索引的二维链头(list_head)结构数组,数组中的每一元素代表一种过滤器链。网络过滤器以NF_HOOK(family,num,skb,indev,outdev,output)宏的形式插入到代码中。当执行到NF_HOOK()时,如果nf_hooks[family][num]过滤器链非空,系统将调用nf_hook_slow(family,num,skb,indev,outdev,output)将包依次传递给过滤链中的过滤函数。在每一个过滤环节上,如果过滤函数返回NF_ACCEPT,则过滤包允许输出,这时过滤包将注入下一链节过滤器,或者(链已到末尾时)调用output参数函数将过滤包输出。如果过滤函数返回NF_DROP,则过滤过程被终止,过滤包禁止输出并被释放。如果过滤函数返回NF_STOLEN,说明输入包已被转移走,终止过滤且无输出。如果过滤函数返回NF_REPEAT,过滤包将由同一过滤函数重新过滤一次。如果过滤函数返回NF_QUEUE,则当前过滤参数和输入包将被传递到过滤器协议族的过滤队列中进行处理(queue_handler)。
include/linux/netfilter.h:
/*
 * NF_HOOK - pass a packet through the hook chain nf_hooks[pf][hook].
 *
 * When the chain is empty the continuation okfn(skb) is called directly;
 * otherwise the packet is handed to nf_hook_slow(), which walks the chain.
 * The value of the expression is whatever the chosen call returns.
 *
 * Note: pf and hook appear twice in the expansion, so do not pass
 * expressions with side effects for them.
 */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn)			\
	(list_empty(&nf_hooks[(pf)][(hook)])				\
	 ? (okfn)(skb)							\
	 : nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn)))
#endif
/*
 * Largest hook number + 1.  Also used as the size of the second
 * (hook number) dimension of the nf_hooks[][] array.
 */
#define NF_MAX_HOOKS 8
/*
 * Prototype of a netfilter hook (filter) function.
 *
 * hooknum: number of the hook chain the function is being called from.
 * skb:     the packet, passed by reference (struct sk_buff **).
 *          NOTE(review): presumably so that a hook may substitute the
 *          buffer - confirm against the hook implementations.
 * in, out: input and output devices handed through from the caller.
 * okfn:    continuation that nf_hook_slow()/nf_reinject() call once the
 *          whole chain has accepted the packet.
 *
 * The return value is the verdict examined by nf_iterate(): NF_ACCEPT,
 * NF_DROP, NF_STOLEN, NF_QUEUE or NF_REPEAT.
 */
typedef unsigned int nf_hookfn(unsigned int hooknum,
struct sk_buff **skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *));
/*
 * Describes one registered netfilter hook, i.e. one filter linked on a
 * hook chain in nf_hooks[pf][hooknum].
 */
struct nf_hook_ops
{
	/*
	 * Chain linkage.  Must remain the first member: the list walkers
	 * cast a struct list_head pointer straight to struct nf_hook_ops.
	 */
	struct list_head list;

	/* User fills in from here down. */
	nf_hookfn *hook;	/* the filter function */
	int pf;			/* protocol family of the filter */
	int hooknum;		/* hook (filter chain) number */
	/* Hooks are ordered in ascending priority. */
	int priority;		/* filters on the same chain are kept sorted
				 * by ascending priority value */
};
/*
 * Bookkeeping record attached to every skbuff that is queued (to
 * userspace).  It remembers where in the hook chain the packet was and
 * the arguments needed to resume traversal later.
 *
 * The member order is significant: nf_queue() fills the record with a
 * positional initializer.
 */
struct nf_info
{
	/* The ops struct which sent us to userspace. */
	struct nf_hook_ops *elem;

	/* If we're sent to userspace, this keeps housekeeping info */
	int pf;					/* protocol family */
	unsigned int hook;			/* hook number */
	struct net_device *indev;		/* input device, may be NULL */
	struct net_device *outdev;		/* output device, may be NULL */
	int (*okfn)(struct sk_buff *);		/* continuation after accept */
};
; net/core/netfilter.c:
/*
 * Heads of all hook chains, indexed as nf_hooks[protocol family][hook number].
 * Each element is the head of a circular list of struct nf_hook_ops.
 */
struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
/*
 * Output function of a queue handler.  nf_queue() calls it with the packet,
 * the nf_info record describing where the packet was queued, and the opaque
 * data pointer supplied at registration.  A negative return value tells
 * nf_queue() that the packet was not taken; nf_queue() then frees it.
 *
 * Declared before struct nf_queue_handler_t because that struct uses it.
 */
typedef int (*nf_queue_outfn_t)(struct sk_buff *skb,
				struct nf_info *info, void *data);

/*
 * A queue handler may be registered for each protocol.  Each is protected by
 * long term mutex.  The handler must provide an outfn() to accept packets
 * for queueing and must reinject all packets it receives, no matter what.
 */
static struct nf_queue_handler_t {
	nf_queue_outfn_t outfn;	/* NULL when no handler is registered */
	void *data;		/* passed back to outfn() unchanged */
} queue_handler[NPROTO];
; net/core/netfilter.c:
/*
 * nf_hook_slow - run a packet through every hook on nf_hooks[pf][hook].
 * @pf:     protocol family (first index into nf_hooks)
 * @hook:   hook number (second index into nf_hooks)
 * @skb:    the packet
 * @indev:  input device, handed to each hook
 * @outdev: output device, handed to each hook
 * @okfn:   continuation called when the whole chain accepts the packet
 *
 * Returns the result of okfn(skb) when the chain verdict is NF_ACCEPT,
 * -EPERM when it is NF_DROP (the packet is freed), and 0 for any other
 * verdict (e.g. the packet was queued via nf_queue() or stolen by a hook).
 *
 * The whole traversal runs under the BR_NETPROTO_LOCK read lock.
 */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
		 struct net_device *indev,
		 struct net_device *outdev,
		 int (*okfn)(struct sk_buff *))
{
	struct list_head *elem;
	unsigned int verdict;
	int ret = 0;

	/* We may already have this, but read-locks nest anyway */
	br_read_lock_bh(BR_NETPROTO_LOCK);

#ifdef CONFIG_NETFILTER_DEBUG
	/* Each hook number sets one bit in nf_debug; seeing it set already
	 * means this packet passed through this hook before. */
	if (skb->nf_debug & (1 << hook)) {
		printk("nf_hook: hook %i already set.\n", hook);
		nf_dump_skb(pf, skb);
	}
	skb->nf_debug |= (1 << hook);
#endif

	/* Start from the chain head; nf_iterate() advances elem and leaves
	 * it on the hook that produced a non-accept verdict. */
	elem = &nf_hooks[pf][hook];
	verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
			     outdev, &elem, okfn);
	if (verdict == NF_QUEUE) {
		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
		nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
	}

	switch (verdict) {
	case NF_ACCEPT:
		ret = okfn(skb);
		break;

	case NF_DROP:
		kfree_skb(skb);
		ret = -EPERM;
		break;
	}

	br_read_unlock_bh(BR_NETPROTO_LOCK);
	return ret;
}
static unsigned int nf_iterate(struct list_head *head,
struct sk_buff **skb,
int hook,
const struct net_device *indev,
const struct net_device *outdev,
struct list_head **i,
int (*okfn)(struct sk_buff *)) 迭代过滤器
{
for (*i = (*i)->next; *i != head; *i = (*i)->next) { 扫描过滤器的环形链表
struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; 取过滤器结构
switch (elem->hook(hook, skb, indev, outdev, okfn)) { 调用过滤函数
case NF_QUEUE:
return NF_QUEUE;
case NF_STOLEN:
return NF_STOLEN;
case NF_DROP:
return NF_DROP;
case NF_REPEAT:
*i = (*i)->prev;
break;
#ifdef CONFIG_NETFILTER_DEBUG
case NF_ACCEPT:
break;
default:
NFDEBUG("Evil return from %p(%u)./n",
elem->hook, hook);
#endif
}
}
return NF_ACCEPT;
}
/*
 * nf_queue - hand a packet to the queue handler registered for @pf.
 * @skb:    the packet
 * @elem:   list entry of the hook that returned NF_QUEUE
 * @pf:     protocol family
 * @hook:   hook number
 * @indev:  input device, may be NULL
 * @outdev: output device, may be NULL
 * @okfn:   continuation to call once the packet is finally accepted
 *
 * Any packet that leaves via this function must come back
 * through nf_reinject().
 *
 * The packet is freed here when no handler is registered, when the
 * nf_info record cannot be allocated, or when the handler's outfn()
 * returns a negative status.
 */
static void nf_queue(struct sk_buff *skb,
		     struct list_head *elem,
		     int pf, unsigned int hook,
		     struct net_device *indev,
		     struct net_device *outdev,
		     int (*okfn)(struct sk_buff *))
{
	int status;
	struct nf_info *info;

	if (!queue_handler[pf].outfn) {
		kfree_skb(skb);
		return;
	}

	info = kmalloc(sizeof(*info), GFP_ATOMIC);
	if (!info) {
		if (net_ratelimit())
			printk(KERN_ERR "OOM queueing packet %p\n",
			       skb);
		kfree_skb(skb);
		return;
	}

	/* Save the filter parameters; the initializer is positional and
	 * relies on the member order of struct nf_info. */
	*info = (struct nf_info) {
		(struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };

	/* Bump dev refs so they don't vanish while packet is out */
	if (indev) dev_hold(indev);
	if (outdev) dev_hold(outdev);

	status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
	if (status < 0) {
		/* The handler did not take the packet: drop the device
		 * references again and free both the record and the packet. */
		if (indev) dev_put(indev);
		if (outdev) dev_put(outdev);
		kfree(info);
		kfree_skb(skb);
		return;
	}
}
void nf_reinject(struct sk_buff *skb, struct nf_info *info,
unsigned int verdict) 将包和它的过滤参数重新注入过滤器
{
struct list_head *elem = &info->elem->list;
struct list_head *i;
/* We don't have BR_NETPROTO_LOCK here */
br_read_lock_bh(BR_NETPROTO_LOCK);
for (i = nf_hooks[info->pf][info->hook].next; i != elem; i = i->next) {
if (i == &nf_hooks[info->pf][info->hook]) {
/* The module which sent it to userspace is gone. */
NFDEBUG("%s: module disappeared, dropping packet./n",
__FUNCTION__);
verdict = NF_DROP;
break;
}
}
/* Continue traversal iff userspace said ok... */
if (verdict == NF_REPEAT) {
elem = elem->prev;
verdict = NF_ACCEPT;
}
if (verdict == NF_ACCEPT) {
verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
&skb, info->hook,
info->indev, info->outdev, &elem,
info->okfn);
}
switch (verdict) {
case NF_ACCEPT:
info->okfn(skb);
break;
case NF_QUEUE:
nf_queue(skb, elem, info->pf, info->hook,
info->indev, info->outdev, info->okfn);
break;
case NF_DROP:
kfree_skb(skb);
break;
}
br_read_unlock_bh(BR_NETPROTO_LOCK);
/* Release those devices we held, or Alexey will kill me. */
if (info->indev) dev_put(info->indev);
if (info->outdev) dev_put(info->outdev);
kfree(info);
return;
}
int nf_register_hook(struct nf_hook_ops *reg) 注册过滤器
{
struct list_head *i;
br_write_lock_bh(BR_NETPROTO_LOCK);
for (i = nf_hooks[reg->pf][reg->hooknum].next;
i != &nf_hooks[reg->pf][reg->hooknum];
i = i->next) {
if (reg->priority < ((struct nf_hook_ops *)i)->priority)
break;
}
list_add(®->list, i->prev);
br_write_unlock_bh(BR_NETPROTO_LOCK);
return 0;
}
void nf_unregister_hook(struct nf_hook_ops *reg)
{
br_write_lock_bh(BR_NETPROTO_LOCK);
list_del(®->list);
br_write_unlock_bh(BR_NETPROTO_LOCK);
}
/*
 * nf_register_queue_handler - register the queue handler for a protocol
 * family.
 * @pf:    protocol family (index into queue_handler[])
 * @outfn: function that nf_queue() calls to hand over a packet
 * @data:  opaque pointer passed back to @outfn on every call
 *
 * Only one handler may be registered per protocol family.
 *
 * Returns 0 on success, or -EBUSY if a handler is already registered
 * for @pf.
 */
int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
{
	int ret;

	br_write_lock_bh(BR_NETPROTO_LOCK);
	if (queue_handler[pf].outfn)
		ret = -EBUSY;
	else {
		queue_handler[pf].outfn = outfn;
		queue_handler[pf].data = data;
		ret = 0;
	}
	br_write_unlock_bh(BR_NETPROTO_LOCK);

	return ret;
}
/*
 * nf_unregister_queue_handler - remove the queue handler for a protocol
 * family.
 * @pf: protocol family (index into queue_handler[])
 *
 * The caller must flush their queue before this.
 *
 * Both handler fields are reset to NULL under the BR_NETPROTO_LOCK write
 * lock.  Always returns 0.
 */
int nf_unregister_queue_handler(int pf)
{
	br_write_lock_bh(BR_NETPROTO_LOCK);

	/* Clearing outfn marks the slot as free for a new registration. */
	queue_handler[pf].outfn = NULL;
	queue_handler[pf].data = NULL;

	br_write_unlock_bh(BR_NETPROTO_LOCK);

	return 0;
}