Netfilter之框架初始化与对外接口

最新推荐文章于 2024-08-24 12:20:35 发布

fanxiaoyu321

最新推荐文章于 2024-08-24 12:20:35 发布

阅读量1.2k

点赞数

分类专栏： Netfilter Linux Netfilter代码分析文章标签： Netfilter NF_HOOK nf_hook_slow

本文链接：https://blog.csdn.net/xiaoyu_750516366/article/details/88775990

版权

Netfilter 同时被 2 个专栏收录

19 篇文章 11 订阅

订阅专栏

Linux Netfilter代码分析

19 篇文章 8 订阅

订阅专栏

本文详细介绍了Netfilter框架的核心数据结构，包括nf_hooks二维数组、struct nf_hook_ops和struct nf_sockopt_ops。接着，讨论了框架的初始化过程，如netfilter_queue_init()和netfilter_log_init()。文章还阐述了如何注册和去注册钩子及用户空间接口，以及如何遍历HOOK点，重点讲解了NF_HOOK系列函数和nf_hook_slow()的工作原理。

摘要由CSDN通过智能技术生成

从Makefile中也可以看的出来，Netfilter框架的核心功能由如下几个文件实现：

# Objects that make up the composite object netfilter.o
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
# netfilter.o is added to the obj- list selected by CONFIG_NETFILTER
obj-$(CONFIG_NETFILTER) = netfilter.o

其中core.c文件又是重中之重，log、queue和sockopt我们暂不关注，这篇笔记只是简单列举下，以后需要时再来详细分析。

1. 数据结构

1.1 nf_hooks

这是一个二维数组，数组的每个成员都是一个链表头。各个协议族通过向Netfilter框架注册自己的钩子（HOOK）来实现具体的功能，该二维数组维护的就是各个协议族的注册信息。

从定义来看，当前预估可能的最多协议族为34个，并且每个协议族最多只能有8个HOOK点，当然每个HOOK点是可以挂很多的钩子函数，不然也不需要用链表实现了。

/* First dimension of nf_hooks: one row per protocol family. */
#define NPROTO		34		/* should be enough for now..	*/
/* Largest hook number + 1 */
#define NF_MAX_HOOKS 8
/*
 * Hook table, indexed as nf_hooks[protocol family][hook number].
 * Every slot is a list head; registered struct nf_hook_ops objects are
 * linked onto the list of the slot they belong to.
 */
struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly;
EXPORT_SYMBOL(nf_hooks);
/* Taken by nf_register_hook()/nf_unregister_hook() while they modify nf_hooks. */
static DEFINE_MUTEX(nf_hook_mutex);

1.2 struct nf_hook_ops

nf_hooks每条链表上的节点实际上是一个struct nf_hook_ops结构，它通过内嵌的list成员挂到对应的链表头上。

/*
 * Verdicts a hook function may return. How each verdict is acted upon is
 * decided by nf_iterate() and nf_hook_slow().
 */
#define NF_DROP 0
#define NF_ACCEPT 1
#define NF_STOLEN 2
#define NF_QUEUE 3
#define NF_REPEAT 4
#define NF_STOP 5
#define NF_MAX_VERDICT NF_STOP
/*
 * Prototype of a hook function.
 *   hooknum - number of the hook point being traversed
 *   skb     - the packet under inspection
 *   in/out  - input and output devices passed through by the caller
 *   okfn    - continuation to run once the packet has passed the hook point
 * The return value is one of the NF_* verdicts above.
 */
typedef unsigned int nf_hookfn(unsigned int hooknum, struct sk_buff *skb,
		const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *));

/*
 * One registered hook: a callback bound to a (protocol family, hook number)
 * slot of nf_hooks, together with its ordering priority.
 */
struct nf_hook_ops
{
	/*
	 * Links all hooks of the same protocol family and hook point.
	 * Must stay the first member: nf_iterate() casts a list_head pointer
	 * directly to struct nf_hook_ops.
	 */
	struct list_head list;
	/* The callback itself, followed by owner module, family, hook point and priority */
	nf_hookfn *hook;
	struct module *owner;
	int pf;
	int hooknum;
	/* Hooks are ordered in ascending priority. */
	int priority;
};

所以，Netfilter框架对系统中所有协议族的钩子函数的组织用下图表示：
在这里插入图片描述

1.3 struct nf_sockopt_ops

内核和用户空间的iptables工具是通过系统调用setsockopt()和getsockopt()进行交互的，不同的协议族有各自的数据结构以及独特的处理，但是作为框架，Netfilter需要给各个协议族提供一个统一的入口，该机制的核心数据结构就是struct nf_sockopt_ops。

各个协议族向Netfilter框架注册这样的对象，框架收到socket选项调用时根据注册信息进行命令的分发处理。

/*
 * Socket-option handlers that one protocol family registers with the
 * framework. Objects of this type are linked onto the global nf_sockopts
 * list by nf_register_sockopt().
 */
struct nf_sockopt_ops
{
	/* Linkage on the global nf_sockopts list */
	struct list_head list;
	/* Protocol family these handlers belong to */
	int pf;
	/* Non-inclusive ranges: use 0/0/NULL to never get called. */
	/* Lower and upper bound of the SET option numbers handled here */
	int set_optmin;
	int set_optmax;
	/* Handlers that execute a SET command (native and compat variants) */
	int (*set)(struct sock *sk, int optval, void __user *user, unsigned int len);
	int (*compat_set)(struct sock *sk, int optval, void __user *user, unsigned int len);
	/* Lower and upper bound of the GET option numbers handled here */
	int get_optmin;
	int get_optmax;
	/* Handlers that execute a GET command (native and compat variants) */
	int (*get)(struct sock *sk, int optval, void __user *user, int *len);
	int (*compat_get)(struct sock *sk, int optval, void __user *user, int *len);
	/* Use the module struct to lock set/get code in place */
	struct module *owner;
};

2. 初始化

netfilter框架的初始化函数netfilter_init()是在开机过程中由sock_init()调用的。

/*
 * Socket-layer init routine, registered as a core initcall. The body is
 * abridged ("...") in this excerpt; the part shown is the call that brings
 * up the Netfilter core.
 */
static int __init sock_init(void)
{
...
/* The Netfilter core is only initialized when CONFIG_NETFILTER is enabled */
#ifdef CONFIG_NETFILTER
	netfilter_init();
#endif
	return 0;
}
core_initcall(sock_init);	/* early initcall */

/*
 * Boot-time initialization of the Netfilter core: empty every hook list,
 * create the "netfilter" proc directory (when procfs is configured) and
 * initialize the queue and log sub-parts. Any failure is fatal (panic).
 */
void __init netfilter_init(void)
{
	int family;
	int hooknum;
	struct list_head *row;

	/* Turn every nf_hooks[family][hooknum] slot into an empty list head. */
	for (family = 0; family < NPROTO; family++) {
		row = nf_hooks[family];
		for (hooknum = 0; hooknum < NF_MAX_HOOKS; hooknum++) {
			INIT_LIST_HEAD(&row[hooknum]);
		}
	}

#ifdef CONFIG_PROC_FS
	/*
	 * Create the "netfilter" directory under init_net.proc_net
	 * (presumably visible as /proc/net/netfilter - confirm on target).
	 */
	proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net);
	if (!proc_net_netfilter) {
		panic("cannot create netfilter proc entry");
	}
#endif

	if (netfilter_queue_init() < 0) {
		panic("cannot initialize nf_queue");
	}

	if (netfilter_log_init() < 0) {
		panic("cannot initialize nf_log");
	}
}

2.1 netfilter_queue_init()

/*
 * Boot-time setup of the nf_queue part of the framework.
 * Returns 0 on success, -1 if the proc entry could not be created.
 * Without CONFIG_PROC_FS there is nothing to do and 0 is returned.
 */
int __init netfilter_queue_init(void)
{
	int status = 0;

#ifdef CONFIG_PROC_FS
	/* Read-only "nf_queue" file inside the netfilter proc directory. */
	if (!proc_create("nf_queue", S_IRUGO, proc_net_netfilter, &nfqueue_file_ops))
		status = -1;
#endif
	return status;
}

2.2 netfilter_log_init()

/*
 * Boot-time setup of the nf_log part of the framework.
 * Returns 0 on success, -1 if the proc entry could not be created.
 * Without CONFIG_PROC_FS there is nothing to do and 0 is returned.
 */
int __init netfilter_log_init(void)
{
	int status = 0;

#ifdef CONFIG_PROC_FS
	/* Read-only "nf_log" file inside the netfilter proc directory. */
	if (!proc_create("nf_log", S_IRUGO, proc_net_netfilter, &nflog_file_ops))
		status = -1;
#endif
	return status;
}

3. 对外接口

3.1 钩子的注册&去注册

具体的协议族要想使用Netfilter框架提供的接口，必须提前向框架注册自己的钩子，这个动作通常是在各个协议族自己的初始化过程中完成的，这里我们重点关注注册接口的内部实现。

/*
 * Register one hook on the list of its (reg->pf, reg->hooknum) slot in
 * nf_hooks, keeping that list sorted by ascending priority value.
 *
 * Returns 0 on success, or the negative value reported by
 * mutex_lock_interruptible() when taking nf_hook_mutex failed.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
	struct nf_hook_ops *elem;
	int err;

	err = mutex_lock_interruptible(&nf_hook_mutex);
	if (err < 0)
		return err;
	/*
	 * Hooks on one hook point are kept sorted by priority: a smaller
	 * number means a higher priority and sits nearer the front, so it is
	 * reached first when the list is walked. Stop at the first existing
	 * entry whose priority value is larger than the new one.
	 */
	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
		if (reg->priority < elem->priority)
			break;
	}
	/*
	 * Insert directly in front of elem. If the loop ran to completion,
	 * elem->list is the list head itself, so the new hook goes to the tail.
	 */
	list_add_rcu(&reg->list, elem->list.prev);
	mutex_unlock(&nf_hook_mutex);
	return 0;
}
EXPORT_SYMBOL(nf_register_hook);

/*
 * Remove a previously registered hook from its list.
 * The unlink is done under nf_hook_mutex; synchronize_net() is called after
 * the mutex is dropped and before returning (presumably so that readers
 * still traversing the list have finished before the caller reuses reg).
 */
void nf_unregister_hook(struct nf_hook_ops *reg)
{
	mutex_lock(&nf_hook_mutex);
	list_del_rcu(&reg->list);
	mutex_unlock(&nf_hook_mutex);
	synchronize_net();
}
EXPORT_SYMBOL(nf_unregister_hook);

此外，框架也提供了nf_register_hooks()和nf_unregister_hooks()，可以一次注册或去注册多个钩子。

3.2 用户空间接口的注册&去注册

Netfilter框架将所有协议族注册的struct nf_sockopt_ops使用全局链表保存在一起：

/* Sockopts only registered and called from user context, so
   net locking would be overkill.  Also, [gs]etsockopt calls may
   sleep. */
/* Held by nf_register_sockopt() while it scans and modifies nf_sockopts. */
static DEFINE_MUTEX(nf_sockopt_mutex);
/* Global list of every registered struct nf_sockopt_ops, for all families. */
static LIST_HEAD(nf_sockopts);

注册代码如下：

/* Functions to register sockopt ranges (exclusive). */
/*
 * Add reg to the global nf_sockopts list unless one of its SET/GET option
 * ranges overlaps a range already registered for the same protocol family.
 *
 * Returns 0 on success, -EINTR if waiting for nf_sockopt_mutex was
 * interrupted, -EBUSY on a range conflict (reg is then not linked).
 */
int nf_register_sockopt(struct nf_sockopt_ops *reg)
{
	struct nf_sockopt_ops *cur;
	int status = 0;

	if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
		return -EINTR;

	/*
	 * Option numbers must be unique within a protocol family, otherwise
	 * a command could not be dispatched to a single handler.
	 */
	list_for_each_entry(cur, &nf_sockopts, list) {
		/* Entries of other families can never conflict. */
		if (cur->pf != reg->pf)
			continue;
		if (!overlap(cur->set_optmin, cur->set_optmax,
			     reg->set_optmin, reg->set_optmax)
		    && !overlap(cur->get_optmin, cur->get_optmax,
				reg->get_optmin, reg->get_optmax))
			continue;

		NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
			cur->set_optmin, cur->set_optmax,
			cur->get_optmin, cur->get_optmax,
			reg->set_optmin, reg->set_optmax,
			reg->get_optmin, reg->get_optmax);
		status = -EBUSY;
		break;
	}

	/* No conflict found: link the new handlers onto the global list. */
	if (status == 0)
		list_add(&reg->list, &nf_sockopts);

	mutex_unlock(&nf_sockopt_mutex);
	return status;
}
EXPORT_SYMBOL(nf_register_sockopt);

去注册函数为nf_unregister_sockopt()，不再赘述。

4. 遍历HOOK点

钩子全部注册在了框架中，但是框架本身并不知道应该在何时何地触发某个HOOK点上钩子的执行，只有具体的协议族知道自己的HOOK点，为了尽可能的保持代码的简洁，框架提供了一组宏（也有部分接口）给协议族，协议族通过这些接口在合适的位置触发钩子的执行即可。

下面列出的都是开启了CONFIG_NETFILTER开关后的实现，不开启时根本没有钩子，所以这些接口要么为空，要么直接调用okfn()。

4.1 NF_HOOK

/*
 * Run the hooks of (pf, hook) on skb without a priority threshold
 * (INT_MIN); expands to NF_HOOK_THRESH and therefore yields its value.
 */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
	NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)

/*
 * Run the hooks of (pf, hook) on skb, passing thresh as the priority
 * threshold and 1 as the condition. If nf_hook_thresh() returns 1 the
 * expression evaluates to okfn(skb); otherwise it evaluates to the value
 * nf_hook_thresh() returned and okfn is not called.
 */
#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)	       \
({int __ret;								       \
if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\
	__ret = (okfn)(skb);						       \
__ret;})

4.2 NF_HOOK_COND

/*
 * Like NF_HOOK, but forwards cond to nf_hook_thresh(), which skips the hook
 * traversal and returns 1 when cond is false. The priority threshold is
 * INT_MIN. If nf_hook_thresh() returns 1 the expression evaluates to
 * okfn(skb); otherwise to the value nf_hook_thresh() returned.
 */
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond)		       \
({int __ret;								       \
if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
	__ret = (okfn)(skb);						       \
__ret;})

4.3 nf_hook()

/*
 * Function form of the hook traversal: no priority threshold (INT_MIN) and
 * an always-true condition. Unlike the NF_HOOK macro it does not call okfn
 * itself; it only returns what nf_hook_thresh() returns.
 */
static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb,
			  struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *))
{
	const int always = 1;

	return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN, always);
}

上面的这几个宏或者函数仅仅是包装函数，它们最终都会调用nf_hook_thresh()，它们的差异也很直观，就是参数thresh和cond不同而已，下面会看到这两个参数的作用。

4.4 nf_hook_thresh()

/**
 *	nf_hook_thresh - call a netfilter hook
 *
 *	Returns 1 if the hook has allowed the packet to pass.  The function
 *	okfn must be invoked by the caller in this case.  Any other return
 *	value indicates the packet has been consumed by the hook.
 *
 *	@cond gates the traversal: when it is zero the hooks are not run at
 *	all and 1 is returned, i.e. the packet is treated as having passed
 *	this hook point.  Otherwise the result of nf_hook_slow() is returned.
 */
static inline int nf_hook_thresh(int pf, unsigned int hook, struct sk_buff *skb,
			struct net_device *indev, struct net_device *outdev,
			int (*okfn)(struct sk_buff *), int thresh, int cond)
{
	return cond ? nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh)
		    : 1;
}

4.5 nf_hook_slow()

从上面可以看到，真正检查数据包skb是否可以通过该HOOK点是通过nf_hook_slow()实现的，该函数返回1表示skb通过了该HOOK点，返回其它值表示skb已经被钩子消耗掉了。

返回非1的值时要特别注意，这时skb可能已经被释放了。

/* we overload the higher bits for encoding auxiliary data such as the queue
 * number. Not nice, but better than additional function arguments. */
/* Low 16 bits of a hook's return value: the verdict proper. */
#define NF_VERDICT_MASK 0x0000ffff
/* Shift that exposes the auxiliary data stored above the verdict bits. */
#define NF_VERDICT_BITS 16
/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise. */
int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
		 struct net_device *indev, struct net_device *outdev,
		 int (*okfn)(struct sk_buff *), int hook_thresh)
{
	struct list_head *elem;
	unsigned int verdict;
	int ret = 0;

	/* We may already have this, but read-locks nest anyway */
	rcu_read_lock();
	/* Start the traversal cursor at the list head of this family / hook point */
	elem = &nf_hooks[pf][hook];
next_hook:
	/*
	 * Walk the hooks that follow elem; nf_iterate() advances elem and
	 * returns the verdict that ended the walk.
	 */
	verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, outdev, &elem, okfn, hook_thresh);
	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
		/*
		 * NF_ACCEPT (no remaining hook objected) or NF_STOP: the
		 * packet passes this hook point; the caller must run okfn().
		 */
		ret = 1;
		goto unlock;
	} else if (verdict == NF_DROP) {
		/* NF_DROP: the packet is refused; free it here and report -EPERM */
		kfree_skb(skb);
		ret = -EPERM;
	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
		/*
		 * NF_QUEUE: hand the packet to nf_queue(), passing the bits
		 * above the verdict as its last argument. When nf_queue()
		 * returns 0 the walk resumes after elem; otherwise ret stays 0.
		 */
		if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS))
			goto next_hook;
	}
	/* Any other verdict (e.g. NF_STOLEN) falls through here with ret == 0 */
unlock:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(nf_hook_slow);

从上面可以看出，钩子函数返回NF_DROP、NF_ACCEPT、NF_STOP都属于一种HOOK点最后的判决结果。

4.5.1 nf_iterate()

/*
 * Run the hooks on the list 'head', starting with the entry after *i.
 *
 * *i is the traversal cursor and is updated as the walk proceeds, so a
 * caller can resume from where a previous call stopped.
 *
 * Returns the first verdict that is neither NF_ACCEPT nor NF_REPEAT, or
 * NF_ACCEPT when the end of the list is reached.
 */
unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, int hook,
			const struct net_device *indev, const struct net_device *outdev,
			struct list_head **i, int (*okfn)(struct sk_buff *), int hook_thresh)
{
	unsigned int verdict;

	/*
	 * The caller must not block between calls to this
	 * function because of risk of continuing from deleted element.
	 */
	/* Continue walking forward from the position given by *i */
	list_for_each_continue_rcu(*i, head) {
		/* Valid because 'list' is the first member of struct nf_hook_ops */
		struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
		/*
		 * Skip hooks whose priority value is below hook_thresh. This
		 * is the purpose of the thresh argument of the outer
		 * wrappers: only hooks with priority >= hook_thresh are run.
		 */
		if (hook_thresh > elem->priority)
			continue;
		/* Optimization: we don't need to hold module
		   reference here, since function can't sleep. --RR */
		/* Invoke the hook function and collect its verdict */
		verdict = elem->hook(hook, skb, indev, outdev, okfn);
		/*
		 * A single NF_ACCEPT does not end the walk: the function only
		 * returns NF_ACCEPT after every remaining hook has accepted
		 * (this is a different notion from an iptables rule verdict).
		 */
		if (verdict != NF_ACCEPT) {
			/* Neither NF_ACCEPT nor NF_REPEAT: hand the verdict to the caller */
			if (verdict != NF_REPEAT)
				return verdict;
			/*
			 * NF_REPEAT: step the cursor back one node so that the
			 * loop's advance lands on this same hook again.
			 */
			*i = (*i)->prev;
		}
	}
	/* Reached the end of the list without any hook objecting */
	return NF_ACCEPT;
}