从ip_queue到nfnetlink_queue(下)
本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝、转载,转载时请保持文档的完整性,严禁用于任何商业用途。
3. 内核空间
内核版本2.6.17.11。
内核空间的代码程序包括net/netfilter/nfnetlink_queue.c和xt_NFQUEUE.c,前者是具体实现,后者是iptables的一个目标,用来指定数据属于哪个队列。
3.1 数据结构
/* include/linux/netfilter/nfnetlink_queue.h */
/* Message types carried over the nfqueue netlink channel. */
enum nfqnl_msg_types {
	NFQNL_MSG_PACKET,	/* packet from kernel to userspace */
	NFQNL_MSG_VERDICT,	/* verdict from userspace to kernel */
	NFQNL_MSG_CONFIG,	/* connect to a particular queue */

	NFQNL_MSG_MAX		/* count of message types above; keep last */
};
// nfqueue netlink message: per-packet header.
// The struct is packed, so its fields are laid out without padding.
struct nfqnl_msg_packet_hdr {
u_int32_t packet_id; /* unique ID of packet in queue */
u_int16_t hw_protocol; /* hw protocol (network order) */
u_int8_t hook; /* netfilter hook */
} __attribute__ ((packed));
// nfqueue netlink message: hardware part of the packet header (the MAC address).
// hw_addr holds up to 8 bytes; hw_addrlen presumably gives how many of them
// are valid -- confirm against the code that fills this structure.
struct nfqnl_msg_packet_hw {
u_int16_t hw_addrlen;
u_int16_t _pad;
u_int8_t hw_addr[8];
} __attribute__ ((packed));
// nfqueue netlink message: packet timestamp made of two 64-bit fields
// (sec and usec).
struct nfqnl_msg_packet_timestamp {
aligned_u64 sec;
aligned_u64 usec;
} __attribute__ ((packed));
/*
 * Attribute types of an nfqueue netlink message.  The trailing comment on
 * each enumerator names the payload carried by that attribute.
 */
enum nfqnl_attr_type {
	NFQA_UNSPEC,
	NFQA_PACKET_HDR,
	NFQA_VERDICT_HDR,		/* nfqnl_msg_verdict_hdr */
	NFQA_MARK,			/* u_int32_t nfmark */
	NFQA_TIMESTAMP,			/* nfqnl_msg_packet_timestamp */
	NFQA_IFINDEX_INDEV,		/* u_int32_t ifindex */
	NFQA_IFINDEX_OUTDEV,		/* u_int32_t ifindex */
	NFQA_IFINDEX_PHYSINDEV,		/* u_int32_t ifindex */
	NFQA_IFINDEX_PHYSOUTDEV,	/* u_int32_t ifindex */
	NFQA_HWADDR,			/* nfqnl_msg_packet_hw */
	NFQA_PAYLOAD,			/* opaque data payload */

	__NFQA_MAX			/* sentinel; keep last */
};
/* Highest valid attribute type (the enumerator just before the sentinel). */
#define NFQA_MAX (__NFQA_MAX - 1)
// nfqueue netlink message: verdict header (a verdict value plus an id).
// NOTE(review): id presumably matches nfqnl_msg_packet_hdr.packet_id -- confirm.
struct nfqnl_msg_verdict_hdr {
u_int32_t verdict;
u_int32_t id;
} __attribute__ ((packed));
/* Command codes for an nfqueue netlink configuration message. */
enum nfqnl_msg_config_cmds {
	NFQNL_CFG_CMD_NONE,
	NFQNL_CFG_CMD_BIND,
	NFQNL_CFG_CMD_UNBIND,
	NFQNL_CFG_CMD_PF_BIND,
	NFQNL_CFG_CMD_PF_UNBIND,
};
// nfqueue netlink message: configuration command structure.
struct nfqnl_msg_config_cmd {
u_int8_t command; /* nfqnl_msg_config_cmds */
u_int8_t _pad;
u_int16_t pf; /* AF_xxx for PF_[UN]BIND */
} __attribute__ ((packed));
// nfqueue netlink message: copy mode selected by configuration.
enum nfqnl_config_mode {
NFQNL_COPY_NONE, // copy nothing
NFQNL_COPY_META, // copy only the basic (meta) information
NFQNL_COPY_PACKET, // copy the whole packet
};
// nfqueue netlink message: configuration parameter structure.
struct nfqnl_msg_config_params {
u_int32_t copy_range;
u_int8_t copy_mode; /* enum nfqnl_config_mode */
} __attribute__ ((packed));
/*
 * Attribute types of an nfqueue netlink configuration message.  The trailing
 * comment on each enumerator names the structure carried by that attribute.
 */
enum nfqnl_attr_config {
	NFQA_CFG_UNSPEC,
	NFQA_CFG_CMD,			/* nfqnl_msg_config_cmd */
	NFQA_CFG_PARAMS,		/* nfqnl_msg_config_params */
	__NFQA_CFG_MAX			/* sentinel; keep last */
};
/* Highest valid configuration attribute type. */
#define NFQA_CFG_MAX (__NFQA_CFG_MAX-1)
/* include/linux/netfilter.h */
/*
 * Housekeeping data recorded for a packet that is handed to userspace:
 * the hook ops that queued it, the protocol family and hook number, the
 * input/output devices and the okfn callback pointer.
 */
struct nf_info
{
	/* The ops struct which sent us to userspace. */
	struct nf_hook_ops *elem;

	/* If we're sent to userspace, this keeps housekeeping info */
	int pf;
	unsigned int hook;
	struct net_device *indev, *outdev;
	int (*okfn)(struct sk_buff *);
};
/* net/netfilter/nfnetlink_queue.c */
/*
 * One queued packet: a list node, the nf_info housekeeping data, the
 * packet buffer itself and the id assigned to the packet.
 */
struct nfqnl_queue_entry {
	struct list_head list;
	struct nf_info *info;
	struct sk_buff *skb;
	unsigned int id;
};
/* One queue instance. */
struct nfqnl_instance {
	/* Hash list node. */
	struct hlist_node hlist;		/* global list of queues */
	atomic_t use;

	/* PID of the userspace application. */
	int peer_pid;
	/* Maximum queue length. */
	unsigned int queue_maxlen;
	/* Data copy range. */
	unsigned int copy_range;
	/* Current number of entries in the queue. */
	unsigned int queue_total;
	/* Number of packets dropped by the queue. */
	unsigned int queue_dropped;
	/*
	 * Drops attributed to the userspace side (the original article calls
	 * these "dropped by user program verdict").
	 * NOTE(review): confirm the exact meaning against the enqueue path.
	 */
	unsigned int queue_user_dropped;

	/* ID sequence. */
	atomic_t id_sequence;			/* 'sequence' of pkt ids */

	/* Queue number. */
	u_int16_t queue_num;			/* number of this queue */
	/* Copy mode. */
	u_int8_t copy_mode;

	spinlock_t lock;

	/* List of queue entries. */
	struct list_head queue_list;		/* packets in queue */
};
3.2 内核程序流程
3.2.1 系统初始化
/* net/netfilter/nfnetlink_queue.c */
/*
 * Module initialisation.
 *
 * Initialises the instance hash table, registers the netlink notifier and
 * the nfqueue nfnetlink subsystem, creates the proc entry (when
 * CONFIG_PROC_FS is set) and finally registers the netdevice notifier.
 *
 * Returns the result of nfnetlink_subsys_register() on success, or a
 * negative error code after undoing the registrations already made.
 */
static int __init nfnetlink_queue_init(void)
{
	int i, status = -ENOMEM;
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc_nfqueue;
#endif

	/* INSTANCE_BUCKETS hash list heads (16 according to the article). */
	for (i = 0; i < INSTANCE_BUCKETS; i++)
		INIT_HLIST_HEAD(&instance_table[i]);

	/* Register the netlink notifier. */
	netlink_register_notifier(&nfqnl_rtnl_notifier);

	/* Register the nfnetlink subsystem. */
	status = nfnetlink_subsys_register(&nfqnl_subsys);
	if (status < 0) {
		printk(KERN_ERR "nf_queue: failed to create netlink socket\n");
		goto cleanup_netlink_notifier;
	}

#ifdef CONFIG_PROC_FS
	/* Create the /proc/net/netfilter/nfnetlink_queue file. */
	proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440,
					 proc_net_netfilter);
	if (!proc_nfqueue) {
		/*
		 * status still holds the successful (non-negative) result of
		 * nfnetlink_subsys_register(); without this assignment the
		 * function would unwind everything and then report success.
		 */
		status = -ENOMEM;
		goto cleanup_subsys;
	}
	proc_nfqueue->proc_fops = &nfqnl_file_ops;
#endif

	/* Register the nfqueue netdevice notifier. */
	register_netdevice_notifier(&nfqnl_dev_notifier);
	return status;

#ifdef CONFIG_PROC_FS
cleanup_subsys:
	nfnetlink_subsys_unregister(&nfqnl_subsys);
#endif
cleanup_netlink_notifier:
	netlink_unregister_notifier(&nfqnl_rtnl_notifier);
	return status;
}
3.2.2
/*
 * Netlink notifier block: it only supplies the callback
 * (nfqnl_rcv_nl_event) that is invoked when a netlink socket event is
 * delivered.
 */
static struct notifier_block nfqnl_rtnl_notifier = {
	.notifier_call	= nfqnl_rcv_nl_event,
};
static int
nfqnl_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct netlink_notify *n = ptr;
nfqnl_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct netlink_notify *n = ptr;
// 就只处理释放事件
if (event == NETLINK_URELEASE &&
n->protocol == NETLINK_NETFILTER && n->pid) {
int i;
if (event == NETLINK_URELEASE &&
n->protocol == NETLINK_NETFILTER && n->pid) {
int i;
/* destroy all instances for this pid */
write_lock_bh(&instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *tmp, *t2;
struct nfqnl_instance *inst;
struct hlist_head *head = &instance_table[i];
// 释放指定pid的所有子队列信息
hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
if (n->pid == inst->peer_pid)
__instance_destroy(inst);
}
}
write_unlock_bh(&instances_lock);
}
return NOTIFY_DONE;
}
write_lock_bh(&instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *tmp, *t2;
struct nfqnl_instance *inst;
struct hlist_head *head = &instance_table[i];
// 释放指定pid的所有子队列信息
hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
if (n->pid == inst->peer_pid)
__instance_destroy(inst);
}
}
write_unlock_bh(&instances_lock);
}
return NOTIFY_DONE;
}
以下两个函数实现释放操作,实际都是调用同一个函数_instance_destroy2(),区别只在于instance_destroy()需要加锁,而__instance_destroy()不需要:
/*
 * Destroy a queue instance, asking _instance_destroy2() to take the lock
 * itself (lock argument = 1).
 */
static inline void
instance_destroy(struct nfqnl_instance *inst)
{
	_instance_destroy2(inst, 1);
}
/*
 * Destroy a queue instance without locking (lock argument = 0 passed to
 * _instance_destroy2()).
 */
static inline void
__instance_destroy(struct nfqnl_instance *inst)
{
	_instance_destroy2(inst, 0);
}
static void
_instance_destroy2(struct nfqnl_instance *inst, int lock)
{
/* first pull it out of the global list */
if (lock)
write_lock_bh(&instances_lock);
QDEBUG("removing instance %p (queuenum=%u) from hash/n",
inst, inst->queue_num);
// 将队列实例先从链表中移出
hlist_del(&inst->hlist);
inst, inst->queue_num);
// 将队列实例先从链表中移出
hlist_del(&inst->hlist);
if (lock)
write_unlock_bh(&instances_lock);
write_unlock_bh(&instances_lock);
/* then flush all pending skbs from the queue */
// 将当前队列中所有包的判定都设置DROP
nfqnl_flush(inst, NF_DROP);
// 将当前队列中所有包的判定都设置DROP
nfqnl_flush(inst, NF_DROP);
/* and finally put the refcount */
// 释放队列实例本身
instance_put(inst);
// 释放队列实例本身
instance_put(inst);