这个函数的实现很简单:如果已经注册了事件通知句柄,并且该连接带有事件缓存,就在事件缓存 e->cache 中把 event 对应的位置位。
// Record `event` in the event cache of `ct` by setting bit number `event`
// in e->cache. Does nothing when no event callback is registered
// (nf_conntrack_event_cb == NULL) or when nf_ct_ecache_find() returns no
// event cache for this conntrack.
- static inline void
- nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
- {
- struct nf_conntrack_ecache *e;
-
- if (nf_conntrack_event_cb == NULL) // no notifier registered
- return;
-
- e = nf_ct_ecache_find(ct);
- if (e == NULL) // this conntrack has no event cache
- return;
-
- set_bit(event, &e->cache); // `event` is used as a bit index, not as a mask
- }
以下内容来自我的一篇尚未整理的笔记,主要分析事件机制的实现,希望对你有所帮助。
//
内核态接口
1、初始化
// Event notifier handle: its callback (.fcn) is ctnetlink_conntrack_event.
- static struct nf_ct_event_notifier ctnl_notifier = {
- .fcn = ctnetlink_conntrack_event,
- };
// Excerpt from ctnetlink_init (per the printk text): register the notifier.
// The registration fails with a negative value, in which case the init
// path jumps to its error-unwinding label.
- #ifdef CONFIG_NF_CONNTRACK_EVENTS
- ret = nf_conntrack_register_notifier(&ctnl_notifier);
- if (ret < 0) {
- printk("ctnetlink_init: cannot register notifier.\n");
- goto err_unreg_exp_subsys;
- }
- #endif
// Matching teardown excerpt (presumably from the module exit path):
// unregister the same notifier.
- #ifdef CONFIG_NF_CONNTRACK_EVENTS
- nf_conntrack_unregister_notifier(&ctnl_notifier);
- #endif
// Register `new` as the conntrack event notifier.
// Only one notifier can be installed at a time: returns -EBUSY when
// nf_conntrack_event_cb is already set, 0 on success. The check and the
// assignment both happen while nf_ct_ecache_mutex is held.
- int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
- {
- int ret = 0;
- struct nf_ct_event_notifier *notify;
-
- // take the lock
- mutex_lock(&nf_ct_ecache_mutex);
- // fetch the current handler to check whether one is already registered
- notify = rcu_dereference(nf_conntrack_event_cb);
- if (notify != NULL) {
- ret = -EBUSY;
- goto out_unlock;
- }
- // install the new handler
- rcu_assign_pointer(nf_conntrack_event_cb, new);
- // release the lock
- mutex_unlock(&nf_ct_ecache_mutex);
- return ret;
-
- out_unlock:
- mutex_unlock(&nf_ct_ecache_mutex);
- return ret;
- }
// Unregister the conntrack event notifier by resetting
// nf_conntrack_event_cb to NULL under nf_ct_ecache_mutex.
// `new` must be the currently registered notifier; otherwise BUG_ON() fires.
- void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
- {
- struct nf_ct_event_notifier *notify;
-
- mutex_lock(&nf_ct_ecache_mutex);
- notify = rcu_dereference(nf_conntrack_event_cb);
- BUG_ON(notify != new); // caller is unregistering something that is not installed
- rcu_assign_pointer(nf_conntrack_event_cb, NULL);
- mutex_unlock(&nf_ct_ecache_mutex);
- }
事件类型:枚举值是事件对应的位序号(而不是位掩码本身),需要某个事件的位掩码时,使用 1 << event
// The enumerators are consecutive bit positions (0..8), not masks.
- /* Connection tracking event types */
- enum ip_conntrack_events
- {
- IPCT_NEW = 0, /* new conntrack */
- IPCT_RELATED = 1, /* related conntrack */
- IPCT_DESTROY = 2, /* destroyed conntrack */
- IPCT_STATUS = 3, /* status has changed */
- IPCT_PROTOINFO = 4, /* protocol information has changed */
- IPCT_HELPER = 5, /* new helper has been set */
- IPCT_MARK = 6, /* new mark has been set */
- IPCT_NATSEQADJ = 7, /* NAT is doing sequence adjustment */
- IPCT_SECMARK = 8, /* new security mark has been set */
- };
- static void death_by_timeout(unsigned long ul_conntrack)
- {
- ……
-
- if (!test_bit(IPS_DYING_BIT, &ct->status) &&
- unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
- ……
// Report a single event for `ct` immediately.
// Returns whatever nf_conntrack_eventmask_report() returns.
- static inline int
- nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
- {
- // turn the event number into a bitmask and report it, with pid and report both 0
- return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
- }
// Deliver the events in `eventmask` for `ct` to the registered notifier.
//
// eventmask: bitmask of events (bits are 1 << IPCT_xxx)
// pid:       used for the event item when the event cache holds no pid
// report:    copied into the event item handed to the notifier
//
// Returns the notifier callback's return value, or 0 when the callback is
// not invoked: no notifier registered, net->ct.sysctl_events is 0, the
// conntrack has no event cache, or it is unconfirmed or dying.
//
// Bookkeeping after the callback, done under ct->lock:
//  - on failure (ret < 0) the undelivered events are OR-ed into e->missed,
//    except for a destroy event with a non-zero `pid` and no cached pid,
//    where `pid` is stored in e->pid instead;
//  - on success, the previously missed events that were just re-sent are
//    cleared from e->missed.
- static inline int
- nf_conntrack_eventmask_report(unsigned int eventmask,
- struct nf_conn *ct,
- u32 pid,
- int report)
- {
- int ret = 0;
- struct net *net = nf_ct_net(ct);
- struct nf_ct_event_notifier *notify;
- struct nf_conntrack_ecache *e;
-
- rcu_read_lock();
- // fetch the registered event notifier
- notify = rcu_dereference(nf_conntrack_event_cb);
- if (notify == NULL)
- goto out_unlock;
-
- // check the sysctl control flag; events may be switched off
- if (!net->ct.sysctl_events)
- goto out_unlock;
-
- // look up the event cache of this conntrack
- e = nf_ct_ecache_find(ct);
- if (e == NULL)
- goto out_unlock;
-
- if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
- // build the nf_ct_event passed to the notifier
- struct nf_ct_event item = {
- .ct = ct,
- .pid = e->pid ? e->pid : pid, // prefer the pid stored in the cache, else use the argument
- .report = report
- };
- /* This is a resent of a destroy event? If so, skip missed */
- unsigned long missed = e->pid ? 0 : e->missed;
-
- // invoke the notifier callback, including any previously missed events
- ret = notify->fcn(eventmask | missed, &item);
- if (unlikely(ret < 0 || missed)) {
- spin_lock_bh(&ct->lock);
- if (ret < 0) {
- /* This is a destroy event that has been
- * triggered by a process, we store the PID
- * to include it in the retransmission. */
- if (eventmask & (1 << IPCT_DESTROY) &&
- e->pid == 0 && pid != 0)
- e->pid = pid;
- else
- e->missed |= eventmask;
- } else
- e->missed &= ~missed;
- spin_unlock_bh(&ct->lock);
- }
- }
- out_unlock:
- rcu_read_unlock();
- return ret;
- }
// Notifier callback: turn a conntrack event mask into a netlink message
// and send it to the matching nfnetlink group.
// (Abridged excerpt: local declarations, the skb construction and the tail
// of the function are elided with "……".)
// Returns 0 when there is nothing to send (empty mask, or no listener and
// no report flag) and -ENOBUFS when nfnetlink_send() fails with -ENOBUFS
// or -EAGAIN.
- static int
- ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
- {
- // map the event mask to the ctnetlink message type and the nfnetlink group;
- // destroy takes precedence over new/related, which take precedence over any other event
- if (events & (1 << IPCT_DESTROY)) {
- type = IPCTNL_MSG_CT_DELETE;
- group = NFNLGRP_CONNTRACK_DESTROY;
- } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
- type = IPCTNL_MSG_CT_NEW;
- flags = NLM_F_CREATE|NLM_F_EXCL;
- group = NFNLGRP_CONNTRACK_NEW;
- } else if (events) {
- type = IPCTNL_MSG_CT_NEW;
- group = NFNLGRP_CONNTRACK_UPDATE;
- } else
- return 0;
-
- // without the report flag, only continue if someone listens on the group
- if (!item->report && !nfnetlink_has_listeners(group))
- return 0;
-
- // build the skb
- ……
-
- // send the message
- err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
- if (err == -ENOBUFS || err == -EAGAIN)
- return -ENOBUFS;
- }
// Thin wrapper: hand `skb` to nlmsg_notify() on the nfnl socket and return
// its result. `echo` is passed through as nlmsg_notify()'s report argument.
- int nfnetlink_send(struct sk_buff *skb, u32 pid,
- unsigned group, int echo, gfp_t flags)
- {
- return nlmsg_notify(nfnl, skb, pid, group, echo, flags);
- }
// Thin wrapper: returns netlink_has_listeners() for `group` on the nfnl socket.
- int nfnetlink_has_listeners(unsigned int group)
- {
- return netlink_has_listeners(nfnl, group);
- }
- nfnetlink group:
-
// nfnetlink groups as the kernel code above uses them: sequential group
// numbers starting at NFNLGRP_NONE (0), not bit masks.
// Each enumerator is also #defined to itself (presumably so that code can
// test for its presence with #ifdef).
- enum nfnetlink_groups {
- NFNLGRP_NONE,
- #define NFNLGRP_NONE NFNLGRP_NONE
- NFNLGRP_CONNTRACK_NEW,
- #define NFNLGRP_CONNTRACK_NEW NFNLGRP_CONNTRACK_NEW
- NFNLGRP_CONNTRACK_UPDATE,
- #define NFNLGRP_CONNTRACK_UPDATE NFNLGRP_CONNTRACK_UPDATE
- NFNLGRP_CONNTRACK_DESTROY,
- #define NFNLGRP_CONNTRACK_DESTROY NFNLGRP_CONNTRACK_DESTROY
- NFNLGRP_CONNTRACK_EXP_NEW,
- #define NFNLGRP_CONNTRACK_EXP_NEW NFNLGRP_CONNTRACK_EXP_NEW
- NFNLGRP_CONNTRACK_EXP_UPDATE,
- #define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE
- NFNLGRP_CONNTRACK_EXP_DESTROY,
- #define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY
- __NFNLGRP_MAX,
- };
// Highest valid group number.
- #define NFNLGRP_MAX (__NFNLGRP_MAX - 1)
内核态的 nfnetlink group 是组序号;与之不同,用户态的 nfnetlink groups 是一个位图。这是因为内核是事件的发送者,
一次只向一个组发送一个事件;而用户态是接收者,可以同时监听多个组:
// Group subscription bits: one bit per group, so several groups can be
// OR-ed together into a single subscription mask.
- /* nfnetlink groups: Up to 32 maximum */
- #define NF_NETLINK_CONNTRACK_NEW 0x00000001
- #define NF_NETLINK_CONNTRACK_UPDATE 0x00000002
- #define NF_NETLINK_CONNTRACK_DESTROY 0x00000004
- #define NF_NETLINK_CONNTRACK_EXP_NEW 0x00000008
- #define NF_NETLINK_CONNTRACK_EXP_UPDATE 0x00000010
- #define NF_NETLINK_CONNTRACK_EXP_DESTROY 0x00000020
// The three conntrack groups (new, update, destroy); expectation groups are not included.
- #define NFCT_ALL_CT_GROUPS (NF_NETLINK_CONNTRACK_NEW|NF_NETLINK_CONNTRACK_UPDATE|NF_NETLINK_CONNTRACK_DESTROY)
// Open the conntrack handle with the groups to listen on: the requested
// event mask restricted to the conntrack groups when CT_OPT_EVENT_MASK is
// set, otherwise all conntrack groups.
- if (options & CT_OPT_EVENT_MASK)
- cth = nfct_open(CONNTRACK,
- event_mask & NFCT_ALL_CT_GROUPS);
- else
- cth = nfct_open(CONNTRACK, NFCT_ALL_CT_GROUPS);
-
// Register event_cb for all message types (NFCT_T_ALL), passing obj as
// its data pointer, then call nfct_catch() on the handle.
- nfct_callback_register(cth, NFCT_T_ALL, event_cb, obj);
- res = nfct_catch(cth);
- struct nfct_handle *nfct_open_nfnl(struct nfnl_handle *nfnlh,
- u_int8_t subsys_id,
- unsigned int subscriptions)
- {
- ……
- if (subsys_id == 0 || subsys_id == NFNL_SUBSYS_CTNETLINK) {
- cth->nfnlssh_ct = nfnl_subsys_open(cth->nfnlh,
- NFNL_SUBSYS_CTNETLINK,
- IPCTNL_MSG_MAX,
- subscriptions);
- if (!cth->nfnlssh_ct)
- goto out_free;
- }
- ……
// Open a subsystem handle on `nfnlh` (abridged excerpt; elided parts are
// marked "……"). The visible part stores the requested group subscriptions
// and rebinds the socket; it returns NULL if the rebind fails.
- struct nfnl_subsys_handle *
- nfnl_subsys_open(struct nfnl_handle *nfnlh, u_int8_t subsys_id,
- u_int8_t cb_count, u_int32_t subscriptions)
- {
- ……
- // remember the nfnetlink groups this subsystem handle is interested in
- ssh->subscriptions = subscriptions;
- ……
- /* although now we have nfnl_join to subscribe to certain
- * groups, just keep this to ensure compatibility */
- if (recalc_rebind_subscriptions(nfnlh) < 0) {
- free(ssh->cb);
- ssh->cb = NULL;
- return NULL;
- }
- ……
- }
// Recompute the union of the handle's own subscriptions and those of all
// NFNL_MAX_SUBSYS subsystem slots, then bind() the socket with that mask
// in nl_groups.
// Returns 0 on success, -1 if bind() fails. On failure
// nfnlh->subscriptions keeps its old value, but nfnlh->local.nl_groups has
// already been overwritten with the new mask.
- static int recalc_rebind_subscriptions(struct nfnl_handle *nfnlh)
- {
- int i, err;
- u_int32_t new_subscriptions = nfnlh->subscriptions;
-
- for (i = 0; i < NFNL_MAX_SUBSYS; i++)
- new_subscriptions |= nfnlh->subsys[i].subscriptions;
-
- nfnlh->local.nl_groups = new_subscriptions;
- err = bind(nfnlh->fd, (struct sockaddr *)&nfnlh->local,
- sizeof(nfnlh->local));
- if (err == -1)
- return -1;
-
- nfnlh->subscriptions = new_subscriptions;
-
- return 0;
- }
// Event callback: print one conntrack event to stdout.
//
// type: message type, passed through to nfct_snprintf()
// ct:   the conntrack object carried by the event
// data: used as a struct nf_conntrack against which `ct` is filtered/compared
//
// An event is skipped when filter_nat() returns non-zero, or when the
// CT_COMPARISON option is set and nfct_cmp() returns 0. Otherwise the
// entry is formatted according to output_mask, printed, stdout is flushed
// and `counter` is incremented.
// Always returns NFCT_CB_CONTINUE.
- static int event_cb(enum nf_conntrack_msg_type type,
- struct nf_conntrack *ct,
- void *data)
- {
- char buf[1024];
- struct nf_conntrack *obj = data;
- unsigned int op_type = NFCT_O_DEFAULT;
- unsigned int op_flags = 0;
-
- if (filter_nat(obj, ct))
- return NFCT_CB_CONTINUE;
-
- if (options & CT_COMPARISON &&
- !nfct_cmp(obj, ct, NFCT_CMP_ALL | NFCT_CMP_MASK))
- return NFCT_CB_CONTINUE;
-
- if (output_mask & _O_XML) {
- op_type = NFCT_O_XML;
- if (dump_xml_header_done) { // non-zero means the header is still pending; cleared once printed
- dump_xml_header_done = 0;
- printf("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
- "<conntrack>\n");
- }
- }
- if (output_mask & _O_EXT)
- op_flags = NFCT_OF_SHOW_LAYER3;
- if (output_mask & _O_TMS) {
- if (!(output_mask & _O_XML)) { // plain output: print the timestamp prefix here
- struct timeval tv;
- gettimeofday(&tv, NULL);
- printf("[%-8ld.%-6ld]\t", tv.tv_sec, tv.tv_usec);
- } else // XML output: pass NFCT_OF_TIME to nfct_snprintf() instead
- op_flags |= NFCT_OF_TIME;
- }
- if (output_mask & _O_ID)
- op_flags |= NFCT_OF_ID;
-
- nfct_snprintf(buf, sizeof(buf), ct, type, op_type, op_flags);
-
- printf("%s\n", buf);
- fflush(stdout);
-
- counter++;
-
- return NFCT_CB_CONTINUE;
- }
// Each type has a *_BIT enumerator (the bit position) and a mask
// enumerator (1 << *_BIT), so types can be OR-ed together, as NFCT_T_ALL does.
// NOTE(review): NFCT_T_ERROR shifts 1 into bit 31 of an int; confirm this
// is acceptable for the compilers in use.
- /* message type */
- enum nf_conntrack_msg_type {
- NFCT_T_UNKNOWN = 0,
-
- NFCT_T_NEW_BIT = 0,
- NFCT_T_NEW = (1 << NFCT_T_NEW_BIT),
-
- NFCT_T_UPDATE_BIT = 1,
- NFCT_T_UPDATE = (1 << NFCT_T_UPDATE_BIT),
-
- NFCT_T_DESTROY_BIT = 2,
- NFCT_T_DESTROY = (1 << NFCT_T_DESTROY_BIT),
-
- NFCT_T_ALL = NFCT_T_NEW | NFCT_T_UPDATE | NFCT_T_DESTROY,
-
- NFCT_T_ERROR_BIT = 31,
- NFCT_T_ERROR = (1 << NFCT_T_ERROR_BIT),
- };
- int __callback(struct nlmsghdr *nlh, struct nfattr *nfa[], void *data)
- {
- int ret = NFNL_CB_STOP;
- unsigned int type;
- struct nf_conntrack *ct;
- int len = nlh->nlmsg_len;
- struct __data_container *container = data;
-
- len -= NLMSG_LENGTH(sizeof(struct nfgenmsg));
- if (len < 0)
- return NFNL_CB_CONTINUE;
-
- type = __parse_message_type(nlh);
- ……
// Classify a netlink message by its type and flags:
//   IPCTNL_MSG_CT_NEW with NLM_F_CREATE or NLM_F_EXCL set -> NFCT_T_NEW
//   IPCTNL_MSG_CT_NEW without either flag                  -> NFCT_T_UPDATE
//   IPCTNL_MSG_CT_DELETE                                   -> NFCT_T_DESTROY
//   anything else                                          -> NFCT_T_UNKNOWN
- int __parse_message_type(const struct nlmsghdr *nlh)
- {
- u_int16_t type = NFNL_MSG_TYPE(nlh->nlmsg_type);
- u_int16_t flags = nlh->nlmsg_flags;
- int ret = NFCT_T_UNKNOWN;
-
- if (type == IPCTNL_MSG_CT_NEW) {
- if (flags & (NLM_F_CREATE|NLM_F_EXCL)) // either flag is enough to count as "new"
- ret = NFCT_T_NEW;
- else
- ret = NFCT_T_UPDATE;
- } else if (type == IPCTNL_MSG_CT_DELETE)
- ret = NFCT_T_DESTROY;
-
- return ret;
- }