nfnetlink_log日志

nfnetlink_log日志内容参见:tcpdump抓取nflog日志报文

定义的日志结构为nfulnl_logger,类型为NF_LOG_TYPE_ULOG,处理函数为nfulnl_log_packet。

/*
 * Logger descriptor for nfnetlink_log: a ULOG-type logger whose packets
 * are handed to nfulnl_log_packet().
 */
static struct nf_logger nfulnl_logger __read_mostly = {
    .me    = THIS_MODULE,
    .logfn = nfulnl_log_packet,
    .type  = NF_LOG_TYPE_ULOG,
    .name  = "nfnetlink_log",
};

默认的日志信息结构如下:报文拷贝长度为0xffff(最大IP报文长度),netlink组为0,队列阈值(qthreshold)为1。

/*
 * Fallback log parameters, used by nfulnl_log_packet() when the caller
 * does not supply an nf_loginfo of type NF_LOG_TYPE_ULOG.
 */
static const struct nf_loginfo default_loginfo = {
    .type              = NF_LOG_TYPE_ULOG,
    .u.ulog.copy_len   = 0xffff,
    .u.ulog.group      = 0,
    .u.ulog.qthreshold = 1,
};

由nf_log_register将以上日志结构nfulnl_logger添加到全局loggers数组中。注意这里注册的协议为NFPROTO_UNSPEC(0),对于此协议类型,函数nf_log_register将loggers数组中所有协议的类型为NF_LOG_TYPE_ULOG的日志记录器全部设置为nfulnl_logger(即nfnetlink_log)。

static int __init nfnetlink_log_init(void)
{
    status = nfnetlink_subsys_register(&nfulnl_subsys);
    if (status < 0) {
        pr_err("failed to create netlink socket\n");
        goto cleanup_netlink_notifier;
    }

    status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);

日志处理

首先,如果参数li_user非空且其类型为NF_LOG_TYPE_ULOG,使用该日志信息;否则,使用默认的default_loginfo。如果找不到对应组的实例(没有接收者),直接返回,不生成日志。

static void
nfulnl_log_packet(struct net *net,
          u_int8_t pf,
          unsigned int hooknum,
          const struct sk_buff *skb,
          const struct net_device *in,
          const struct net_device *out,
          const struct nf_loginfo *li_user,
          const char *prefix)
{
    struct nfulnl_instance *inst;
    const struct nf_loginfo *li;
    struct nfnl_log_net *log = nfnl_log_pernet(net);
    const struct nfnl_ct_hook *nfnl_ct = NULL;
    struct nf_conn *ct = NULL;
    enum ip_conntrack_info ctinfo;

    if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
        li = li_user;
    else
        li = &default_loginfo;

    inst = instance_lookup_get(log, li->u.ulog.group);
    if (!inst)
        return;

以下计算所有的信息所占的长度。

    if (prefix)
        plen = strlen(prefix) + 1;

    /* FIXME: do we want to make the size calculation conditional based on
     * what is actually present?  way more branches and checks, but more
     * memory efficient... */
    size = nlmsg_total_size(sizeof(struct nfgenmsg))
        + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
        + nla_total_size(sizeof(u_int32_t)) /* ifindex */
        + nla_total_size(sizeof(u_int32_t)) /* ifindex */

队列阈值初始取实例的qthreshold;如果参数日志信息中指定了非零的qthreshold,并且该值小于实例的阈值,则改用该值,即取两者中的较小值。其中默认default_loginfo中指定的阈值为1。


    qthreshold = inst->qthreshold;
    /* per-rule qthreshold overrides per-instance */
    if (li->u.ulog.qthreshold)
        if (qthreshold > li->u.ulog.qthreshold)
            qthreshold = li->u.ulog.qthreshold;

对于实例的模式NFULNL_COPY_PACKET,还将拷贝报文数据的一部分,长度由实例的成员copy_range指定;如果参数日志信息中设置了NF_LOG_F_COPY_LEN标志,则不能超过其中copy_len指定的值;当然,也不能超过报文自身的长度。

    switch (inst->copy_mode) {
    case NFULNL_COPY_META:
    case NFULNL_COPY_NONE:
        data_len = 0;
        break;

    case NFULNL_COPY_PACKET:
        data_len = inst->copy_range;
        if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) &&
            (li->u.ulog.copy_len < data_len))
            data_len = li->u.ulog.copy_len;

        if (data_len > skb->len)
            data_len = skb->len;

        size += nla_total_size(data_len);
        break;

    case NFULNL_COPY_DISABLED:
    default:
        goto unlock_and_release;
    }

如果实例中已有skb,并且其尾部剩余空间不足以容纳当前的日志消息(长度为以上计算的size),先将此skb执行发送操作。

    if (inst->skb && size > skb_tailroom(inst->skb)) {
        /* either the queue len is too high or we don't have
         * enough room in the skb left. flush to userspace. */
        __nfulnl_flush(inst);
    }

如果实例中没有skb结构,重新分配。增加实例的队列长度,由函数__build_packet_message组建日志报文。

    if (!inst->skb) {
        inst->skb = nfulnl_alloc_skb(net, inst->peer_portid,
                         inst->nlbufsiz, size);
        if (!inst->skb)
            goto alloc_failure;
    }

    inst->qlen++;

    __build_packet_message(log, inst, skb, data_len, pf,
                hooknum, in, out, prefix, plen,
                nfnl_ct, ct, ctinfo);

如果实例的队列长度达到或超过阈值,执行发送操作;否则,如果定时器尚未启动,则启动定时器,超时时发送。

    if (inst->qlen >= qthreshold)
        __nfulnl_flush(inst);
    /* timer_pending always called within inst->lock, so there
     * is no chance of a race here */
    else if (!timer_pending(&inst->timer)) {
        instance_get(inst);
        inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100);
        add_timer(&inst->timer);
    }

日志报文组建

首先,初始化netlink消息头部信息nfgenmsg。

static inline int
__build_packet_message(struct nfnl_log_net *log,...)
{
    struct nfulnl_msg_packet_hdr pmsg;
    struct nlmsghdr *nlh;
    struct nfgenmsg *nfmsg;
    sk_buff_data_t old_tail = inst->skb->tail;

    nlh = nlmsg_put(inst->skb, 0, 0,
            nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
            sizeof(struct nfgenmsg), 0);
    if (!nlh)
        return -1;
    nfmsg = nlmsg_data(nlh);
    nfmsg->nfgen_family = pf;
    nfmsg->version = NFNETLINK_V0;
    nfmsg->res_id = htons(inst->group_num);

设置nfulnl_msg_packet_hdr信息,包括协议和触发的hook点;如果指定了日志前缀字符串,也一并添加。

    memset(&pmsg, 0, sizeof(pmsg));
    pmsg.hw_protocol    = skb->protocol;
    pmsg.hook       = hooknum;

    if (nla_put(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg))
        goto nla_put_failure;

    if (prefix && nla_put(inst->skb, NFULA_PREFIX, plen, prefix))
        goto nla_put_failure;

如果输入设备有效,在没有使能网桥netfilter的时候,只记录其接口索引值(NFULA_IFINDEX_INDEV)。如果使能了网桥netfilter功能,并且pf等于PF_BRIDGE,即日志是在BRIDGE中生成(ebtables规则),此时indev是物理输入设备,其索引记录为NFULA_IFINDEX_PHYSINDEV,另外将其所属网桥设备的索引记录为NFULA_IFINDEX_INDEV。

对于pf不等于PF_BRIDGE的情况,如iptables设置的nflog规则,位于IP层,可见的有可能就是网桥设备,由函数nf_bridge_get_physindev获得物理设备,记录物理设备的索引。

    if (indev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
        if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(indev->ifindex)))
            goto nla_put_failure;
#else
        if (pf == PF_BRIDGE) {
            /* Case 1: outdev is physical input device, we need to
             * look for bridge group (when called from netfilter_bridge) */
            if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
                     htonl(indev->ifindex)) ||
            /* this is the bridge group "brX" */
            /* rcu_read_lock()ed by nf_hook_thresh or nf_log_packet. */
                nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
                     htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
                goto nla_put_failure;
        } else {
            struct net_device *physindev;

            /* Case 2: indev is bridge group, we need to look for
             * physical device (when called from ipv4) */
            if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(indev->ifindex)))
                goto nla_put_failure;

            physindev = nf_bridge_get_physindev(skb);
            if (physindev && nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
                     htonl(physindev->ifindex)))
                goto nla_put_failure;
        }
#endif
    }

对于输出设备有效的情况,与以上处理相同。

    if (outdev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
        if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
                 htonl(outdev->ifindex)))
            goto nla_put_failure;
#else
        if (pf == PF_BRIDGE) {
            /* Case 1: outdev is physical output device, we need to
             * look for bridge group (when called from netfilter_bridge) */
            if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
                     htonl(outdev->ifindex)) ||
            /* this is the bridge group "brX" */
            /* rcu_read_lock()ed by nf_hook_thresh or nf_log_packet.
             */
                nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
                     htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
                goto nla_put_failure;
        } else {
            struct net_device *physoutdev;

            /* Case 2: indev is a bridge group, we need to look
             * for physical device (when called from ipv4) */
            if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
                     htonl(outdev->ifindex)))
                goto nla_put_failure;

            physoutdev = nf_bridge_get_physoutdev(skb);
            if (physoutdev && nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
                     htonl(physoutdev->ifindex)))
                goto nla_put_failure;
        }
#endif
    }

记录mark值和硬件地址字段值。

    if (skb->mark &&
        nla_put_be32(inst->skb, NFULA_MARK, htonl(skb->mark)))
        goto nla_put_failure;

    if (indev && skb->dev && skb->mac_header != skb->network_header) {
        struct nfulnl_msg_packet_hw phw;
        int len;

        memset(&phw, 0, sizeof(phw));
        len = dev_parse_header(skb, phw.hw_addr);
        if (len > 0) {
            phw.hw_addrlen = htons(len);
            if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw))
                goto nla_put_failure;
        }
    }

记录设备类型、硬件头部长度以及硬件头部内容。

    if (indev && skb_mac_header_was_set(skb)) {
        if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
            nla_put_be16(inst->skb, NFULA_HWLEN,
                 htons(skb->dev->hard_header_len)))
            goto nla_put_failure;

        hwhdrp = skb_mac_header(skb);

        if (skb->dev->type == ARPHRD_SIT)
            hwhdrp -= ETH_HLEN;

        if (hwhdrp >= skb->head && nla_put(inst->skb, NFULA_HWHEADER,
                skb->dev->hard_header_len, hwhdrp))
            goto nla_put_failure;
    }

如果hook点小于等于NF_INET_FORWARD(即NF_INET_PRE_ROUTING、NF_INET_LOCAL_IN和NF_INET_FORWARD),并且报文带有时间戳(skb->tstamp非零),记录时间戳。

    if (hooknum <= NF_INET_FORWARD && skb->tstamp) {
        struct nfulnl_msg_packet_timestamp ts;
        struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
        ts.sec = cpu_to_be64(kts.tv_sec);
        ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);

        if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts))
            goto nla_put_failure;
    }

记录UID和GID。

    /* UID */
    sk = skb->sk;
    if (sk && sk_fullsock(sk)) {
        read_lock_bh(&sk->sk_callback_lock);
        if (sk->sk_socket && sk->sk_socket->file) {
            struct file *file = sk->sk_socket->file;
            const struct cred *cred = file->f_cred;
            struct user_namespace *user_ns = inst->peer_user_ns;
            __be32 uid = htonl(from_kuid_munged(user_ns, cred->fsuid));
            __be32 gid = htonl(from_kgid_munged(user_ns, cred->fsgid));
            read_unlock_bh(&sk->sk_callback_lock);
            if (nla_put_be32(inst->skb, NFULA_UID, uid) ||
                nla_put_be32(inst->skb, NFULA_GID, gid))
                goto nla_put_failure;
        } else
            read_unlock_bh(&sk->sk_callback_lock);
    }

根据标志位,记录本地和全局的序号值。

    /* local sequence number */
    if ((inst->flags & NFULNL_CFG_F_SEQ) &&
        nla_put_be32(inst->skb, NFULA_SEQ, htonl(inst->seq++)))
        goto nla_put_failure;

    /* global sequence number */
    if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&
        nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL,
             htonl(atomic_inc_return(&log->global_seq))))
        goto nla_put_failure;

添加连接跟踪信息,参见函数ctnetlink_glue_build。对于pf为NFPROTO_NETDEV或者NFPROTO_BRIDGE,添加VLAN信息,及MAC层信息。

    if (ct && nfnl_ct->build(inst->skb, ct, ctinfo,
                 NFULA_CT, NFULA_CT_INFO) < 0)
        goto nla_put_failure;

    if ((pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) &&
        nfulnl_put_bridge(inst, skb) < 0)
        goto nla_put_failure;

最后,拷贝报文数据。

    if (data_len) {
        struct nlattr *nla;
        int size = nla_attr_size(data_len);

        if (skb_tailroom(inst->skb) < nla_total_size(data_len))
            goto nla_put_failure;

        nla = skb_put(inst->skb, nla_total_size(data_len));
        nla->nla_type = NFULA_PAYLOAD;
        nla->nla_len = size;

        if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
            BUG();
    }

    nlh->nlmsg_len = inst->skb->tail - old_tail;
    return 0;

内核版本 5.10

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值