【Android】(四) Fwmark

fwmark 是一个用于标识数据包的标记,它可以用来标识不同的数据流。它可以用来指定某些特定的数据包通过特定的路由表,或者用于指定某些特定的数据包应该使用特定的策略,例如指定某些数据包应该使用特定的 QoS 策略。

有掩码的 fwmark 通常被用来指定某些特定的数据包应该使用特定的策略,而不是所有的数据包都使用特定的策略。在这种情况下,fwmark 的掩码用于指定哪些数据包应该使用特定的策略。

1 iptables

1.1 MARK

MARK 是 iptables 的一个目标,用于给数据包打上指定mark

--set-mark value    设置数据包的mark值。
--and-mark value    数据包的mark值和value进行按位与运算。
--or-mark  value    数据包的mark值和value进行按位或运算。

# Set the packet mark to 33
iptables -t mangle -A PREROUTING -j MARK --set-mark 33
# OR 0x400 into the existing packet mark
iptables -t mangle -A PREROUTING -j MARK --or-mark 0x400
# Match packets whose mark equals 33
iptables -t nat -A PREROUTING -m mark --mark 33  -j ACCEPT
# Match with a mask (value/mask): packets whose mark has bit 0x400 set
iptables -t nat -A PREROUTING -m mark --mark 0x400/0x400  -j ACCEPT

1.2 CONNMARK

CONNMARK 是 iptables 的一个目标,用于给一个网络连接标记一个连接 mark,也就是网络连接的整个过程的交互包都被该连接mark标记。

--set-mark     value[/mask]     给连接跟踪记录打标记。
--save-mark    [--mask mask]    将数据包上的mark保存到连接上。
--restore-mark [--mask mask]    将连接上的mark恢复到数据包中。

# Set the connection mark to 4
iptables -t mangle -A PREROUTING -j CONNMARK --set-mark 4
# Save the packet mark into the connection mark
iptables -t mangle -A PREROUTING -j CONNMARK --save-mark
# Restore the connection mark onto the packet
iptables -t mangle -A PREROUTING -j CONNMARK --restore-mark

1.3 kernel match code(xt_mark and xt_connmark)

1.3.1 iptables 钩子函数具体执行

接之前 iptables 四表在每个钩子点的具体执行函数

//kernel/msm-5.4/net/ipv4/netfilter/ip_tables.c
/*
 * Excerpt of the iptables rule-walking routine: starting at the first rule
 * stored for the current hook, evaluate rules one by one until a target
 * returns something other than XT_CONTINUE or acpar.hotdrop becomes set.
 *
 * Returns NF_DROP when acpar.hotdrop is set, otherwise the verdict produced
 * by the last target that ran.  Lines reading "..." mark code elided from
 * this excerpt (local declarations, setup, standard-target handling).
 */
unsigned int
ipt_do_table(struct sk_buff *skb,
         const struct nf_hook_state *state,
         struct xt_table *table)
{
    // ... (declarations and setup elided in this excerpt)
    /* First rule of this hook: offset hook_entry[hook] from table_base. */
    e = get_entry(table_base, private->hook_entry[hook]);
    do {
        const struct xt_entry_target *t;
        const struct xt_entry_match *ematch;
        struct xt_counters *counter;
        WARN_ON(!e);
        /* Check the rule's IP-header part (&e->ip) and the in/out devices. */
        if (!ip_packet_match(ip, indev, outdev,
            &e->ip, acpar.fragoff)) {
            /* Also the landing point when an extension match below fails. */
 no_match:
            e = ipt_next_entry(e);
            continue;
        }
        /* Run every match attached to the rule; any failure skips the rule. */
        xt_ematch_foreach(ematch, e) {
            acpar.match     = ematch->u.kernel.match;
            acpar.matchinfo = ematch->data;
            if (!acpar.match->match(skb, &acpar))
                goto no_match;
        }
        /* All matches passed: fetch the rule's target. */
        t = ipt_get_target_c(e);
        WARN_ON(!t->u.kernel.target);
        /* Standard target? */
        // ... (elided in this excerpt)
        /* Hand the packet and the rule's target data to the target handler. */
        acpar.target   = t->u.kernel.target;
        acpar.targinfo = t->data;
        verdict = t->u.kernel.target->target(skb, &acpar);
        if (verdict == XT_CONTINUE) {
            /* Target might have changed stuff. */
            ip = ip_hdr(skb);
            e = ipt_next_entry(e);
        } else {
            /* Verdict */
            break;
        }
    } while (!acpar.hotdrop);
    xt_write_recseq_end(addend);
    local_bh_enable();
    /* hotdrop overrides whatever verdict was computed. */
    if (acpar.hotdrop)
        return NF_DROP;
    else return verdict;
}

假设现在在 PREROUTING 点去执行 nat 表的操作,其实就是去调用 nat 表的钩子函数,一直向下调用,其实也就是执行上述函数。

/*
 * Descriptor of one table.  The rule data is not stored here directly; it is
 * reached through `private` (see struct xt_table_info).
 */
/* Furniture shopping... */
struct xt_table {
    struct list_head list;
    /* What hooks you will enter on */
    unsigned int valid_hooks;
    /* Man behind the curtain... */
    /* i.e. the xt_table_info that carries this table's entries */
    struct xt_table_info *private;
    /* Set this to THIS_MODULE if you are a module, otherwise NULL */
    struct module *me;
    u_int8_t af;        /* address/protocol family */
    int priority;       /* hook order */
    /* called when table is needed in the given netns */
    int (*table_init)(struct net *net);
    /* A unique name... */
    const char name[XT_TABLE_MAXNAMELEN];
};

/* The table itself */
/*
 * Holds the bookkeeping for a table plus the trailing `entries` area.
 * ipt_do_table() uses hook_entry[hook] as the offset of the first rule to
 * evaluate for a given hook.
 */
struct xt_table_info {
    /* Size per table */
    unsigned int size;
    /* Number of entries: FIXME. --RR */
    unsigned int number;
    /* Initial number of entries. Needed for module usage count */
    unsigned int initial_entries;
    /* Entry points and underflows */
    /* Both arrays are indexed by hook number (NF_INET_NUMHOOKS slots). */
    unsigned int hook_entry[NF_INET_NUMHOOKS];
    unsigned int underflow[NF_INET_NUMHOOKS];
    /*
     * Number of user chains. Since tables cannot have loops, at most
     * @stacksize jumps (number of user chains) can possibly be made.
     */
    unsigned int stacksize;
    void ***jumpstack;
    /* Trailing, 8-byte-aligned storage for the rule entries. */
    unsigned char entries[0] __aligned(8);
};

每个表都有对应的 xt_table 注册到了网络命名空间中(struct net),xt_table 有一个 xt_table_info 结构体成员 private,相关的规则也是添加到 private 的 entries 字段中。

/* This structure defines each of the firewall rules.  Consists of 3
   parts which are 1) general IP header stuff 2) match specific
   stuff 3) the target to perform if the rule matches */
struct ipt_entry {
    /* IP-header criteria; ipt_do_table() passes &e->ip to ip_packet_match(). */
    struct ipt_ip ip;
    /* Mark with fields that we care about. */
    unsigned int nfcache;
    /* Size of ipt_entry + matches */
    /* (so this is the offset at which the target begins) */
    __u16 target_offset;
    /* Size of ipt_entry + matches + target */
    /* (so this is the offset of the following entry) */
    __u16 next_offset;
    /* Back pointer */
    unsigned int comefrom;
    /* Packet and byte counters. */
    struct xt_counters counters;
    /* The matches (if any), then the target. */
    unsigned char elems[0];
};

ipt_do_table 函数的具体执行流程如下:

以钩子点对应的 hook_entry 作为偏移,从 private 中直接定位到该钩子点的第一个 entry(struct ipt_entry),以此为起点开始遍历。

首先判断数据包是否符合被处理的条件,例如判断 in-dev 和 out-dev 等。

接着从 entry 中首先遍历获取到 matches(struct xt_entry_match),其中最为关键的成员就是 match(struct xt_match)。xt_match 处理完以后再从 entry 中获取 xt_entry_target(如果有的话),其中最为关键的成员是 target(struct xt_target),再通过 xt_target 来处理数据包。最后将处理结果返回。

/*
 * Header placed in front of each match inside a rule.  ipt_do_table() reads
 * u.kernel.match to find the handler and passes `data` to it as matchinfo.
 */
struct xt_entry_match {
    union {
        /* NOTE: the userspace-facing member of this union is elided in this excerpt. */
        struct {
            __u16 match_size;

            /* Used inside the kernel */
            struct xt_match *match;
        } kernel;
        /* Total length */
        __u16 match_size;
    } u;
    /* Match-specific payload that follows the header. */
    unsigned char data[0];
};

/*
 * Header placed in front of a rule's target.  ipt_do_table() reads
 * u.kernel.target to find the handler and passes `data` to it as targinfo.
 */
struct xt_entry_target {
    union {
        /* ... (other union member elided in this excerpt) */
        struct {
            __u16 target_size;

            /* Used inside the kernel */
            struct xt_target *target;
        } kernel;
        /* Total length */
        __u16 target_size;
    } u;
    /* Target-specific payload that follows the header. */
    unsigned char data[0];
};

接下来重点分析 xt_match 和 xt_target。

Netfilter一个很重要的思想就是将netfilter作为一个协议无关的框架,表现在内核结构树中单独建立net/netfilter目录,在net/netfilter下的匹配和目标模块文件名称以“xt_”开头。 为了和iptables兼容,这些文件中使用了宏 MODULE_ALIAS 来声明模块的别名。所有扩展程序的名称也是以xt开头。所以 mark 和 connmark 也必然会出现在 net/netfilter 目录。

1.3.2 MARK 和 CONNMARK

接下来主要看 xt_mark 和 xt_connmark 这两个文件。

//kernel/msm-5.4/net/netfilter/xt_mark.c
/*
 * Registration record for the "MARK" target, revision 2.  Packets are handled
 * by mark_tg(), whose per-rule data is a struct xt_mark_tginfo2.
 */
static struct xt_target mark_tg_reg __read_mostly = {
    .name           = "MARK",
    .revision       = 2,
    .family         = NFPROTO_UNSPEC,
    .target         = mark_tg,
    .targetsize     = sizeof(struct xt_mark_tginfo2),
    .me             = THIS_MODULE,
};

/*
 * Registration record for the "mark" match, revision 1.  Packets are tested
 * by mark_mt(), whose per-rule data is a struct xt_mark_mtinfo1.
 */
static struct xt_match mark_mt_reg __read_mostly = {
    .name           = "mark",
    .revision       = 1,
    .family         = NFPROTO_UNSPEC,
    .match          = mark_mt,
    .matchsize      = sizeof(struct xt_mark_mtinfo1),
    .me             = THIS_MODULE,
};

/*
 * MARK target handler: rewrite skb->mark from the rule's (mark, mask) pair.
 * Bits selected by the mask are cleared, then the rule's mark is XORed in.
 * Always returns XT_CONTINUE, so rule traversal goes on after marking.
 */
static unsigned int
mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
    const struct xt_mark_tginfo2 *tg = par->targinfo;

    skb->mark = tg->mark ^ (skb->mark & ~tg->mask);

    return XT_CONTINUE;
}

/*
 * "mark" match handler: compare the masked packet mark with the rule's mark.
 * The comparison result is XORed with the rule's invert field.
 */
static bool
mark_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
    const struct xt_mark_mtinfo1 *mt = par->matchinfo;
    bool equal = (skb->mark & mt->mask) == mt->mark;

    /* Keep the XOR form so any invert value behaves as in a plain XOR. */
    return equal ^ mt->invert;
}

mark_tg 是为数据包加上mark。

mark_mt 是根据 mark 来进行匹配,匹配成功返回 true,否则返回false。

//kernel/msm-5.4/net/netfilter/xt_connmark.c
/*
 * Registration records for the "CONNMARK" target.  Two revisions are
 * registered: revision 1 is handled by connmark_tg() with a
 * struct xt_connmark_tginfo1, revision 2 by connmark_tg_v2() with a
 * struct xt_connmark_tginfo2.  Both share the same checkentry/destroy hooks.
 */
static struct xt_target connmark_tg_reg[] __read_mostly = {
    {
        .name           = "CONNMARK",
        .revision       = 1,
        .family         = NFPROTO_UNSPEC,
        .checkentry     = connmark_tg_check,
        .target         = connmark_tg,
        .targetsize     = sizeof(struct xt_connmark_tginfo1),
        .destroy        = connmark_tg_destroy,
        .me             = THIS_MODULE,
    },
    {
        .name           = "CONNMARK",
        .revision       = 2,
        .family         = NFPROTO_UNSPEC,
        .checkentry     = connmark_tg_check,
        .target         = connmark_tg_v2,
        .targetsize     = sizeof(struct xt_connmark_tginfo2),
        .destroy        = connmark_tg_destroy,
        .me             = THIS_MODULE,
    }
};

/*
 * Registration record for the "connmark" match, revision 1.  Packets are
 * tested by connmark_mt(), whose per-rule data is a
 * struct xt_connmark_mtinfo1.
 */
static struct xt_match connmark_mt_reg __read_mostly = {
    .name           = "connmark",
    .revision       = 1,
    .family         = NFPROTO_UNSPEC,
    .checkentry     = connmark_mt_check,
    .match          = connmark_mt,
    .matchsize      = sizeof(struct xt_connmark_mtinfo1),
    .destroy        = connmark_mt_destroy,
    .me             = THIS_MODULE,
};

/*
 * Common CONNMARK worker.  Depending on info->mode it
 *   - SET:     computes a new connection mark from ctmark/ctmask,
 *   - SAVE:    copies the (masked, shifted) packet mark into the connection,
 *   - RESTORE: copies the (masked, shifted) connection mark into the packet.
 * The shift direction and width come from info->shift_dir / info->shift_bits.
 * When the connection mark changes, an IPCT_MARK event is cached.
 * Packets without a conntrack entry are left untouched.
 * Always returns XT_CONTINUE.
 */
static unsigned int
connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
{
    enum ip_conntrack_info ctinfo;
    struct nf_conn *ct;
    u_int32_t bits;
    u_int32_t merged;

    ct = nf_ct_get(skb, &ctinfo);
    if (!ct)
        return XT_CONTINUE;

    switch (info->mode) {
    case XT_CONNMARK_SET:
        /* Clear the ctmask bits, XOR in ctmark, then shift the result. */
        merged = info->ctmark ^ (ct->mark & ~info->ctmask);
        merged = (info->shift_dir == D_SHIFT_RIGHT)
                 ? merged >> info->shift_bits
                 : merged << info->shift_bits;

        if (merged != ct->mark) {
            ct->mark = merged;
            nf_conntrack_event_cache(IPCT_MARK, ct);
        }
        break;
    case XT_CONNMARK_SAVE:
        /* Take the nfmask bits of the packet mark and shift them. */
        bits = skb->mark & info->nfmask;
        bits = (info->shift_dir == D_SHIFT_RIGHT)
               ? bits >> info->shift_bits
               : bits << info->shift_bits;

        merged = bits ^ (ct->mark & ~info->ctmask);
        if (merged != ct->mark) {
            ct->mark = merged;
            nf_conntrack_event_cache(IPCT_MARK, ct);
        }
        break;
    case XT_CONNMARK_RESTORE:
        /* Take the ctmask bits of the connection mark and shift them. */
        bits = ct->mark & info->ctmask;
        bits = (info->shift_dir == D_SHIFT_RIGHT)
               ? bits >> info->shift_bits
               : bits << info->shift_bits;

        skb->mark = bits ^ (skb->mark & ~info->nfmask);
        break;
    }

    return XT_CONTINUE;
}

static unsigned int
connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
    const struct xt_connmark_tginfo1 *info = par->targinfo;
    const struct xt_connmark_tginfo2 info2 = {
        .ctmark = info->ctmark,
        .ctmask = info->ctmask,
        .nfmask = info->nfmask,
        .mode   = info->mode,
    };

    return connmark_tg_shift(skb, &info2);
}

/*
 * CONNMARK revision-2 handler: the rule data already is a
 * struct xt_connmark_tginfo2, so pass it straight to connmark_tg_shift().
 */
static unsigned int
connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
{
    const struct xt_connmark_tginfo2 *tg = par->targinfo;

    return connmark_tg_shift(skb, tg);
}

/*
 * "connmark" match handler: compare the masked connection mark with the
 * rule's mark and XOR the result with the rule's invert field.
 * Returns false when the packet has no conntrack entry.
 */
static bool
connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
    const struct xt_connmark_mtinfo1 *mt = par->matchinfo;
    enum ip_conntrack_info ctinfo;
    const struct nf_conn *conn = nf_ct_get(skb, &ctinfo);
    bool equal;

    if (!conn)
        return false;

    equal = (conn->mark & mt->mask) == mt->mark;
    return equal ^ mt->invert;
}

connmark_mt 跟 mark_mt 大致差不多,但 connmark_mt 是根据 nf_conn 的 mark 来进行匹配,匹配成功返回 true,否则返回false。

connmark_tg 的处理则是分了三种情况,XT_CONNMARK_SET,XT_CONNMARK_SAVE,XT_CONNMARK_RESTORE。SET 是给连接打 mark,SAVE 是将数据包的 mark 配置到连接中,RESTORE 是将连接 mark 配置到数据包中。

1.4 Android set iptables

http://aospxref.com/android-12.0.0_r3/xref/system/netd/server/Controllers.cpp#278

2 策略路由

2.1 ip rule

linux策略路由(RPDB)

anakin:/ # ip rule list
0:      from all lookup local
200:    from all lookup main  
10000:  from all fwmark 0xc0000/0xd0000 lookup legacy_system
11000:  from all iif lo oif dummy0 uidrange 0-0 lookup dummy0
11000:  from all iif lo oif eth0.40 uidrange 0-0 lookup eth0.40
11000:  from all iif lo oif eth0.50 uidrange 0-0 lookup eth0.50
11000:  from all iif lo oif eth0.51 uidrange 0-0 lookup eth0.51
15010:  from all fwmark 0x10064/0x1ffff iif lo uidrange 10049-10049 lookup eth0.40
16000:  from all fwmark 0x10063/0x1ffff iif lo lookup local_network
16000:  from all fwmark 0x10064/0x1ffff iif lo lookup eth0.40
16000:  from all fwmark 0x10065/0x1ffff iif lo lookup eth0.50
16000:  from all fwmark 0x10066/0x1ffff iif lo lookup eth0.51
17000:  from all iif lo oif dummy0 lookup dummy0
17000:  from all iif lo oif eth0.40 lookup eth0.40
17000:  from all iif lo oif eth0.50 lookup eth0.50
17000:  from all iif lo oif eth0.51 lookup eth0.51
18000:  from all fwmark 0x0/0x10000 lookup legacy_system
19000:  from all fwmark 0x0/0x10000 lookup legacy_network
20000:  from all fwmark 0x0/0x10000 lookup local_network
22010:  from all fwmark 0x64/0x1ffff iif lo uidrange 10049-10049 lookup eth0.40
23000:  from all fwmark 0x64/0x1ffff iif lo lookup eth0.40
23000:  from all fwmark 0x65/0x1ffff iif lo lookup eth0.50
23000:  from all fwmark 0x66/0x1ffff iif lo lookup eth0.51
27010:  from all fwmark 0x0/0xffff iif lo uidrange 10049-10049 lookup eth0.40
29000:  from all fwmark 0x0/0xffff iif lo lookup eth0.50
32000:  from all unreachable

2.3 kernel code

in : ip_rcv_finish -> ip_rcv_finish_core -> ip_route_input_noref -> … -> fib_lookup -> fib_rules_lookup

out : ip_queue_xmit -> __ip_queue_xmit -> ip_route_output_ports -> … -> fib_lookup -> fib_rules_lookup

//kernel/msm-5.4/net/core/fib_rules.c
/*
 * Excerpt: walk ops->rules_list under rcu_read_lock() and skip every rule
 * that fib_rule_match() rejects for flow `fl`.  What is done with a matching
 * rule, and the return path, are elided ("...") in this excerpt.
 */
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
             int flags, struct fib_lookup_arg *arg)
{
    struct fib_rule *rule;
    int err;
    rcu_read_lock();
    list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
        /* Rule does not apply to this flow: try the next one. */
        if (!fib_rule_match(rule, ops, fl, flags, arg))
            continue;
        // ... (handling of a matching rule elided)
    }
    // ... (unlock and return elided)
}

/*
 * Decide whether `rule` applies to flow `fl`.
 *
 * The generic selectors are tested in order: input interface, output
 * interface, fwmark under mark_mask, tunnel id, l3mdev, and UID range.
 * A zero iifindex/oifindex/tun_id/l3mdev disables the corresponding test.
 * Only if all of them pass is ops->match() consulted.
 * The result is logically negated when the rule has FIB_RULE_INVERT set.
 */
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
              struct flowi *fl, int flags,
              struct fib_lookup_arg *arg)
{
    int verdict = 0;

    /* Short-circuit chain keeps the original test order. */
    if ((!rule->iifindex || rule->iifindex == fl->flowi_iif) &&
        (!rule->oifindex || rule->oifindex == fl->flowi_oif) &&
        !((rule->mark ^ fl->flowi_mark) & rule->mark_mask) &&
        (!rule->tun_id || rule->tun_id == fl->flowi_tun_key.tun_id) &&
        (!rule->l3mdev || l3mdev_fib_rule_match(rule->fr_net, fl, arg)) &&
        !uid_lt(fl->flowi_uid, rule->uid_range.start) &&
        !uid_gt(fl->flowi_uid, rule->uid_range.end))
        verdict = ops->match(rule, fl, flags);

    if (rule->flags & FIB_RULE_INVERT)
        return !verdict;
    return verdict;
}

2.4 Android 添加 ip rules

http://aospxref.com/android-12.0.0_r3/xref/system/netd/server/RouteController.cpp#1150

http://aospxref.com/android-12.0.0_r3/xref/system/netd/server/RouteController.cpp#1188

2.5 Android 对网络流量进行mark

https://blog.csdn.net/woai110120130/article/details/119479392

http://aospxref.com/android-12.0.0_r3/xref/system/netd/server/FwmarkServer.cpp#101

// Excerpt: handle one fwmark command for the socket *socketFd.
//
// Reads the socket's current SO_MARK into `fwmark`, adjusts the NetId and
// permission bits according to command.cmdId, and writes the mark back with
// setsockopt(SO_MARK).  `command` and `permission` are set up in the elided
// ("//...") part of the function.
//
// Returns 0 on success, or -errno if getsockopt()/setsockopt() fails.
int FwmarkServer::processClient(SocketClient* client, int* socketFd) {
    //...

    Fwmark fwmark;
    socklen_t fwmarkLen = sizeof(fwmark.intValue);
    if (getsockopt(*socketFd, SOL_SOCKET, SO_MARK, &fwmark.intValue, &fwmarkLen) == -1) {
        return -errno;
    }

    switch (command.cmdId) {
        case FwmarkCommand::ON_ACCEPT: {
            // Called after a socket accept(). The kernel would've marked the NetId and necessary
            // permissions bits, so we just add the rest of the user's permissions here.
            permission = static_cast<Permission>(permission | fwmark.permission);
            break;
        }

        case FwmarkCommand::ON_CONNECT: {
            // Called before a socket connect() happens. Set an appropriate NetId into the fwmark so
            // that the socket routes consistently over that network. Do this even if the socket
            // already has a NetId, so that calling connect() multiple times still works.
            //
            // But if the explicit bit was set, the existing NetId was explicitly preferred (and not
            // a case of connect() being called multiple times). Don't reset the NetId in that case.
            //
            // An "appropriate" NetId is the NetId of a bypassable VPN that applies to the user, or
            // failing that, the default network. We'll never set the NetId of a secure VPN here.
            // See the comments in the implementation of getNetworkForConnect() for more details.
            //
            // If the protect bit is set, this could be either a system proxy (e.g.: the dns proxy
            // or the download manager) acting on behalf of another user, or a VPN provider. If it's
            // a proxy, we shouldn't reset the NetId. If it's a VPN provider, we should set the
            // default network's NetId.
            //
            // There's no easy way to tell the difference between a proxy and a VPN app. We can't
            // use PERMISSION_SYSTEM to identify the proxy because a VPN app may also have those
            // permissions. So we use the following heuristic:
            //
            // If it's a proxy, but the existing NetId is not a VPN, that means the user (that the
            // proxy is acting on behalf of) is not subject to a VPN, so the proxy must have picked
            // the default network's NetId. So, it's okay to replace that with the current default
            // network's NetId (which in all likelihood is the same).
            //
            // Conversely, if it's a VPN provider, the existing NetId cannot be a VPN. The only time
            // we set a VPN's NetId into a socket without setting the explicit bit is here, in
            // ON_CONNECT, but we won't do that if the socket has the protect bit set. If the VPN
            // provider connect()ed (and got the VPN NetId set) and then called protect(), we
            // would've unset the NetId in PROTECT_FROM_VPN below.
            //
            // So, overall (when the explicit bit is not set but the protect bit is set), if the
            // existing NetId is a VPN, don't reset it. Else, set the default network's NetId.
            if (!fwmark.explicitlySelected) {
                if (!fwmark.protectedFromVpn) {
                    fwmark.netId = mNetworkController->getNetworkForConnect(client->getUid());
                } else if (!mNetworkController->isVirtualNetwork(fwmark.netId)) {
                    fwmark.netId = mNetworkController->getDefaultNetwork();
                }
            }
            break;
        }
    //...
    }
    fwmark.permission = permission;

    // Write the updated mark back onto the socket.
    if (setsockopt(*socketFd, SOL_SOCKET, SO_MARK, &fwmark.intValue,
                   sizeof(fwmark.intValue)) == -1) {
        return -errno;
    }

    // Success: a non-void function must not fall off its end.
    return 0;
}

通过 setsockopt 设置mark。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值